001package org.hl7.fhir.convertors.misc;
002
003import java.io.FileInputStream;
004import java.io.IOException;
005
006import javax.xml.parsers.DocumentBuilder;
007import javax.xml.parsers.DocumentBuilderFactory;
008import javax.xml.parsers.ParserConfigurationException;
009
010/*
011  Copyright (c) 2011+, HL7, Inc.
012  All rights reserved.
013  
014  Redistribution and use in source and binary forms, with or without modification, 
015  are permitted provided that the following conditions are met:
016    
017   * Redistributions of source code must retain the above copyright notice, this 
018     list of conditions and the following disclaimer.
019   * Redistributions in binary form must reproduce the above copyright notice, 
020     this list of conditions and the following disclaimer in the documentation 
021     and/or other materials provided with the distribution.
022   * Neither the name of HL7 nor the names of its contributors may be used to 
023     endorse or promote products derived from this software without specific 
024     prior written permission.
025  
026  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
027  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
028  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
029  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
030  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
031  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
032  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
033  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
034  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
035  POSSIBILITY OF SUCH DAMAGE.
036  
037 */
038
039
040import org.hl7.fhir.r4.model.CodeSystem;
041import org.hl7.fhir.r4.model.CodeSystem.CodeSystemContentMode;
042import org.hl7.fhir.r4.model.CodeSystem.ConceptDefinitionComponent;
043import org.hl7.fhir.r4.model.CodeSystem.PropertyType;
044import org.hl7.fhir.r4.model.CodeType;
045import org.hl7.fhir.r4.model.ContactPoint.ContactPointSystem;
046import org.hl7.fhir.r4.model.DateTimeType;
047import org.hl7.fhir.r4.model.Enumerations.PublicationStatus;
048import org.hl7.fhir.utilities.filesystem.ManagedFileAccess;
049import org.hl7.fhir.utilities.xml.XMLUtil;
050import org.w3c.dom.Document;
051import org.w3c.dom.Element;
052import org.xml.sax.SAXException;
053
054public class CountryCodesConverter {
055
056
057  private String source;
058  private String dest;
059
060  public static void main(String[] args) throws Exception {
061    CountryCodesConverter self = new CountryCodesConverter();
062    self.source = args[0];
063    self.dest = args[1];
064    self.execute();
065  }
066
067  private void execute() throws ParserConfigurationException, SAXException, IOException {
068    Document src = load();
069    CodeSystem cs1 = new CodeSystem();
070    CodeSystem cs2 = new CodeSystem();
071    CodeSystem cs3 = new CodeSystem();
072    setMetadata(src, cs1, "iso3166", "urn:iso:std:iso:3166", "", "");
073    setMetadata(src, cs2, "iso3166-2", "urn:iso:std:iso:3166:-2", "Part2", " Part 2");
074    cs1.addProperty().setCode("canonical").setDescription("The 2 letter code that identifies the same country (so 2/3/numeric codes can be aligned)").setType(PropertyType.CODE);
075    cs2.addProperty().setCode("country").setDescription("The 2 letter code that identifies the country for the subdivision").setType(PropertyType.CODE);
076    for (Element e : XMLUtil.getNamedChildren(src.getDocumentElement(), "country")) {
077      System.out.println(e.getAttribute("id"));
078      String c2 = XMLUtil.getNamedChildText(e, "alpha-2-code");
079      String c3 = XMLUtil.getNamedChildText(e, "alpha-3-code");
080      String cN = XMLUtil.getNamedChildText(e, "numeric-code");
081      Element n = XMLUtil.getNamedChildByAttribute(e, "short-name", "lang3code", "eng");
082      if (n == null)
083        n = XMLUtil.getNamedChildByAttribute(e, "short-name-upper-case", "lang3code", "eng");
084      if (n == null)
085        continue;
086      String name = n.getTextContent();
087      n = XMLUtil.getNamedChildByAttribute(e, "full-name", "lang3code", "eng");
088      if (n == null)
089        n = XMLUtil.getNamedChildByAttribute(e, "full-name-upper-case", "lang3code", "eng");
090      if (n == null)
091        n = XMLUtil.getNamedChildByAttribute(e, "short-name", "lang3code", "eng");
092      if (n == null)
093        n = XMLUtil.getNamedChildByAttribute(e, "short-name-upper-case", "lang3code", "eng");
094      String desc = n.getTextContent();
095      ConceptDefinitionComponent cc = cs1.addConcept();
096      cc.setCode(c2);
097      cc.setDisplay(name);
098      cc.setDefinition(desc);
099      poplang(e, cc);
100      if (c3 != null) {
101        cc = cs1.addConcept();
102        cc.setCode(c3);
103        cc.setDisplay(name);
104        cc.setDefinition(desc);
105        cc.addProperty().setCode("canonical").setValue(new CodeType(c2));
106        poplang(e, cc);
107      }
108      if (cN != null) {
109        cc = cs1.addConcept();
110        cc.setCode(cN);
111        cc.setDisplay(name);
112        cc.setDefinition(desc);
113        cc.addProperty().setCode("canonical").setValue(new CodeType(c2));
114        poplang(e, cc);
115      }
116      for (Element sd : XMLUtil.getNamedChildren(e, "subdivision")) {
117        cc = cs2.addConcept();
118        cc.setCode(XMLUtil.getNamedChildText(sd, "subdivision-code"));
119        Element l = XMLUtil.getNamedChild(sd, "subdivision-locale");
120        cc.setDisplay(XMLUtil.getNamedChildText(l, "subdivision-locale-name"));
121        cc.addProperty().setCode("country").setValue(new CodeType(c2));
122      }
123    }
124    cs1.setCount(cs1.getConcept().size());
125    cs2.setCount(cs2.getConcept().size());
126    throw new Error("Needs revisiting");
127//    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(ManagedFileAccess.outStream(Utilities.path(dest, "4.0.1", "package", "CodeSstem-iso3166.json")), cs1);
128//    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(ManagedFileAccess.outStream(Utilities.path(dest, "3.0.2", "package", "CodeSstem-iso3166.json")), cs1); // format hasn't changed
129//    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(ManagedFileAccess.outStream(Utilities.path(dest, "4.0.1", "package", "CodeSstem-iso3166-2.json")), cs2);
130//    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(ManagedFileAccess.outStream(Utilities.path(dest, "3.0.2", "package", "CodeSstem-iso3166-2.json")), cs2); // format hasn't changed
131  }
132
133  public void setMetadata(Document src, CodeSystem cs, String id, String url, String partName, String partTitle) {
134    cs.setId(id);
135    cs.setUrl(url);
136    cs.setName("ISOCountryCodes" + partName);
137    cs.setTitle("ISO Country Codes (ISO-3166)" + partTitle);
138    cs.setVersion(XMLUtil.getFirstChild(src.getDocumentElement()).getAttribute("version"));
139    cs.setStatus(PublicationStatus.ACTIVE);
140    cs.setExperimental(false);
141    cs.addContact().setName("FHIR Project Team").addTelecom().setSystem(ContactPointSystem.URL).setValue("http://hl7.org/fhir");
142    cs.setDateElement(new DateTimeType(src.getDocumentElement().getAttribute("generated")));
143    cs.setCopyright("Copyright ISO. See https://www.iso.org/obp/ui/#search/code/");
144    cs.setCaseSensitive(true);
145    cs.setContent(CodeSystemContentMode.COMPLETE);
146    cs.setLanguage("en");
147  }
148
149  public void poplang(Element e, ConceptDefinitionComponent cc) {
150    for (Element el : XMLUtil.getNamedChildren(e, "short-name")) {
151      if (!el.getAttribute("lang3code").equals("eng")) {
152        String l2 = lang3To2(el.getAttribute("lang3code"));
153        if (l2 != null)
154          cc.addDesignation().setLanguage(l2).setValue(el.getTextContent());
155      }
156    }
157  }
158
159  private String lang3To2(String lang) {
160    if ("abk".equals(lang)) return "ab";
161    if ("aar".equals(lang)) return "aa";
162    if ("afr".equals(lang)) return "af";
163    if ("aka".equals(lang)) return "ak";
164    if ("sqi".equals(lang)) return "sq";
165    if ("amh".equals(lang)) return "am";
166    if ("ara".equals(lang)) return "ar";
167    if ("arg".equals(lang)) return "an";
168    if ("hye".equals(lang)) return "hy";
169    if ("asm".equals(lang)) return "as";
170    if ("ava".equals(lang)) return "av";
171    if ("ave".equals(lang)) return "ae";
172    if ("aym".equals(lang)) return "ay";
173    if ("aze".equals(lang)) return "az";
174    if ("bam".equals(lang)) return "bm";
175    if ("bak".equals(lang)) return "ba";
176    if ("eus".equals(lang)) return "eu";
177    if ("bel".equals(lang)) return "be";
178    if ("ben".equals(lang)) return "bn";
179    if ("bih".equals(lang)) return "bh";
180    if ("bis".equals(lang)) return "bi";
181    if ("bos".equals(lang)) return "bs";
182    if ("bre".equals(lang)) return "br";
183    if ("bul".equals(lang)) return "bg";
184    if ("mya".equals(lang)) return "my";
185    if ("cat".equals(lang)) return "ca";
186    if ("khm".equals(lang)) return "km";
187    if ("cha".equals(lang)) return "ch";
188    if ("che".equals(lang)) return "ce";
189    if ("nya".equals(lang)) return "ny";
190    if ("zho".equals(lang)) return "zh";
191    if ("chu".equals(lang)) return "cu";
192    if ("chv".equals(lang)) return "cv";
193    if ("cor".equals(lang)) return "kw";
194    if ("cos".equals(lang)) return "co";
195    if ("cre".equals(lang)) return "cr";
196    if ("hrv".equals(lang)) return "hr";
197    if ("ces".equals(lang)) return "cs";
198    if ("dan".equals(lang)) return "da";
199    if ("div".equals(lang)) return "dv";
200    if ("nld".equals(lang)) return "nl";
201    if ("dzo".equals(lang)) return "dz";
202    if ("eng".equals(lang)) return "en";
203    if ("epo".equals(lang)) return "eo";
204    if ("est".equals(lang)) return "et";
205    if ("ewe".equals(lang)) return "ee";
206    if ("fao".equals(lang)) return "fo";
207    if ("fij".equals(lang)) return "fj";
208    if ("fin".equals(lang)) return "fi";
209    if ("fra".equals(lang)) return "fr";
210    if ("ful".equals(lang)) return "ff";
211    if ("gla".equals(lang)) return "gd";
212    if ("glg".equals(lang)) return "gl";
213    if ("lug".equals(lang)) return "lg";
214    if ("kat".equals(lang)) return "ka";
215    if ("deu".equals(lang)) return "de";
216    if ("ell".equals(lang)) return "el";
217    if ("grn".equals(lang)) return "gn";
218    if ("guj".equals(lang)) return "gu";
219    if ("hat".equals(lang)) return "ht";
220    if ("hau".equals(lang)) return "ha";
221    if ("heb".equals(lang)) return "he";
222    if ("her".equals(lang)) return "hz";
223    if ("hin".equals(lang)) return "hi";
224    if ("hmo".equals(lang)) return "ho";
225    if ("hun".equals(lang)) return "hu";
226    if ("isl".equals(lang)) return "is";
227    if ("ido".equals(lang)) return "io";
228    if ("ibo".equals(lang)) return "ig";
229    if ("ind".equals(lang)) return "id";
230    if ("ina".equals(lang)) return "ia";
231    if ("ile".equals(lang)) return "ie";
232    if ("iku".equals(lang)) return "iu";
233    if ("ipk".equals(lang)) return "ik";
234    if ("gle".equals(lang)) return "ga";
235    if ("ita".equals(lang)) return "it";
236    if ("jpn".equals(lang)) return "ja";
237    if ("jav".equals(lang)) return "jv";
238    if ("kal".equals(lang)) return "kl";
239    if ("kan".equals(lang)) return "kn";
240    if ("kau".equals(lang)) return "kr";
241    if ("kas".equals(lang)) return "ks";
242    if ("kaz".equals(lang)) return "kk";
243    if ("kik".equals(lang)) return "ki";
244    if ("kin".equals(lang)) return "rw";
245    if ("kir".equals(lang)) return "ky";
246    if ("kom".equals(lang)) return "kv";
247    if ("kon".equals(lang)) return "kg";
248    if ("kor".equals(lang)) return "ko";
249    if ("kua".equals(lang)) return "kj";
250    if ("kur".equals(lang)) return "ku";
251    if ("lao".equals(lang)) return "lo";
252    if ("lat".equals(lang)) return "la";
253    if ("lav".equals(lang)) return "lv";
254    if ("lim".equals(lang)) return "li";
255    if ("lin".equals(lang)) return "ln";
256    if ("lit".equals(lang)) return "lt";
257    if ("lub".equals(lang)) return "lu";
258    if ("ltz".equals(lang)) return "lb";
259    if ("mkd".equals(lang)) return "mk";
260    if ("mlg".equals(lang)) return "mg";
261    if ("msa".equals(lang)) return "ms";
262    if ("mal".equals(lang)) return "ml";
263    if ("mlt".equals(lang)) return "mt";
264    if ("glv".equals(lang)) return "gv";
265    if ("mri".equals(lang)) return "mi";
266    if ("mar".equals(lang)) return "mr";
267    if ("mah".equals(lang)) return "mh";
268    if ("mon".equals(lang)) return "mn";
269    if ("nau".equals(lang)) return "na";
270    if ("nav".equals(lang)) return "nv";
271    if ("ndo".equals(lang)) return "ng";
272    if ("nep".equals(lang)) return "ne";
273    if ("nde".equals(lang)) return "nd";
274    if ("sme".equals(lang)) return "se";
275    if ("nor".equals(lang)) return "no";
276    if ("nob".equals(lang)) return "nb";
277    if ("nno".equals(lang)) return "nn";
278    if ("oci".equals(lang)) return "oc";
279    if ("oji".equals(lang)) return "oj";
280    if ("ori".equals(lang)) return "or";
281    if ("orm".equals(lang)) return "om";
282    if ("oss".equals(lang)) return "os";
283    if ("pli".equals(lang)) return "pi";
284    if ("pan".equals(lang)) return "pa";
285    if ("pus".equals(lang)) return "ps";
286    if ("fas".equals(lang)) return "fa";
287    if ("pol".equals(lang)) return "pl";
288    if ("por".equals(lang)) return "pt";
289    if ("que".equals(lang)) return "qu";
290    if ("ron".equals(lang)) return "ro";
291    if ("roh".equals(lang)) return "rm";
292    if ("run".equals(lang)) return "rn";
293    if ("rus".equals(lang)) return "ru";
294    if ("smo".equals(lang)) return "sm";
295    if ("sag".equals(lang)) return "sg";
296    if ("san".equals(lang)) return "sa";
297    if ("srd".equals(lang)) return "sc";
298    if ("srp".equals(lang)) return "sr";
299    if ("sna".equals(lang)) return "sn";
300    if ("iii".equals(lang)) return "ii";
301    if ("snd".equals(lang)) return "sd";
302    if ("sin".equals(lang)) return "si";
303    if ("slk".equals(lang)) return "sk";
304    if ("slv".equals(lang)) return "sl";
305    if ("som".equals(lang)) return "so";
306    if ("nbl".equals(lang)) return "nr";
307    if ("sot".equals(lang)) return "st";
308    if ("spa".equals(lang)) return "es";
309    if ("sun".equals(lang)) return "su";
310    if ("swa".equals(lang)) return "sw";
311    if ("ssw".equals(lang)) return "ss";
312    if ("swe".equals(lang)) return "sv";
313    if ("tgl".equals(lang)) return "tl";
314    if ("tah".equals(lang)) return "ty";
315    if ("tgk".equals(lang)) return "tg";
316    if ("tam".equals(lang)) return "ta";
317    if ("tat".equals(lang)) return "tt";
318    if ("tel".equals(lang)) return "te";
319    if ("tha".equals(lang)) return "th";
320    if ("bod".equals(lang)) return "bo";
321    if ("tir".equals(lang)) return "ti";
322    if ("ton".equals(lang)) return "to";
323    if ("tso".equals(lang)) return "ts";
324    if ("tsn".equals(lang)) return "tn";
325    if ("tur".equals(lang)) return "tr";
326    if ("tuk".equals(lang)) return "tk";
327    if ("twi".equals(lang)) return "tw";
328    if ("uig".equals(lang)) return "ug";
329    if ("ukr".equals(lang)) return "uk";
330    if ("urd".equals(lang)) return "ur";
331    if ("uzb".equals(lang)) return "uz";
332    if ("ven".equals(lang)) return "ve";
333    if ("vie".equals(lang)) return "vi";
334    if ("vol".equals(lang)) return "vo";
335    if ("wln".equals(lang)) return "wa";
336    if ("cym".equals(lang)) return "cy";
337    if ("fry".equals(lang)) return "fy";
338    if ("wol".equals(lang)) return "wo";
339    if ("xho".equals(lang)) return "xh";
340    if ("yid".equals(lang)) return "yi";
341    if ("yor".equals(lang)) return "yo";
342    if ("zha".equals(lang)) return "za";
343    if ("zul".equals(lang)) return "zu";
344    if ("pap".equals(lang)) return "pap";
345    if ("gil".equals(lang)) return "gil";
346    if ("002".equals(lang)) return null;
347    if ("cnr".equals(lang)) return "cnr";
348    if ("niu".equals(lang)) return "niu";
349    if ("tpi".equals(lang)) return "tpi";
350    if ("pau".equals(lang)) return "pau";
351    if ("crs".equals(lang)) return null;
352    if ("tkl".equals(lang)) return "tkl";
353    if ("tet".equals(lang)) return "tet";
354    if ("tvl".equals(lang)) return "tvl";
355    if ("nso".equals(lang)) return "nso";
356    throw new Error("unknown 3 letter lang code " + lang);
357  }
358
359  private Document load() throws ParserConfigurationException, SAXException, IOException {
360    DocumentBuilderFactory factory = XMLUtil.newXXEProtectedDocumentBuilderFactory();
361    factory.setNamespaceAware(true);
362    DocumentBuilder builder = factory.newDocumentBuilder();
363
364    return builder.parse(ManagedFileAccess.inStream(source));
365  }
366
367
368}