001package org.hl7.fhir.convertors.misc;
002
003import java.io.FileInputStream;
004import java.io.IOException;
005
006import javax.xml.parsers.DocumentBuilder;
007import javax.xml.parsers.DocumentBuilderFactory;
008import javax.xml.parsers.ParserConfigurationException;
009
010/*
011  Copyright (c) 2011+, HL7, Inc.
012  All rights reserved.
013  
014  Redistribution and use in source and binary forms, with or without modification, 
015  are permitted provided that the following conditions are met:
016    
017   * Redistributions of source code must retain the above copyright notice, this 
018     list of conditions and the following disclaimer.
019   * Redistributions in binary form must reproduce the above copyright notice, 
020     this list of conditions and the following disclaimer in the documentation 
021     and/or other materials provided with the distribution.
022   * Neither the name of HL7 nor the names of its contributors may be used to 
023     endorse or promote products derived from this software without specific 
024     prior written permission.
025  
026  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
027  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
028  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
029  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
030  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
031  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
032  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
033  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
034  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
035  POSSIBILITY OF SUCH DAMAGE.
036  
037 */
038
039
040import org.hl7.fhir.r4.model.CodeSystem;
041import org.hl7.fhir.r4.model.CodeSystem.CodeSystemContentMode;
042import org.hl7.fhir.r4.model.CodeSystem.ConceptDefinitionComponent;
043import org.hl7.fhir.r4.model.CodeSystem.PropertyType;
044import org.hl7.fhir.r4.model.CodeType;
045import org.hl7.fhir.r4.model.ContactPoint.ContactPointSystem;
046import org.hl7.fhir.r4.model.DateTimeType;
047import org.hl7.fhir.r4.model.Enumerations.PublicationStatus;
048import org.hl7.fhir.utilities.xml.XMLUtil;
049import org.w3c.dom.Document;
050import org.w3c.dom.Element;
051import org.xml.sax.SAXException;
052
053public class CountryCodesConverter {
054
055
056  private String source;
057  private String dest;
058
059  public static void main(String[] args) throws Exception {
060    CountryCodesConverter self = new CountryCodesConverter();
061    self.source = args[0];
062    self.dest = args[1];
063    self.execute();
064  }
065
066  private void execute() throws ParserConfigurationException, SAXException, IOException {
067    Document src = load();
068    CodeSystem cs1 = new CodeSystem();
069    CodeSystem cs2 = new CodeSystem();
070    CodeSystem cs3 = new CodeSystem();
071    setMetadata(src, cs1, "iso3166", "urn:iso:std:iso:3166", "", "");
072    setMetadata(src, cs2, "iso3166-2", "urn:iso:std:iso:3166:-2", "Part2", " Part 2");
073    cs1.addProperty().setCode("canonical").setDescription("The 2 letter code that identifies the same country (so 2/3/numeric codes can be aligned)").setType(PropertyType.CODE);
074    cs2.addProperty().setCode("country").setDescription("The 2 letter code that identifies the country for the subdivision").setType(PropertyType.CODE);
075    for (Element e : XMLUtil.getNamedChildren(src.getDocumentElement(), "country")) {
076      System.out.println(e.getAttribute("id"));
077      String c2 = XMLUtil.getNamedChildText(e, "alpha-2-code");
078      String c3 = XMLUtil.getNamedChildText(e, "alpha-3-code");
079      String cN = XMLUtil.getNamedChildText(e, "numeric-code");
080      Element n = XMLUtil.getNamedChildByAttribute(e, "short-name", "lang3code", "eng");
081      if (n == null)
082        n = XMLUtil.getNamedChildByAttribute(e, "short-name-upper-case", "lang3code", "eng");
083      if (n == null)
084        continue;
085      String name = n.getTextContent();
086      n = XMLUtil.getNamedChildByAttribute(e, "full-name", "lang3code", "eng");
087      if (n == null)
088        n = XMLUtil.getNamedChildByAttribute(e, "full-name-upper-case", "lang3code", "eng");
089      if (n == null)
090        n = XMLUtil.getNamedChildByAttribute(e, "short-name", "lang3code", "eng");
091      if (n == null)
092        n = XMLUtil.getNamedChildByAttribute(e, "short-name-upper-case", "lang3code", "eng");
093      String desc = n.getTextContent();
094      ConceptDefinitionComponent cc = cs1.addConcept();
095      cc.setCode(c2);
096      cc.setDisplay(name);
097      cc.setDefinition(desc);
098      poplang(e, cc);
099      if (c3 != null) {
100        cc = cs1.addConcept();
101        cc.setCode(c3);
102        cc.setDisplay(name);
103        cc.setDefinition(desc);
104        cc.addProperty().setCode("canonical").setValue(new CodeType(c2));
105        poplang(e, cc);
106      }
107      if (cN != null) {
108        cc = cs1.addConcept();
109        cc.setCode(cN);
110        cc.setDisplay(name);
111        cc.setDefinition(desc);
112        cc.addProperty().setCode("canonical").setValue(new CodeType(c2));
113        poplang(e, cc);
114      }
115      for (Element sd : XMLUtil.getNamedChildren(e, "subdivision")) {
116        cc = cs2.addConcept();
117        cc.setCode(XMLUtil.getNamedChildText(sd, "subdivision-code"));
118        Element l = XMLUtil.getNamedChild(sd, "subdivision-locale");
119        cc.setDisplay(XMLUtil.getNamedChildText(l, "subdivision-locale-name"));
120        cc.addProperty().setCode("country").setValue(new CodeType(c2));
121      }
122    }
123    cs1.setCount(cs1.getConcept().size());
124    cs2.setCount(cs2.getConcept().size());
125    throw new Error("Needs revisiting");
126//    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(Utilities.path(dest, "4.0.1", "package", "CodeSstem-iso3166.json")), cs1);
127//    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(Utilities.path(dest, "3.0.2", "package", "CodeSstem-iso3166.json")), cs1); // format hasn't changed
128//    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(Utilities.path(dest, "4.0.1", "package", "CodeSstem-iso3166-2.json")), cs2);
129//    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(Utilities.path(dest, "3.0.2", "package", "CodeSstem-iso3166-2.json")), cs2); // format hasn't changed
130  }
131
132  public void setMetadata(Document src, CodeSystem cs, String id, String url, String partName, String partTitle) {
133    cs.setId(id);
134    cs.setUrl(url);
135    cs.setName("ISOCountryCodes" + partName);
136    cs.setTitle("ISO Country Codes (ISO-3166)" + partTitle);
137    cs.setVersion(XMLUtil.getFirstChild(src.getDocumentElement()).getAttribute("version"));
138    cs.setStatus(PublicationStatus.ACTIVE);
139    cs.setExperimental(false);
140    cs.addContact().setName("FHIR Project Team").addTelecom().setSystem(ContactPointSystem.URL).setValue("http://hl7.org/fhir");
141    cs.setDateElement(new DateTimeType(src.getDocumentElement().getAttribute("generated")));
142    cs.setCopyright("Copyright ISO. See https://www.iso.org/obp/ui/#search/code/");
143    cs.setCaseSensitive(true);
144    cs.setContent(CodeSystemContentMode.COMPLETE);
145    cs.setLanguage("en");
146  }
147
148  public void poplang(Element e, ConceptDefinitionComponent cc) {
149    for (Element el : XMLUtil.getNamedChildren(e, "short-name")) {
150      if (!el.getAttribute("lang3code").equals("eng")) {
151        String l2 = lang3To2(el.getAttribute("lang3code"));
152        if (l2 != null)
153          cc.addDesignation().setLanguage(l2).setValue(el.getTextContent());
154      }
155    }
156  }
157
158  private String lang3To2(String lang) {
159    if ("abk".equals(lang)) return "ab";
160    if ("aar".equals(lang)) return "aa";
161    if ("afr".equals(lang)) return "af";
162    if ("aka".equals(lang)) return "ak";
163    if ("sqi".equals(lang)) return "sq";
164    if ("amh".equals(lang)) return "am";
165    if ("ara".equals(lang)) return "ar";
166    if ("arg".equals(lang)) return "an";
167    if ("hye".equals(lang)) return "hy";
168    if ("asm".equals(lang)) return "as";
169    if ("ava".equals(lang)) return "av";
170    if ("ave".equals(lang)) return "ae";
171    if ("aym".equals(lang)) return "ay";
172    if ("aze".equals(lang)) return "az";
173    if ("bam".equals(lang)) return "bm";
174    if ("bak".equals(lang)) return "ba";
175    if ("eus".equals(lang)) return "eu";
176    if ("bel".equals(lang)) return "be";
177    if ("ben".equals(lang)) return "bn";
178    if ("bih".equals(lang)) return "bh";
179    if ("bis".equals(lang)) return "bi";
180    if ("bos".equals(lang)) return "bs";
181    if ("bre".equals(lang)) return "br";
182    if ("bul".equals(lang)) return "bg";
183    if ("mya".equals(lang)) return "my";
184    if ("cat".equals(lang)) return "ca";
185    if ("khm".equals(lang)) return "km";
186    if ("cha".equals(lang)) return "ch";
187    if ("che".equals(lang)) return "ce";
188    if ("nya".equals(lang)) return "ny";
189    if ("zho".equals(lang)) return "zh";
190    if ("chu".equals(lang)) return "cu";
191    if ("chv".equals(lang)) return "cv";
192    if ("cor".equals(lang)) return "kw";
193    if ("cos".equals(lang)) return "co";
194    if ("cre".equals(lang)) return "cr";
195    if ("hrv".equals(lang)) return "hr";
196    if ("ces".equals(lang)) return "cs";
197    if ("dan".equals(lang)) return "da";
198    if ("div".equals(lang)) return "dv";
199    if ("nld".equals(lang)) return "nl";
200    if ("dzo".equals(lang)) return "dz";
201    if ("eng".equals(lang)) return "en";
202    if ("epo".equals(lang)) return "eo";
203    if ("est".equals(lang)) return "et";
204    if ("ewe".equals(lang)) return "ee";
205    if ("fao".equals(lang)) return "fo";
206    if ("fij".equals(lang)) return "fj";
207    if ("fin".equals(lang)) return "fi";
208    if ("fra".equals(lang)) return "fr";
209    if ("ful".equals(lang)) return "ff";
210    if ("gla".equals(lang)) return "gd";
211    if ("glg".equals(lang)) return "gl";
212    if ("lug".equals(lang)) return "lg";
213    if ("kat".equals(lang)) return "ka";
214    if ("deu".equals(lang)) return "de";
215    if ("ell".equals(lang)) return "el";
216    if ("grn".equals(lang)) return "gn";
217    if ("guj".equals(lang)) return "gu";
218    if ("hat".equals(lang)) return "ht";
219    if ("hau".equals(lang)) return "ha";
220    if ("heb".equals(lang)) return "he";
221    if ("her".equals(lang)) return "hz";
222    if ("hin".equals(lang)) return "hi";
223    if ("hmo".equals(lang)) return "ho";
224    if ("hun".equals(lang)) return "hu";
225    if ("isl".equals(lang)) return "is";
226    if ("ido".equals(lang)) return "io";
227    if ("ibo".equals(lang)) return "ig";
228    if ("ind".equals(lang)) return "id";
229    if ("ina".equals(lang)) return "ia";
230    if ("ile".equals(lang)) return "ie";
231    if ("iku".equals(lang)) return "iu";
232    if ("ipk".equals(lang)) return "ik";
233    if ("gle".equals(lang)) return "ga";
234    if ("ita".equals(lang)) return "it";
235    if ("jpn".equals(lang)) return "ja";
236    if ("jav".equals(lang)) return "jv";
237    if ("kal".equals(lang)) return "kl";
238    if ("kan".equals(lang)) return "kn";
239    if ("kau".equals(lang)) return "kr";
240    if ("kas".equals(lang)) return "ks";
241    if ("kaz".equals(lang)) return "kk";
242    if ("kik".equals(lang)) return "ki";
243    if ("kin".equals(lang)) return "rw";
244    if ("kir".equals(lang)) return "ky";
245    if ("kom".equals(lang)) return "kv";
246    if ("kon".equals(lang)) return "kg";
247    if ("kor".equals(lang)) return "ko";
248    if ("kua".equals(lang)) return "kj";
249    if ("kur".equals(lang)) return "ku";
250    if ("lao".equals(lang)) return "lo";
251    if ("lat".equals(lang)) return "la";
252    if ("lav".equals(lang)) return "lv";
253    if ("lim".equals(lang)) return "li";
254    if ("lin".equals(lang)) return "ln";
255    if ("lit".equals(lang)) return "lt";
256    if ("lub".equals(lang)) return "lu";
257    if ("ltz".equals(lang)) return "lb";
258    if ("mkd".equals(lang)) return "mk";
259    if ("mlg".equals(lang)) return "mg";
260    if ("msa".equals(lang)) return "ms";
261    if ("mal".equals(lang)) return "ml";
262    if ("mlt".equals(lang)) return "mt";
263    if ("glv".equals(lang)) return "gv";
264    if ("mri".equals(lang)) return "mi";
265    if ("mar".equals(lang)) return "mr";
266    if ("mah".equals(lang)) return "mh";
267    if ("mon".equals(lang)) return "mn";
268    if ("nau".equals(lang)) return "na";
269    if ("nav".equals(lang)) return "nv";
270    if ("ndo".equals(lang)) return "ng";
271    if ("nep".equals(lang)) return "ne";
272    if ("nde".equals(lang)) return "nd";
273    if ("sme".equals(lang)) return "se";
274    if ("nor".equals(lang)) return "no";
275    if ("nob".equals(lang)) return "nb";
276    if ("nno".equals(lang)) return "nn";
277    if ("oci".equals(lang)) return "oc";
278    if ("oji".equals(lang)) return "oj";
279    if ("ori".equals(lang)) return "or";
280    if ("orm".equals(lang)) return "om";
281    if ("oss".equals(lang)) return "os";
282    if ("pli".equals(lang)) return "pi";
283    if ("pan".equals(lang)) return "pa";
284    if ("pus".equals(lang)) return "ps";
285    if ("fas".equals(lang)) return "fa";
286    if ("pol".equals(lang)) return "pl";
287    if ("por".equals(lang)) return "pt";
288    if ("que".equals(lang)) return "qu";
289    if ("ron".equals(lang)) return "ro";
290    if ("roh".equals(lang)) return "rm";
291    if ("run".equals(lang)) return "rn";
292    if ("rus".equals(lang)) return "ru";
293    if ("smo".equals(lang)) return "sm";
294    if ("sag".equals(lang)) return "sg";
295    if ("san".equals(lang)) return "sa";
296    if ("srd".equals(lang)) return "sc";
297    if ("srp".equals(lang)) return "sr";
298    if ("sna".equals(lang)) return "sn";
299    if ("iii".equals(lang)) return "ii";
300    if ("snd".equals(lang)) return "sd";
301    if ("sin".equals(lang)) return "si";
302    if ("slk".equals(lang)) return "sk";
303    if ("slv".equals(lang)) return "sl";
304    if ("som".equals(lang)) return "so";
305    if ("nbl".equals(lang)) return "nr";
306    if ("sot".equals(lang)) return "st";
307    if ("spa".equals(lang)) return "es";
308    if ("sun".equals(lang)) return "su";
309    if ("swa".equals(lang)) return "sw";
310    if ("ssw".equals(lang)) return "ss";
311    if ("swe".equals(lang)) return "sv";
312    if ("tgl".equals(lang)) return "tl";
313    if ("tah".equals(lang)) return "ty";
314    if ("tgk".equals(lang)) return "tg";
315    if ("tam".equals(lang)) return "ta";
316    if ("tat".equals(lang)) return "tt";
317    if ("tel".equals(lang)) return "te";
318    if ("tha".equals(lang)) return "th";
319    if ("bod".equals(lang)) return "bo";
320    if ("tir".equals(lang)) return "ti";
321    if ("ton".equals(lang)) return "to";
322    if ("tso".equals(lang)) return "ts";
323    if ("tsn".equals(lang)) return "tn";
324    if ("tur".equals(lang)) return "tr";
325    if ("tuk".equals(lang)) return "tk";
326    if ("twi".equals(lang)) return "tw";
327    if ("uig".equals(lang)) return "ug";
328    if ("ukr".equals(lang)) return "uk";
329    if ("urd".equals(lang)) return "ur";
330    if ("uzb".equals(lang)) return "uz";
331    if ("ven".equals(lang)) return "ve";
332    if ("vie".equals(lang)) return "vi";
333    if ("vol".equals(lang)) return "vo";
334    if ("wln".equals(lang)) return "wa";
335    if ("cym".equals(lang)) return "cy";
336    if ("fry".equals(lang)) return "fy";
337    if ("wol".equals(lang)) return "wo";
338    if ("xho".equals(lang)) return "xh";
339    if ("yid".equals(lang)) return "yi";
340    if ("yor".equals(lang)) return "yo";
341    if ("zha".equals(lang)) return "za";
342    if ("zul".equals(lang)) return "zu";
343    if ("pap".equals(lang)) return "pap";
344    if ("gil".equals(lang)) return "gil";
345    if ("002".equals(lang)) return null;
346    if ("cnr".equals(lang)) return "cnr";
347    if ("niu".equals(lang)) return "niu";
348    if ("tpi".equals(lang)) return "tpi";
349    if ("pau".equals(lang)) return "pau";
350    if ("crs".equals(lang)) return null;
351    if ("tkl".equals(lang)) return "tkl";
352    if ("tet".equals(lang)) return "tet";
353    if ("tvl".equals(lang)) return "tvl";
354    if ("nso".equals(lang)) return "nso";
355    throw new Error("unknown 3 letter lang code " + lang);
356  }
357
358  private Document load() throws ParserConfigurationException, SAXException, IOException {
359    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
360    factory.setNamespaceAware(true);
361    DocumentBuilder builder = factory.newDocumentBuilder();
362
363    return builder.parse(new FileInputStream(source));
364  }
365
366
367}