001package org.hl7.fhir.convertors.analytics;
002
003import java.io.File;
004import java.io.FileInputStream;
005import java.io.IOException;
006import java.util.*;
007
008import javax.xml.parsers.ParserConfigurationException;
009
010import lombok.extern.slf4j.Slf4j;
011import org.hl7.fhir.exceptions.FHIRException;
012import org.hl7.fhir.r5.utils.EOperationOutcome;
013import org.hl7.fhir.utilities.FileUtilities;
014import org.hl7.fhir.utilities.Utilities;
015import org.hl7.fhir.utilities.filesystem.ManagedFileAccess;
016import org.hl7.fhir.utilities.http.HTTPResult;
017import org.hl7.fhir.utilities.http.ManagedWebAccess;
018import org.hl7.fhir.utilities.json.model.JsonArray;
019import org.hl7.fhir.utilities.json.model.JsonObject;
020import org.hl7.fhir.utilities.json.parser.JsonParser;
021import org.hl7.fhir.utilities.npm.FilesystemPackageCacheManager;
022import org.hl7.fhir.utilities.npm.NpmPackage;
023import org.hl7.fhir.utilities.npm.NpmPackage.PackagedResourceFile;
024import org.hl7.fhir.utilities.npm.PackageClient;
025import org.hl7.fhir.utilities.npm.PackageInfo;
026import org.hl7.fhir.utilities.npm.PackageServer;
027import org.hl7.fhir.utilities.xml.XMLUtil;
028import org.w3c.dom.Document;
029import org.w3c.dom.Element;
030import org.xml.sax.SAXException;
031
032@Slf4j
033public class PackageVisitor {
034
035  private PackageServer clientPackageServer = null;
036
037  public void setClientPackageServer(PackageServer packageServer) {
038    this.clientPackageServer = packageServer;
039  }
040  private List<PackageServer> cachePackageServers = null;
041  public void setCachePackageServers(List<PackageServer> packageServers) {
042    this.cachePackageServers = packageServers;
043  }
044
045  public static class PackageContext {
046    private String pid;
047    private NpmPackage npm;
048    private String version;
049    protected PackageContext(String pid, NpmPackage npm, String version) {
050      super();
051      this.pid = pid;
052      this.npm = npm;
053      this.version = version;
054    }
055    public String getPid() {
056      return pid;
057    }
058    public NpmPackage getNpm() {
059      return npm;
060    }
061    public String getVersion() {
062      return version;
063    }
064  }
065  
066  public interface IPackageVisitorProcessor {
067    public Object startPackage(PackageContext context) throws FHIRException, IOException, EOperationOutcome;
068    public void processResource(PackageContext context, Object clientContext, String type, String id, byte[] content) throws FHIRException, IOException, EOperationOutcome;
069    public void finishPackage(PackageContext context) throws FHIRException, IOException, EOperationOutcome;
070
071    public void alreadyVisited(String pid) throws FHIRException, IOException, EOperationOutcome;
072  }
073
074  private Set<String> resourceTypes = new HashSet<>();
075  private List<String> versions = new ArrayList<>();
076  private boolean corePackages;
077  private boolean oldVersions;
078  private boolean current;
079  private IPackageVisitorProcessor processor;
080  private FilesystemPackageCacheManager pcm;
081  private PackageClient pc;
082  private String cache;  
083  private int step;
084
085  public Set<String> getResourceTypes() {
086    return resourceTypes;
087  }
088
089  public void setResourceTypes(Set<String> resourceTypes) {
090    this.resourceTypes = resourceTypes;
091  }
092
093  public void setResourceTypes(String... resourceTypes) {
094    this.resourceTypes = new HashSet<String>();
095    for (String s : resourceTypes) {
096      this.resourceTypes.add(s);
097    }
098  }
099
100  public List<String> getVersions() {
101    return versions;
102  }
103
104  public void setVersions(List<String> versions) {
105    this.versions = versions;
106  }
107
108
109  public boolean isCurrent() {
110    return current;
111  }
112
113  public void setCurrent(boolean current) {
114    this.current = current;
115  }
116
117  public boolean isCorePackages() {
118    return corePackages;
119  }
120
121
122
123
124  public String getCache() {
125    return cache;
126  }
127
128  public void setCache(String cache) {
129    this.cache = cache;
130  }
131
132  public void setCorePackages(boolean corePackages) {
133    this.corePackages = corePackages;
134  }
135
136
137
138
139  public boolean isOldVersions() {
140    return oldVersions;
141  }
142
143
144
145
146  public void setOldVersions(boolean oldVersions) {
147    this.oldVersions = oldVersions;
148  }
149
150
151
152
153  public IPackageVisitorProcessor getProcessor() {
154    return processor;
155  }
156
157  public void setProcessor(IPackageVisitorProcessor processor) {
158    this.processor = processor;
159  }
160
161  public void visitPackages() throws IOException, ParserConfigurationException, SAXException, FHIRException, EOperationOutcome {
162    log.info("Finding packages");
163    pc = clientPackageServer == null
164      ? new PackageClient(PackageServer.primaryServer())
165      : new PackageClient(clientPackageServer);
166
167    pcm = cachePackageServers == null
168      ? new FilesystemPackageCacheManager.Builder().build()
169      : new FilesystemPackageCacheManager.Builder().withPackageServers(cachePackageServers).build();
170
171    Set<String> pidList = getAllPackages();
172
173    Map<String, String> cpidMap = getAllCIPackages();
174    Set<String> cpidSet = new HashSet<>();
175    log.info("Go: "+cpidMap.size()+" current packages");
176    int i = 0;
177    for (String s : cpidMap.keySet()) {
178      processCurrentPackage(cpidMap.get(s), s, cpidSet, i, cpidMap.size()); 
179      i++;
180    }
181
182    log.info("Go: "+pidList.size()+" published packages");
183    i = 0;
184    for (String pid : pidList) {  
185      if (pid != null) {
186        if (!cpidSet.contains(pid)) {
187          cpidSet.add(pid);
188          if (step == 0 || step == 3) {
189            List<String> vList = listVersions(pid);
190            if (oldVersions) {
191              for (String v : vList) {
192                processPackage(pid, v, i, pidList.size());          
193              }
194            } else if (vList.isEmpty()) {
195              log.info("No Packages for "+pid);
196            } else {
197              processPackage(pid, vList.get(vList.size() - 1), i, pidList.size());
198            }
199          }
200        } else {
201          processor.alreadyVisited(pid);
202        }
203        i++;
204      }    
205    }
206
207    if (step == 0 || step == 3) {
208      JsonObject json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/fhir-ig-list.json");
209      i = 0;
210      List<JsonObject> objects = json.getJsonObjects("guides");
211      for (JsonObject o : objects) {
212        String pid = o.asString("npm-name");
213        if (pid != null && !cpidSet.contains(pid)) {
214          cpidSet.add(pid);
215          List<String> vList = listVersions(pid);
216          if (oldVersions) {
217            for (String v : vList) {
218              processPackage(pid, v, i, objects.size());          
219            }
220          } else if (vList.isEmpty()) {
221            log.info("No Packages for "+pid);
222          } else {
223            processPackage(pid, vList.get(vList.size() - 1), i, objects.size());
224          }
225        }
226        i++;
227      }
228    }
229  }
230
231  private void processCurrentPackage(String url, String pid, Set<String> cpidSet, int i, int t) {
232    try {
233      cpidSet.add(pid);
234      if (step == 0 || (step == 1 && i < t/2) || (step == 2 && i >= t/2)) {
235        long ms1 = System.currentTimeMillis();
236        String[] p = url.split("\\/");
237        String repo = "https://build.fhir.org/ig/"+p[0]+"/"+p[1];
238        JsonObject manifest = JsonParser.parseObjectFromUrl(repo+"/package.manifest.json");
239        File co = ManagedFileAccess.file(Utilities.path(cache, pid+"."+manifest.asString("date")+".tgz"));
240        if (!co.exists()) {
241
242          HTTPResult res = ManagedWebAccess.get(Arrays.asList("web"), repo+"/package.tgz?nocache=" + System.currentTimeMillis());
243          res.checkThrowException();
244          FileUtilities.bytesToFile(res.getContent(), co);
245        }
246        NpmPackage npm = NpmPackage.fromPackage(ManagedFileAccess.inStream(co));          
247        String fv = npm.fhirVersion();
248        long ms2 = System.currentTimeMillis();
249
250        if (corePackages || !corePackage(npm)) {
251          if (fv != null && (versions.isEmpty() || versions.contains(fv))) {
252            PackageContext ctxt = new PackageContext(pid+"#current", npm, fv);
253            boolean ok = false;
254            Object context = null;
255            try {
256              context = processor.startPackage(ctxt);
257              ok = true;
258            } catch (Exception e) {
259              log.error("####### Error loading "+pid+"#current["+fv+"]: ####### "+e.getMessage());
260              //                e.printStackTrace();
261            }
262            if (ok) {
263              int c = 0;
264              for (String type : resourceTypes) {
265                for (String s : npm.listResources(type)) {
266                  c++;
267                  try {
268                    processor.processResource(ctxt, context, type, s, FileUtilities.streamToBytes(npm.load("package", s)));
269                  } catch (Exception e) {
270                    log.error("####### Error loading "+pid+"#current["+fv+"]/"+type+" ####### "+e.getMessage());
271                    //                e.printStackTrace();
272                  }
273                }
274              }
275              processor.finishPackage(ctxt);
276              log.info("Processed: "+pid+"#current: "+c+" resources ("+i+" of "+t+", "+(ms2-ms1)+"/"+(System.currentTimeMillis()-ms2)+"ms)");
277            }
278          } else {
279            log.info("Ignored: "+pid+"#current: no version");
280          }
281        }
282      }
283    } catch (Exception e) {      
284      log.error("Unable to process: "+pid+"#current: "+e.getMessage());
285    }
286  }
287
288  private Map<String, String> getAllCIPackages() throws IOException {
289    log.info("Fetch https://build.fhir.org/ig/qas.json");
290    Map<String, String> res = new HashMap<>();
291    if (current) {
292      JsonArray json = (JsonArray) JsonParser.parseFromUrl("https://build.fhir.org/ig/qas.json");
293      for (JsonObject o  : json.asJsonObjects()) {
294        String url = o.asString("repo");
295        String pid = o.asString("package-id");
296        if (url.contains("/branches/master") || url.contains("/branches/main") ) {
297          if (!res.containsKey(pid)) {
298            res.put(pid, url);
299          } else if (!url.equals(res.get(pid))) {
300            log.warn("Ignore "+url+" already encountered "+pid +" @ "+res.get(pid));
301          }
302        }
303      }
304    }
305    return res;
306  }
307
308  private List<String> listVersions(String pid) throws IOException {
309    List<String> list = new ArrayList<>();
310    if (pid !=null) {
311      for (PackageInfo i : pc.getVersions(pid)) {
312        list.add(i.getVersion());
313      }    
314    }
315    return list;
316  }
317
318  private Set<String> getAllPackages() throws IOException, ParserConfigurationException, SAXException {
319    Set<String> list = new HashSet<>();
320    for (PackageInfo i : pc.search(null, null, null, false)) {
321      list.add(i.getId());
322    }    
323    JsonObject json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/fhir-ig-list.json");
324    for (JsonObject ig : json.getJsonObjects("guides")) {
325      list.add(ig.asString("npm-name"));
326    }
327    json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/package-feeds.json");
328    for (JsonObject feed : json.getJsonObjects("feeds")) {
329      processFeed(list, feed.asString("url"));
330    }
331
332    return list;
333  }
334
335  private void processFeed(Set<String> list, String str) throws IOException, ParserConfigurationException, SAXException {
336    log.info("Feed "+str);
337    try {
338
339      HTTPResult res = ManagedWebAccess.get(Arrays.asList("web"), str+"?nocache=" + System.currentTimeMillis());
340      res.checkThrowException();
341      Document xml = XMLUtil.parseToDom(res.getContent());
342      for (Element channel : XMLUtil.getNamedChildren(xml.getDocumentElement(), "channel")) {
343        for (Element item : XMLUtil.getNamedChildren(channel, "item")) {
344          String pid = XMLUtil.getNamedChildText(item, "title");
345          if (pid != null && pid.contains("#")) {
346            list.add(pid.substring(0, pid.indexOf("#")));
347          }
348        }
349      }
350    } catch (Exception e) {
351      log.error("   "+e.getMessage());
352    }
353  }
354
355
356  private void processPackage(String pid, String v, int i, int t) throws IOException, FHIRException, EOperationOutcome {
357    NpmPackage npm = null;
358    String fv = null;
359    try {
360      npm = pcm.loadPackage(pid, v);
361    } catch (Throwable e) {
362      log.error("Unable to load package: "+pid+"#"+v+": "+e.getMessage());
363      return;
364    }
365
366    try {
367      fv = npm.fhirVersion();
368    } catch (Throwable e) {
369      log.error("Unable to identify package FHIR version:: "+pid+"#"+v+": "+e.getMessage());
370    }
371    if (corePackages || !corePackage(npm)) {
372      PackageContext ctxt = new PackageContext(pid+"#"+v, npm, fv);
373      boolean ok = false;
374      Object context = null;
375      try {
376        context = processor.startPackage(ctxt);
377        ok = true;
378      } catch (Exception e) {
379        log.error("####### Error loading package  "+pid+"#"+v +"["+fv+"]: "+e.getMessage(), e);
380      }
381      if (ok) {
382        int c = 0;
383        if (fv != null && (versions.isEmpty() || versions.contains(fv))) {
384          for (PackagedResourceFile p : npm.listAllResources(resourceTypes)) {
385            c++;
386            try {
387              processor.processResource(ctxt, context, p.getResourceType(), p.getFilename(), FileUtilities.streamToBytes(npm.load(p.getFolder(), p.getFilename())));
388            } catch (Exception e) {
389              log.error("####### Error loading "+pid+"#"+v +"["+fv+"]/"+p.getResourceType()+" ####### "+e.getMessage(), e);
390            }
391          }
392        }    
393        processor.finishPackage(ctxt);
394        log.info("Processed: "+pid+"#"+v+": "+c+" resources ("+i+" of "+t+")");
395      }
396    }
397  }
398
399  private boolean corePackage(NpmPackage npm) {
400    return npm != null && !Utilities.noString(npm.name()) && (
401        npm.name().startsWith("hl7.terminology") || 
402        npm.name().startsWith("hl7.fhir.core") || 
403        npm.name().startsWith("hl7.fhir.r2.") || 
404        npm.name().startsWith("hl7.fhir.r2b.") || 
405        npm.name().startsWith("hl7.fhir.r3.") || 
406        npm.name().startsWith("hl7.fhir.r4.") || 
407        npm.name().startsWith("hl7.fhir.r4b.") || 
408        npm.name().startsWith("hl7.fhir.r5."));
409  }
410
411  public int getStep() {
412    return step;
413  }
414
415  public void setStep(int step) {
416    this.step = step;
417  }
418
419}