001package org.hl7.fhir.convertors.analytics;
002
003import java.io.File;
004import java.io.FileInputStream;
005import java.io.IOException;
006import java.util.ArrayList;
007import java.util.HashMap;
008import java.util.HashSet;
009import java.util.List;
010import java.util.Map;
011import java.util.Set;
012
013import javax.xml.parsers.ParserConfigurationException;
014
015import org.hl7.fhir.exceptions.FHIRException;
016import org.hl7.fhir.r5.utils.EOperationOutcome;
017import org.hl7.fhir.utilities.TextFile;
018import org.hl7.fhir.utilities.Utilities;
019import org.hl7.fhir.utilities.filesystem.ManagedFileAccess;
020import org.hl7.fhir.utilities.http.HTTPResult;
021import org.hl7.fhir.utilities.http.ManagedWebAccess;
022import org.hl7.fhir.utilities.json.model.JsonArray;
023import org.hl7.fhir.utilities.json.model.JsonObject;
024import org.hl7.fhir.utilities.json.parser.JsonParser;
025import org.hl7.fhir.utilities.npm.FilesystemPackageCacheManager;
026import org.hl7.fhir.utilities.npm.NpmPackage;
027import org.hl7.fhir.utilities.npm.PackageClient;
028import org.hl7.fhir.utilities.npm.PackageInfo;
029import org.hl7.fhir.utilities.npm.PackageServer;
030import org.hl7.fhir.utilities.xml.XMLUtil;
031import org.w3c.dom.Document;
032import org.w3c.dom.Element;
033import org.xml.sax.SAXException;
034
035public class PackageVisitor {
036
037  private PackageServer clientPackageServer = null;
038
039  public void setClientPackageServer(PackageServer packageServer) {
040    this.clientPackageServer = packageServer;
041  }
042  private List<PackageServer> cachePackageServers = null;
043  public void setCachePackageServers(List<PackageServer> packageServers) {
044    this.cachePackageServers = packageServers;
045  }
046
047  public static class PackageContext {
048    private String pid;
049    private NpmPackage npm;
050    private String version;
051    protected PackageContext(String pid, NpmPackage npm, String version) {
052      super();
053      this.pid = pid;
054      this.npm = npm;
055      this.version = version;
056    }
057    public String getPid() {
058      return pid;
059    }
060    public NpmPackage getNpm() {
061      return npm;
062    }
063    public String getVersion() {
064      return version;
065    }
066  }
067  
068  public interface IPackageVisitorProcessor {
069    public Object startPackage(PackageContext context) throws FHIRException, IOException, EOperationOutcome;
070    public void processResource(PackageContext context, Object clientContext, String type, String id, byte[] content) throws FHIRException, IOException, EOperationOutcome;
071    public void finishPackage(PackageContext context) throws FHIRException, IOException, EOperationOutcome;
072
073    public void alreadyVisited(String pid) throws FHIRException, IOException, EOperationOutcome;
074  }
075
076  private List<String> resourceTypes = new ArrayList<>();
077  private List<String> versions = new ArrayList<>();
078  private boolean corePackages;
079  private boolean oldVersions;
080  private boolean current;
081  private IPackageVisitorProcessor processor;
082  private FilesystemPackageCacheManager pcm;
083  private PackageClient pc;
084  private String cache;  
085  private int step;
086
087  public List<String> getResourceTypes() {
088    return resourceTypes;
089  }
090
091  public void setResourceTypes(List<String> resourceTypes) {
092    this.resourceTypes = resourceTypes;
093  }
094
095  public void setResourceTypes(String... resourceTypes) {
096    this.resourceTypes = new ArrayList<String>();
097    for (String s : resourceTypes) {
098      this.resourceTypes.add(s);
099    }
100  }
101
102  public List<String> getVersions() {
103    return versions;
104  }
105
106  public void setVersions(List<String> versions) {
107    this.versions = versions;
108  }
109
110
111  public boolean isCurrent() {
112    return current;
113  }
114
115  public void setCurrent(boolean current) {
116    this.current = current;
117  }
118
119  public boolean isCorePackages() {
120    return corePackages;
121  }
122
123
124
125
126  public String getCache() {
127    return cache;
128  }
129
130  public void setCache(String cache) {
131    this.cache = cache;
132  }
133
134  public void setCorePackages(boolean corePackages) {
135    this.corePackages = corePackages;
136  }
137
138
139
140
141  public boolean isOldVersions() {
142    return oldVersions;
143  }
144
145
146
147
148  public void setOldVersions(boolean oldVersions) {
149    this.oldVersions = oldVersions;
150  }
151
152
153
154
155  public IPackageVisitorProcessor getProcessor() {
156    return processor;
157  }
158
159  public void setProcessor(IPackageVisitorProcessor processor) {
160    this.processor = processor;
161  }
162
163  public void visitPackages() throws IOException, ParserConfigurationException, SAXException, FHIRException, EOperationOutcome {
164    System.out.println("Finding packages");
165    pc = clientPackageServer == null
166      ? new PackageClient(PackageServer.primaryServer())
167      : new PackageClient(clientPackageServer);
168
169    pcm = cachePackageServers == null
170      ? new FilesystemPackageCacheManager.Builder().build()
171      : new FilesystemPackageCacheManager.Builder().withPackageServers(cachePackageServers).build();
172
173    Set<String> pidList = getAllPackages();
174
175    Map<String, String> cpidMap = getAllCIPackages();
176    Set<String> cpidSet = new HashSet<>();
177    System.out.println("Go: "+cpidMap.size()+" current packages");
178    int i = 0;
179    for (String s : cpidMap.keySet()) {
180      processCurrentPackage(cpidMap.get(s), s, cpidSet, i, cpidMap.size()); 
181      i++;
182    }
183
184    System.out.println("Go: "+pidList.size()+" published packages");
185    i = 0;
186    for (String pid : pidList) {  
187      if (pid != null) {
188        if (!cpidSet.contains(pid)) {
189          cpidSet.add(pid);
190          if (step == 0 || step == 3) {
191            List<String> vList = listVersions(pid);
192            if (oldVersions) {
193              for (String v : vList) {
194                processPackage(pid, v, i, pidList.size());          
195              }
196            } else if (vList.isEmpty()) {
197              System.out.println("No Packages for "+pid);
198            } else {
199              processPackage(pid, vList.get(vList.size() - 1), i, pidList.size());
200            }
201          }
202        } else {
203          processor.alreadyVisited(pid);
204        }
205        i++;
206      }    
207    }
208
209    if (step == 0 || step == 3) {
210      JsonObject json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/fhir-ig-list.json");
211      i = 0;
212      List<JsonObject> objects = json.getJsonObjects("guides");
213      for (JsonObject o : objects) {
214        String pid = o.asString("npm-name");
215        if (pid != null && !cpidSet.contains(pid)) {
216          cpidSet.add(pid);
217          List<String> vList = listVersions(pid);
218          if (oldVersions) {
219            for (String v : vList) {
220              processPackage(pid, v, i, objects.size());          
221            }
222          } else if (vList.isEmpty()) {
223            System.out.println("No Packages for "+pid);
224          } else {
225            processPackage(pid, vList.get(vList.size() - 1), i, objects.size());
226          }
227        }
228        i++;
229      }
230    }
231  }
232
233  private void processCurrentPackage(String url, String pid, Set<String> cpidSet, int i, int t) {
234    try {
235      cpidSet.add(pid);
236      if (step == 0 || (step == 1 && i < t/2) || (step == 2 && i >= t/2)) {
237        long ms1 = System.currentTimeMillis();
238        String[] p = url.split("\\/");
239        String repo = "https://build.fhir.org/ig/"+p[0]+"/"+p[1];
240        JsonObject manifest = JsonParser.parseObjectFromUrl(repo+"/package.manifest.json");
241        File co = ManagedFileAccess.file(Utilities.path(cache, pid+"."+manifest.asString("date")+".tgz"));
242        if (!co.exists()) {
243
244          HTTPResult res = ManagedWebAccess.get(repo+"/package.tgz?nocache=" + System.currentTimeMillis());
245          res.checkThrowException();
246          TextFile.bytesToFile(res.getContent(), co);
247        }
248        NpmPackage npm = NpmPackage.fromPackage(ManagedFileAccess.inStream(co));          
249        String fv = npm.fhirVersion();
250        long ms2 = System.currentTimeMillis();
251
252        if (corePackages || !corePackage(npm)) {
253          if (fv != null && (versions.isEmpty() || versions.contains(fv))) {
254            PackageContext ctxt = new PackageContext(pid+"#current", npm, fv);
255            boolean ok = false;
256            Object context = null;
257            try {
258              context = processor.startPackage(ctxt);
259              ok = true;
260            } catch (Exception e) {
261              System.out.println("####### Error loading "+pid+"#current["+fv+"]: ####### "+e.getMessage());
262              //                e.printStackTrace();
263            }
264            if (ok) {
265              int c = 0;
266              for (String type : resourceTypes) {
267                for (String s : npm.listResources(type)) {
268                  c++;
269                  try {
270                    processor.processResource(ctxt, context, type, s, TextFile.streamToBytes(npm.load("package", s)));
271                  } catch (Exception e) {
272                    System.out.println("####### Error loading "+pid+"#current["+fv+"]/"+type+" ####### "+e.getMessage());
273                    //                e.printStackTrace();
274                  }
275                }
276              }
277              processor.finishPackage(ctxt);
278              System.out.println("Processed: "+pid+"#current: "+c+" resources ("+i+" of "+t+", "+(ms2-ms1)+"/"+(System.currentTimeMillis()-ms2)+"ms)");
279            }
280          } else {
281            System.out.println("Ignored: "+pid+"#current: no version");            
282          }
283        }
284      }
285    } catch (Exception e) {      
286      System.out.println("Unable to process: "+pid+"#current: "+e.getMessage());      
287    }
288  }
289
290  private Map<String, String> getAllCIPackages() throws IOException {
291    System.out.println("Fetch https://build.fhir.org/ig/qas.json");
292    Map<String, String> res = new HashMap<>();
293    if (current) {
294      JsonArray json = (JsonArray) JsonParser.parseFromUrl("https://build.fhir.org/ig/qas.json");
295      for (JsonObject o  : json.asJsonObjects()) {
296        String url = o.asString("repo");
297        String pid = o.asString("package-id");
298        if (url.contains("/branches/master") || url.contains("/branches/main") ) {
299          if (!res.containsKey(pid)) {
300            res.put(pid, url);
301          } else if (!url.equals(res.get(pid))) {
302            System.out.println("Ignore "+url+" already encountered "+pid +" @ "+res.get(pid));
303          }
304        }
305      }
306    }
307    return res;
308  }
309
310  private List<String> listVersions(String pid) throws IOException {
311    List<String> list = new ArrayList<>();
312    if (pid !=null) {
313      for (PackageInfo i : pc.getVersions(pid)) {
314        list.add(i.getVersion());
315      }    
316    }
317    return list;
318  }
319
320  private Set<String> getAllPackages() throws IOException, ParserConfigurationException, SAXException {
321    Set<String> list = new HashSet<>();
322    for (PackageInfo i : pc.search(null, null, null, false)) {
323      list.add(i.getId());
324    }    
325    JsonObject json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/fhir-ig-list.json");
326    for (JsonObject ig : json.getJsonObjects("guides")) {
327      list.add(ig.asString("npm-name"));
328    }
329    json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/package-feeds.json");
330    for (JsonObject feed : json.getJsonObjects("feeds")) {
331      processFeed(list, feed.asString("url"));
332    }
333
334    return list;
335  }
336
337  private void processFeed(Set<String> list, String str) throws IOException, ParserConfigurationException, SAXException {
338    System.out.println("Feed "+str);
339    try {
340
341      HTTPResult res = ManagedWebAccess.get(str+"?nocache=" + System.currentTimeMillis());
342      res.checkThrowException();
343      Document xml = XMLUtil.parseToDom(res.getContent());
344      for (Element channel : XMLUtil.getNamedChildren(xml.getDocumentElement(), "channel")) {
345        for (Element item : XMLUtil.getNamedChildren(channel, "item")) {
346          String pid = XMLUtil.getNamedChildText(item, "title");
347          if (pid != null && pid.contains("#")) {
348            list.add(pid.substring(0, pid.indexOf("#")));
349          }
350        }
351      }
352    } catch (Exception e) {
353      System.out.println("   "+e.getMessage());
354    }
355  }
356
357
358  private void processPackage(String pid, String v, int i, int t) throws IOException, FHIRException, EOperationOutcome {
359    NpmPackage npm = null;
360    String fv = null;
361    try {
362      npm = pcm.loadPackage(pid, v);
363    } catch (Throwable e) {
364      System.out.println("Unable to load package: "+pid+"#"+v+": "+e.getMessage());
365    }
366
367    try {
368      fv = npm.fhirVersion();
369    } catch (Throwable e) {
370      System.out.println("Unable to identify package FHIR version:: "+pid+"#"+v+": "+e.getMessage());
371    }
372    if (corePackages || !corePackage(npm)) {
373      PackageContext ctxt = new PackageContext(pid+"#"+v, npm, fv);
374      boolean ok = false;
375      Object context = null;
376      try {
377        context = processor.startPackage(ctxt);
378        ok = true;
379      } catch (Exception e) {
380        System.out.println("####### Error loading package  "+pid+"#"+v +"["+fv+"]: "+e.getMessage());
381        e.printStackTrace();
382      }
383      if (ok) {
384        int c = 0;
385        if (fv != null && (versions.isEmpty() || versions.contains(fv))) {
386          for (String type : resourceTypes) {
387            for (String s : npm.listResources(type)) {
388              c++;
389              try {
390                processor.processResource(ctxt, context, type, s, TextFile.streamToBytes(npm.load("package", s)));
391              } catch (Exception e) {
392                System.out.println("####### Error loading "+pid+"#"+v +"["+fv+"]/"+type+" ####### "+e.getMessage());
393                e.printStackTrace();
394              }
395            }
396          }
397        }    
398        processor.finishPackage(ctxt);
399        System.out.println("Processed: "+pid+"#"+v+": "+c+" resources ("+i+" of "+t+")");  
400      }
401    }
402  }
403
404  private boolean corePackage(NpmPackage npm) {
405    return npm != null && !Utilities.noString(npm.name()) && (
406        npm.name().startsWith("hl7.terminology") || 
407        npm.name().startsWith("hl7.fhir.core") || 
408        npm.name().startsWith("hl7.fhir.r2.") || 
409        npm.name().startsWith("hl7.fhir.r2b.") || 
410        npm.name().startsWith("hl7.fhir.r3.") || 
411        npm.name().startsWith("hl7.fhir.r4.") || 
412        npm.name().startsWith("hl7.fhir.r4b.") || 
413        npm.name().startsWith("hl7.fhir.r5."));
414  }
415
416  public int getStep() {
417    return step;
418  }
419
420  public void setStep(int step) {
421    this.step = step;
422  }
423
424}