
001package org.hl7.fhir.convertors.analytics; 002 003import java.io.File; 004import java.io.FileInputStream; 005import java.io.IOException; 006import java.util.*; 007 008import javax.xml.parsers.ParserConfigurationException; 009 010import lombok.extern.slf4j.Slf4j; 011import org.hl7.fhir.exceptions.FHIRException; 012import org.hl7.fhir.r5.utils.EOperationOutcome; 013import org.hl7.fhir.utilities.FileUtilities; 014import org.hl7.fhir.utilities.Utilities; 015import org.hl7.fhir.utilities.filesystem.ManagedFileAccess; 016import org.hl7.fhir.utilities.http.HTTPResult; 017import org.hl7.fhir.utilities.http.ManagedWebAccess; 018import org.hl7.fhir.utilities.json.model.JsonArray; 019import org.hl7.fhir.utilities.json.model.JsonObject; 020import org.hl7.fhir.utilities.json.parser.JsonParser; 021import org.hl7.fhir.utilities.npm.FilesystemPackageCacheManager; 022import org.hl7.fhir.utilities.npm.NpmPackage; 023import org.hl7.fhir.utilities.npm.NpmPackage.PackagedResourceFile; 024import org.hl7.fhir.utilities.npm.PackageClient; 025import org.hl7.fhir.utilities.npm.PackageInfo; 026import org.hl7.fhir.utilities.npm.PackageServer; 027import org.hl7.fhir.utilities.xml.XMLUtil; 028import org.w3c.dom.Document; 029import org.w3c.dom.Element; 030import org.xml.sax.SAXException; 031 032@Slf4j 033public class PackageVisitor { 034 035 private PackageServer clientPackageServer = null; 036 037 public void setClientPackageServer(PackageServer packageServer) { 038 this.clientPackageServer = packageServer; 039 } 040 private List<PackageServer> cachePackageServers = null; 041 public void setCachePackageServers(List<PackageServer> packageServers) { 042 this.cachePackageServers = packageServers; 043 } 044 045 public static class PackageContext { 046 private String pid; 047 private NpmPackage npm; 048 private String version; 049 protected PackageContext(String pid, NpmPackage npm, String version) { 050 super(); 051 this.pid = pid; 052 this.npm = npm; 053 this.version = version; 054 } 055 public String getPid() { 056 return pid; 057 } 058 public NpmPackage getNpm() { 059 return npm; 060 } 061 public String getVersion() { 062 return version; 063 } 064 } 065 066 public interface IPackageVisitorProcessor { 067 public Object startPackage(PackageContext context) throws FHIRException, IOException, EOperationOutcome; 068 public void processResource(PackageContext context, Object clientContext, String type, String id, byte[] content) throws FHIRException, IOException, EOperationOutcome; 069 public void finishPackage(PackageContext context) throws FHIRException, IOException, EOperationOutcome; 070 071 public void alreadyVisited(String pid) throws FHIRException, IOException, EOperationOutcome; 072 } 073 074 private Set<String> resourceTypes = new HashSet<>(); 075 private List<String> versions = new ArrayList<>(); 076 private boolean corePackages; 077 private boolean oldVersions; 078 private boolean current; 079 private IPackageVisitorProcessor processor; 080 private FilesystemPackageCacheManager pcm; 081 private PackageClient pc; 082 private String cache; 083 private int step; 084 085 public Set<String> getResourceTypes() { 086 return resourceTypes; 087 } 088 089 public void setResourceTypes(Set<String> resourceTypes) { 090 this.resourceTypes = resourceTypes; 091 } 092 093 public void setResourceTypes(String... resourceTypes) { 094 this.resourceTypes = new HashSet<String>(); 095 for (String s : resourceTypes) { 096 this.resourceTypes.add(s); 097 } 098 } 099 100 public List<String> getVersions() { 101 return versions; 102 } 103 104 public void setVersions(List<String> versions) { 105 this.versions = versions; 106 } 107 108 109 public boolean isCurrent() { 110 return current; 111 } 112 113 public void setCurrent(boolean current) { 114 this.current = current; 115 } 116 117 public boolean isCorePackages() { 118 return corePackages; 119 } 120 121 122 123 124 public String getCache() { 125 return cache; 126 } 127 128 public void setCache(String cache) { 129 this.cache = cache; 130 } 131 132 public void setCorePackages(boolean corePackages) { 133 this.corePackages = corePackages; 134 } 135 136 137 138 139 public boolean isOldVersions() { 140 return oldVersions; 141 } 142 143 144 145 146 public void setOldVersions(boolean oldVersions) { 147 this.oldVersions = oldVersions; 148 } 149 150 151 152 153 public IPackageVisitorProcessor getProcessor() { 154 return processor; 155 } 156 157 public void setProcessor(IPackageVisitorProcessor processor) { 158 this.processor = processor; 159 } 160 161 public void visitPackages() throws IOException, ParserConfigurationException, SAXException, FHIRException, EOperationOutcome { 162 log.info("Finding packages"); 163 pc = clientPackageServer == null 164 ? new PackageClient(PackageServer.primaryServer()) 165 : new PackageClient(clientPackageServer); 166 167 pcm = cachePackageServers == null 168 ? new FilesystemPackageCacheManager.Builder().build() 169 : new FilesystemPackageCacheManager.Builder().withPackageServers(cachePackageServers).build(); 170 171 Set<String> pidList = getAllPackages(); 172 173 Map<String, String> cpidMap = getAllCIPackages(); 174 Set<String> cpidSet = new HashSet<>(); 175 log.info("Go: "+cpidMap.size()+" current packages"); 176 int i = 0; 177 for (String s : cpidMap.keySet()) { 178 processCurrentPackage(cpidMap.get(s), s, cpidSet, i, cpidMap.size()); 179 i++; 180 } 181 182 log.info("Go: "+pidList.size()+" published packages"); 183 i = 0; 184 for (String pid : pidList) { 185 if (pid != null) { 186 if (!cpidSet.contains(pid)) { 187 cpidSet.add(pid); 188 if (step == 0 || step == 3) { 189 List<String> vList = listVersions(pid); 190 if (oldVersions) { 191 for (String v : vList) { 192 processPackage(pid, v, i, pidList.size()); 193 } 194 } else if (vList.isEmpty()) { 195 log.info("No Packages for "+pid); 196 } else { 197 processPackage(pid, vList.get(vList.size() - 1), i, pidList.size()); 198 } 199 } 200 } else { 201 processor.alreadyVisited(pid); 202 } 203 i++; 204 } 205 } 206 207 if (step == 0 || step == 3) { 208 JsonObject json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/fhir-ig-list.json"); 209 i = 0; 210 List<JsonObject> objects = json.getJsonObjects("guides"); 211 for (JsonObject o : objects) { 212 String pid = o.asString("npm-name"); 213 if (pid != null && !cpidSet.contains(pid)) { 214 cpidSet.add(pid); 215 List<String> vList = listVersions(pid); 216 if (oldVersions) { 217 for (String v : vList) { 218 processPackage(pid, v, i, objects.size()); 219 } 220 } else if (vList.isEmpty()) { 221 log.info("No Packages for "+pid); 222 } else { 223 processPackage(pid, vList.get(vList.size() - 1), i, objects.size()); 224 } 225 } 226 i++; 227 } 228 } 229 } 230 231 private void processCurrentPackage(String url, String pid, Set<String> cpidSet, int i, int t) { 232 try { 233 cpidSet.add(pid); 234 if (step == 0 || (step == 1 && i < t/2) || (step == 2 && i >= t/2)) { 235 long ms1 = System.currentTimeMillis(); 236 String[] p = url.split("\\/"); 237 String repo = "https://build.fhir.org/ig/"+p[0]+"/"+p[1]; 238 JsonObject manifest = JsonParser.parseObjectFromUrl(repo+"/package.manifest.json"); 239 File co = ManagedFileAccess.file(Utilities.path(cache, pid+"."+manifest.asString("date")+".tgz")); 240 if (!co.exists()) { 241 242 HTTPResult res = ManagedWebAccess.get(Arrays.asList("web"), repo+"/package.tgz?nocache=" + System.currentTimeMillis()); 243 res.checkThrowException(); 244 FileUtilities.bytesToFile(res.getContent(), co); 245 } 246 NpmPackage npm = NpmPackage.fromPackage(ManagedFileAccess.inStream(co)); 247 String fv = npm.fhirVersion(); 248 long ms2 = System.currentTimeMillis(); 249 250 if (corePackages || !corePackage(npm)) { 251 if (fv != null && (versions.isEmpty() || versions.contains(fv))) { 252 PackageContext ctxt = new PackageContext(pid+"#current", npm, fv); 253 boolean ok = false; 254 Object context = null; 255 try { 256 context = processor.startPackage(ctxt); 257 ok = true; 258 } catch (Exception e) { 259 log.error("####### Error loading "+pid+"#current["+fv+"]: ####### "+e.getMessage()); 260 // e.printStackTrace(); 261 } 262 if (ok) { 263 int c = 0; 264 for (PackagedResourceFile pri : npm.listAllResources(resourceTypes)) { 265 c++; 266 try { 267 processor.processResource(ctxt, context, pri.getResourceType(), pri.getFilename(), FileUtilities.streamToBytes(npm.load(pri.getFolder(), pri.getFilename()))); 268 } catch (Exception e) { 269 log.error("####### Error loading "+pid+"#current["+fv+"]/"+pri.getResourceType()+" ####### "+e.getMessage()); 270 // e.printStackTrace(); 271 } 272 } 273 processor.finishPackage(ctxt); 274 log.info("Processed: "+pid+"#current: "+c+" resources ("+i+" of "+t+", "+(ms2-ms1)+"/"+(System.currentTimeMillis()-ms2)+"ms)"); 275 } 276 } else { 277 log.info("Ignored: "+pid+"#current: no version"); 278 } 279 } 280 } 281 } catch (Exception e) { 282 log.error("Unable to process: "+pid+"#current: "+e.getMessage()); 283 } 284 } 285 286 private Map<String, String> getAllCIPackages() throws IOException { 287 log.info("Fetch https://build.fhir.org/ig/qas.json"); 288 Map<String, String> res = new HashMap<>(); 289 if (current) { 290 JsonArray json = (JsonArray) JsonParser.parseFromUrl("https://build.fhir.org/ig/qas.json"); 291 for (JsonObject o : json.asJsonObjects()) { 292 String url = o.asString("repo"); 293 String pid = o.asString("package-id"); 294 if (url.contains("/branches/master") || url.contains("/branches/main") ) { 295 if (!res.containsKey(pid)) { 296 res.put(pid, url); 297 } else if (!url.equals(res.get(pid))) { 298 log.warn("Ignore "+url+" already encountered "+pid +" @ "+res.get(pid)); 299 } 300 } 301 } 302 } 303 return res; 304 } 305 306 private List<String> listVersions(String pid) throws IOException { 307 List<String> list = new ArrayList<>(); 308 if (pid !=null) { 309 for (PackageInfo i : pc.getVersions(pid)) { 310 list.add(i.getVersion()); 311 } 312 } 313 return list; 314 } 315 316 private Set<String> getAllPackages() throws IOException, ParserConfigurationException, SAXException { 317 Set<String> list = new HashSet<>(); 318 for (PackageInfo i : pc.search(null, null, null, false)) { 319 list.add(i.getId()); 320 } 321 JsonObject json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/fhir-ig-list.json"); 322 for (JsonObject ig : json.getJsonObjects("guides")) { 323 list.add(ig.asString("npm-name")); 324 } 325 json = JsonParser.parseObjectFromUrl("https://fhir.github.io/ig-registry/package-feeds.json"); 326 for (JsonObject feed : json.getJsonObjects("feeds")) { 327 processFeed(list, feed.asString("url")); 328 } 329 330 return list; 331 } 332 333 private void processFeed(Set<String> list, String str) throws IOException, ParserConfigurationException, SAXException { 334 log.info("Feed "+str); 335 try { 336 337 HTTPResult res = ManagedWebAccess.get(Arrays.asList("web"), str+"?nocache=" + System.currentTimeMillis()); 338 res.checkThrowException(); 339 Document xml = XMLUtil.parseToDom(res.getContent()); 340 for (Element channel : XMLUtil.getNamedChildren(xml.getDocumentElement(), "channel")) { 341 for (Element item : XMLUtil.getNamedChildren(channel, "item")) { 342 String pid = XMLUtil.getNamedChildText(item, "title"); 343 if (pid != null && pid.contains("#")) { 344 list.add(pid.substring(0, pid.indexOf("#"))); 345 } 346 } 347 } 348 } catch (Exception e) { 349 log.error(" "+e.getMessage()); 350 } 351 } 352 353 354 private void processPackage(String pid, String v, int i, int t) throws IOException, FHIRException, EOperationOutcome { 355 NpmPackage npm = null; 356 String fv = null; 357 try { 358 npm = pcm.loadPackage(pid, v); 359 } catch (Throwable e) { 360 log.error("Unable to load package: "+pid+"#"+v+": "+e.getMessage()); 361 return; 362 } 363 364 try { 365 fv = npm.fhirVersion(); 366 } catch (Throwable e) { 367 log.error("Unable to identify package FHIR version:: "+pid+"#"+v+": "+e.getMessage()); 368 } 369 if (corePackages || !corePackage(npm)) { 370 PackageContext ctxt = new PackageContext(pid+"#"+v, npm, fv); 371 boolean ok = false; 372 Object context = null; 373 try { 374 context = processor.startPackage(ctxt); 375 ok = true; 376 } catch (Exception e) { 377 log.error("####### Error loading package "+pid+"#"+v +"["+fv+"]: "+e.getMessage(), e); 378 } 379 if (ok) { 380 int c = 0; 381 if (fv != null && (versions.isEmpty() || versions.contains(fv))) { 382 for (PackagedResourceFile p : npm.listAllResources(resourceTypes)) { 383 c++; 384 try { 385 processor.processResource(ctxt, context, p.getResourceType(), p.getFilename(), FileUtilities.streamToBytes(npm.load(p.getFolder(), p.getFilename()))); 386 } catch (Exception e) { 387 log.error("####### Error loading "+pid+"#"+v +"["+fv+"]/"+p.getResourceType()+" ####### "+e.getMessage(), e); 388 } 389 } 390 } 391 processor.finishPackage(ctxt); 392 log.info("Processed: "+pid+"#"+v+": "+c+" resources ("+i+" of "+t+")"); 393 } 394 } 395 } 396 397 private boolean corePackage(NpmPackage npm) { 398 return npm != null && !Utilities.noString(npm.name()) && ( 399 npm.name().startsWith("hl7.terminology") || 400 npm.name().startsWith("hl7.fhir.core") || 401 npm.name().startsWith("hl7.fhir.r2.") || 402 npm.name().startsWith("hl7.fhir.r2b.") || 403 npm.name().startsWith("hl7.fhir.r3.") || 404 npm.name().startsWith("hl7.fhir.r4.") || 405 npm.name().startsWith("hl7.fhir.r4b.") || 406 npm.name().startsWith("hl7.fhir.r5.")); 407 } 408 409 public int getStep() { 410 return step; 411 } 412 413 public void setStep(int step) { 414 this.step = step; 415 } 416 417}