
package org.hl7.fhir.convertors.analytics;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;

import javax.xml.parsers.ParserConfigurationException;

import lombok.extern.slf4j.Slf4j;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.r5.utils.EOperationOutcome;
import org.hl7.fhir.utilities.FileUtilities;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.filesystem.ManagedFileAccess;
import org.hl7.fhir.utilities.http.HTTPResult;
import org.hl7.fhir.utilities.http.ManagedWebAccess;
import org.hl7.fhir.utilities.json.model.JsonArray;
import org.hl7.fhir.utilities.json.model.JsonObject;
import org.hl7.fhir.utilities.json.parser.JsonParser;
import org.hl7.fhir.utilities.npm.FilesystemPackageCacheManager;
import org.hl7.fhir.utilities.npm.NpmPackage;
import org.hl7.fhir.utilities.npm.NpmPackage.PackagedResourceFile;
import org.hl7.fhir.utilities.npm.PackageClient;
import org.hl7.fhir.utilities.npm.PackageInfo;
import org.hl7.fhir.utilities.npm.PackageServer;
import org.hl7.fhir.utilities.xml.XMLUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;

/**
 * Walks FHIR packages and hands their resources to an {@link IPackageVisitorProcessor}.
 *
 * <p>Package ids are collected from the package server, the IG registry and the registered
 * package feeds; when {@link #setCurrent(boolean)} is enabled, the CI build list is visited
 * first. Configure the visitor through its setters, then call {@link #visitPackages()}.
 *
 * <p>The {@code step} setting restricts how much of the work one invocation performs:
 * <ul>
 *   <li>{@code 0} - current (CI) packages and published packages</li>
 *   <li>{@code 1} - only the first half of the current packages</li>
 *   <li>{@code 2} - only the second half of the current packages</li>
 *   <li>{@code 3} - only published packages</li>
 * </ul>
 */
@Slf4j
public class PackageVisitor {

  private static final String IG_REGISTRY_URL = "https://fhir.github.io/ig-registry/fhir-ig-list.json";
  private static final String PACKAGE_FEEDS_URL = "https://fhir.github.io/ig-registry/package-feeds.json";
  private static final String CI_QAS_URL = "https://build.fhir.org/ig/qas.json";
  private static final String CI_BUILD_ROOT = "https://build.fhir.org/ig/";

  /** Name prefixes that identify a package as a "core" package (see {@link #corePackage}). */
  private static final String[] CORE_PACKAGE_PREFIXES = {
    "hl7.terminology",
    "hl7.fhir.core",
    "hl7.fhir.r2.",
    "hl7.fhir.r2b.",
    "hl7.fhir.r3.",
    "hl7.fhir.r4.",
    "hl7.fhir.r4b.",
    "hl7.fhir.r5."
  };

  /** Identifies the package being visited; passed to every processor callback. */
  public static class PackageContext {
    private final String pid;
    private final NpmPackage npm;
    private final String version;

    /**
     * @param pid     package id including the {@code #version} (or {@code #current}) suffix
     * @param npm     the loaded package
     * @param version the FHIR version reported by the package; may be null
     */
    protected PackageContext(String pid, NpmPackage npm, String version) {
      this.pid = pid;
      this.npm = npm;
      this.version = version;
    }

    public String getPid() {
      return pid;
    }

    public NpmPackage getNpm() {
      return npm;
    }

    public String getVersion() {
      return version;
    }
  }

  /** Callbacks invoked by the visitor while it walks packages. */
  public interface IPackageVisitorProcessor {
    /**
     * Called before any resource of a package is processed.
     *
     * @return an arbitrary client object handed back to {@link #processResource}
     */
    Object startPackage(PackageContext context) throws FHIRException, IOException, EOperationOutcome;

    /** Called once for every matching resource; {@code content} is the raw resource file. */
    void processResource(PackageContext context, Object clientContext, String type, String id, byte[] content) throws FHIRException, IOException, EOperationOutcome;

    /** Called after the resources of a successfully started package were processed. */
    void finishPackage(PackageContext context) throws FHIRException, IOException, EOperationOutcome;

    /** Called for a published package id that was already seen as a current (CI) package. */
    void alreadyVisited(String pid) throws FHIRException, IOException, EOperationOutcome;
  }

  private PackageServer clientPackageServer = null;
  private List<PackageServer> cachePackageServers = null;

  private Set<String> resourceTypes = new HashSet<>();
  private List<String> versions = new ArrayList<>();
  private boolean corePackages;
  private boolean oldVersions;
  private boolean current;
  private IPackageVisitorProcessor processor;
  private FilesystemPackageCacheManager pcm;
  private PackageClient pc;
  private String cache;
  private int step;

  /** Sets the server used to search packages and list versions; null selects the primary server. */
  public void setClientPackageServer(PackageServer packageServer) {
    this.clientPackageServer = packageServer;
  }

  /** Sets the servers handed to the package cache manager; null keeps the builder defaults. */
  public void setCachePackageServers(List<PackageServer> packageServers) {
    this.cachePackageServers = packageServers;
  }

  public Set<String> getResourceTypes() {
    return resourceTypes;
  }

  public void setResourceTypes(Set<String> resourceTypes) {
    this.resourceTypes = resourceTypes;
  }

  public void setResourceTypes(String... resourceTypes) {
    this.resourceTypes = new HashSet<>(Arrays.asList(resourceTypes));
  }

  public List<String> getVersions() {
    return versions;
  }

  /** Restricts processing to these FHIR versions; an empty list accepts every version. */
  public void setVersions(List<String> versions) {
    this.versions = versions;
  }

  public boolean isCurrent() {
    return current;
  }

  /** Enables visiting the current (CI build) packages. */
  public void setCurrent(boolean current) {
    this.current = current;
  }

  public boolean isCorePackages() {
    return corePackages;
  }

  /** When false, packages recognised as core packages are skipped. */
  public void setCorePackages(boolean corePackages) {
    this.corePackages = corePackages;
  }

  public String getCache() {
    return cache;
  }

  /** Sets the folder in which downloaded current (CI) package archives are stored. */
  public void setCache(String cache) {
    this.cache = cache;
  }

  public boolean isOldVersions() {
    return oldVersions;
  }

  /** When true every listed version of a published package is visited, otherwise only the last one. */
  public void setOldVersions(boolean oldVersions) {
    this.oldVersions = oldVersions;
  }

  public IPackageVisitorProcessor getProcessor() {
    return processor;
  }

  public void setProcessor(IPackageVisitorProcessor processor) {
    this.processor = processor;
  }

  public int getStep() {
    return step;
  }

  /** Selects which part of the work to perform; see the class documentation for the values. */
  public void setStep(int step) {
    this.step = step;
  }

  /**
   * Visits the current (CI) packages first, then every published package that was not already
   * seen as a current package, invoking the configured processor along the way.
   *
   * @throws IOException if a package list or registry cannot be fetched
   * @throws ParserConfigurationException declared by the package-collection step
   * @throws SAXException declared by the package-collection step
   * @throws FHIRException propagated from the processor or package handling
   * @throws EOperationOutcome propagated from the processor
   */
  public void visitPackages() throws IOException, ParserConfigurationException, SAXException, FHIRException, EOperationOutcome {
    log.info("Finding packages");
    pc = clientPackageServer == null
      ? new PackageClient(PackageServer.primaryServer())
      : new PackageClient(clientPackageServer);

    pcm = cachePackageServers == null
      ? new FilesystemPackageCacheManager.Builder().build()
      : new FilesystemPackageCacheManager.Builder().withPackageServers(cachePackageServers).build();

    Set<String> pidList = getAllPackages();

    Map<String, String> cpidMap = getAllCIPackages();
    Set<String> cpidSet = new HashSet<>();
    log.info("Go: {} current packages", cpidMap.size());
    int i = 0;
    for (Map.Entry<String, String> entry : cpidMap.entrySet()) {
      processCurrentPackage(entry.getValue(), entry.getKey(), cpidSet, i, cpidMap.size());
      i++;
    }

    log.info("Go: {} published packages", pidList.size());
    i = 0;
    for (String pid : pidList) {
      if (pid != null) {
        // Set.add returns false when the id was already recorded (as a current package).
        if (cpidSet.add(pid)) {
          if (visitsPublishedPackages()) {
            processPublishedVersions(pid, i, pidList.size());
          }
        } else {
          processor.alreadyVisited(pid);
        }
        i++;
      }
    }

    if (visitsPublishedPackages()) {
      // NOTE(review): getAllPackages() already adds every registry npm-name to pidList, so this
      // pass normally finds nothing new unless the registry changed between the two fetches.
      // It is kept to preserve the existing behaviour.
      JsonObject json = JsonParser.parseObjectFromUrl(IG_REGISTRY_URL);
      i = 0;
      List<JsonObject> objects = json.getJsonObjects("guides");
      for (JsonObject o : objects) {
        String pid = o.asString("npm-name");
        if (pid != null && cpidSet.add(pid)) {
          processPublishedVersions(pid, i, objects.size());
        }
        i++;
      }
    }
  }

  /** True when the configured step includes published packages (steps 0 and 3). */
  private boolean visitsPublishedPackages() {
    return step == 0 || step == 3;
  }

  /** True when the current package at position {@code i} of {@code t} belongs to the configured step. */
  private boolean visitsCurrentPackage(int i, int t) {
    return step == 0 || (step == 1 && i < t / 2) || (step == 2 && i >= t / 2);
  }

  /** Processes every listed version of {@code pid}, or only the last listed one unless oldVersions is set. */
  private void processPublishedVersions(String pid, int i, int t) throws IOException, FHIRException, EOperationOutcome {
    List<String> vList = listVersions(pid);
    if (oldVersions) {
      for (String v : vList) {
        processPackage(pid, v, i, t);
      }
    } else if (vList.isEmpty()) {
      log.info("No Packages for {}", pid);
    } else {
      processPackage(pid, vList.get(vList.size() - 1), i, t);
    }
  }

  /**
   * Downloads (or reuses from the cache folder) the CI build of a package and feeds its
   * resources to the processor. Failures are logged and never propagated, so one broken
   * build does not stop the run.
   *
   * @param url     the {@code repo} value from the CI build list; its first two path segments select the build
   * @param pid     the package id
   * @param cpidSet set of visited package ids; {@code pid} is always added
   * @param i       position of this package in the current-package list
   * @param t       total number of current packages
   */
  private void processCurrentPackage(String url, String pid, Set<String> cpidSet, int i, int t) {
    try {
      cpidSet.add(pid);
      if (!visitsCurrentPackage(i, t)) {
        return;
      }
      long ms1 = System.currentTimeMillis();
      String[] p = url.split("\\/");
      String repo = CI_BUILD_ROOT + p[0] + "/" + p[1];
      JsonObject manifest = JsonParser.parseObjectFromUrl(repo + "/package.manifest.json");
      // The manifest date is part of the file name, so a new build is downloaded again.
      File co = ManagedFileAccess.file(Utilities.path(cache, pid + "." + manifest.asString("date") + ".tgz"));
      if (!co.exists()) {
        HTTPResult res = ManagedWebAccess.get(Arrays.asList("web"), repo + "/package.tgz?nocache=" + System.currentTimeMillis());
        res.checkThrowException();
        FileUtilities.bytesToFile(res.getContent(), co);
      }
      NpmPackage npm;
      // Close the archive explicitly so the file handle is released even when parsing fails.
      try (InputStream in = ManagedFileAccess.inStream(co)) {
        npm = NpmPackage.fromPackage(in);
      }
      String fv = npm.fhirVersion();
      long ms2 = System.currentTimeMillis();

      if (!corePackages && corePackage(npm)) {
        return;
      }
      if (fv == null || !(versions.isEmpty() || versions.contains(fv))) {
        log.info("Ignored: {}#current: no version", pid);
        return;
      }

      PackageContext ctxt = new PackageContext(pid + "#current", npm, fv);
      Object context;
      try {
        context = processor.startPackage(ctxt);
      } catch (Exception e) {
        log.error("####### Error loading {}#current[{}]: ####### {}", pid, fv, e.getMessage());
        return;
      }

      int c = 0;
      for (String type : resourceTypes) {
        for (String s : npm.listResources(type)) {
          c++;
          try {
            processor.processResource(ctxt, context, type, s, FileUtilities.streamToBytes(npm.load("package", s)));
          } catch (Exception e) {
            // A single bad resource must not stop the rest of the package.
            log.error("####### Error loading {}#current[{}]/{} ####### {}", pid, fv, type, e.getMessage());
          }
        }
      }
      processor.finishPackage(ctxt);
      log.info("Processed: {}#current: {} resources ({} of {}, {}/{}ms)",
        pid, c, i, t, ms2 - ms1, System.currentTimeMillis() - ms2);
    } catch (Exception e) {
      log.error("Unable to process: {}#current: {}", pid, e.getMessage());
    }
  }

  /**
   * Reads the CI build list and returns package id to repo path for builds on a
   * {@code master} or {@code main} branch. Returns an empty map when current packages are
   * disabled. The first repo seen for a package id wins; later different ones are logged.
   */
  private Map<String, String> getAllCIPackages() throws IOException {
    Map<String, String> res = new HashMap<>();
    if (!current) {
      return res;
    }
    log.info("Fetch {}", CI_QAS_URL);
    JsonArray json = (JsonArray) JsonParser.parseFromUrl(CI_QAS_URL);
    for (JsonObject o : json.asJsonObjects()) {
      String url = o.asString("repo");
      String pid = o.asString("package-id");
      if (url == null || pid == null) {
        // An incomplete entry can neither be located nor identified; skip it rather than fail the run.
        log.debug("Ignore CI build entry without repo or package-id");
        continue;
      }
      if (url.contains("/branches/master") || url.contains("/branches/main")) {
        if (!res.containsKey(pid)) {
          res.put(pid, url);
        } else if (!url.equals(res.get(pid))) {
          log.warn("Ignore {} already encountered {} @ {}", url, pid, res.get(pid));
        }
      }
    }
    return res;
  }

  /** Returns the versions the package client reports for {@code pid}, in the order reported; empty for a null id. */
  private List<String> listVersions(String pid) throws IOException {
    List<String> list = new ArrayList<>();
    if (pid != null) {
      for (PackageInfo info : pc.getVersions(pid)) {
        list.add(info.getVersion());
      }
    }
    return list;
  }

  /**
   * Collects package ids from the package server search, the IG registry and every registered
   * package feed. The result may contain null when a registry entry has no {@code npm-name}.
   */
  private Set<String> getAllPackages() throws IOException, ParserConfigurationException, SAXException {
    Set<String> list = new HashSet<>();
    for (PackageInfo info : pc.search(null, null, null, false)) {
      list.add(info.getId());
    }
    JsonObject json = JsonParser.parseObjectFromUrl(IG_REGISTRY_URL);
    for (JsonObject ig : json.getJsonObjects("guides")) {
      list.add(ig.asString("npm-name"));
    }
    json = JsonParser.parseObjectFromUrl(PACKAGE_FEEDS_URL);
    for (JsonObject feed : json.getJsonObjects("feeds")) {
      processFeed(list, feed.asString("url"));
    }

    return list;
  }

  /**
   * Reads one package feed and adds the package id of every item whose title has the form
   * {@code id#version}. Any failure is logged and otherwise ignored, so a broken feed does
   * not prevent the other sources from being used.
   */
  private void processFeed(Set<String> list, String str) throws IOException, ParserConfigurationException, SAXException {
    log.info("Feed {}", str);
    try {
      HTTPResult res = ManagedWebAccess.get(Arrays.asList("web"), str + "?nocache=" + System.currentTimeMillis());
      res.checkThrowException();
      Document xml = XMLUtil.parseToDom(res.getContent());
      for (Element channel : XMLUtil.getNamedChildren(xml.getDocumentElement(), "channel")) {
        for (Element item : XMLUtil.getNamedChildren(channel, "item")) {
          String title = XMLUtil.getNamedChildText(item, "title");
          if (title != null && title.contains("#")) {
            list.add(title.substring(0, title.indexOf("#")));
          }
        }
      }
    } catch (Exception e) {
      log.error(" {}", e.getMessage());
    }
  }

  /**
   * Loads one published package version through the cache manager and feeds its resources to
   * the processor. A package that cannot be loaded is logged and skipped. Resources are only
   * processed when the package's FHIR version is known and accepted by the version filter,
   * but {@code startPackage}/{@code finishPackage} are invoked regardless.
   *
   * @param pid the package id
   * @param v   the package version
   * @param i   position of the package in the list being walked (for logging)
   * @param t   size of that list (for logging)
   */
  private void processPackage(String pid, String v, int i, int t) throws IOException, FHIRException, EOperationOutcome {
    NpmPackage npm;
    try {
      npm = pcm.loadPackage(pid, v);
    } catch (Throwable e) {
      log.error("Unable to load package: {}#{}: {}", pid, v, e.getMessage());
      return;
    }

    String fv = null;
    try {
      fv = npm.fhirVersion();
    } catch (Throwable e) {
      log.error("Unable to identify package FHIR version:: {}#{}: {}", pid, v, e.getMessage());
    }

    if (!corePackages && corePackage(npm)) {
      return;
    }

    PackageContext ctxt = new PackageContext(pid + "#" + v, npm, fv);
    Object context;
    try {
      context = processor.startPackage(ctxt);
    } catch (Exception e) {
      log.error("####### Error loading package {}#{}[{}]: {}", pid, v, fv, e.getMessage(), e);
      return;
    }

    int c = 0;
    if (fv != null && (versions.isEmpty() || versions.contains(fv))) {
      for (PackagedResourceFile p : npm.listAllResources(resourceTypes)) {
        c++;
        try {
          processor.processResource(ctxt, context, p.getResourceType(), p.getFilename(),
            FileUtilities.streamToBytes(npm.load(p.getFolder(), p.getFilename())));
        } catch (Exception e) {
          // A single bad resource must not stop the rest of the package.
          log.error("####### Error loading {}#{}[{}]/{} ####### {}", pid, v, fv, p.getResourceType(), e.getMessage(), e);
        }
      }
    }
    processor.finishPackage(ctxt);
    log.info("Processed: {}#{}: {} resources ({} of {})", pid, v, c, i, t);
  }

  /** Returns true when the package has a name that starts with one of the core package prefixes. */
  private boolean corePackage(NpmPackage npm) {
    if (npm == null) {
      return false;
    }
    String name = npm.name();
    if (Utilities.noString(name)) {
      return false;
    }
    for (String prefix : CORE_PACKAGE_PREFIXES) {
      if (name.startsWith(prefix)) {
        return true;
      }
    }
    return false;
  }

}