View Javadoc
1   package ca.uhn.fhir.jpa.term;
2   
3   import ca.uhn.fhir.context.FhirContext;
4   import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
5   import ca.uhn.fhir.jpa.entity.TermConcept;
6   import ca.uhn.fhir.jpa.entity.TermConceptParentChildLink;
7   import ca.uhn.fhir.jpa.term.custom.ConceptHandler;
8   import ca.uhn.fhir.jpa.term.custom.HierarchyHandler;
9   import ca.uhn.fhir.jpa.term.loinc.*;
10  import ca.uhn.fhir.jpa.term.snomedct.SctHandlerConcept;
11  import ca.uhn.fhir.jpa.term.snomedct.SctHandlerDescription;
12  import ca.uhn.fhir.jpa.term.snomedct.SctHandlerRelationship;
13  import ca.uhn.fhir.jpa.util.Counter;
14  import ca.uhn.fhir.rest.api.EncodingEnum;
15  import ca.uhn.fhir.rest.api.server.RequestDetails;
16  import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
17  import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
18  import ca.uhn.fhir.rest.server.exceptions.UnprocessableEntityException;
19  import ca.uhn.fhir.util.ValidateUtil;
20  import com.google.common.annotations.VisibleForTesting;
21  import com.google.common.base.Charsets;
22  import org.apache.commons.csv.CSVFormat;
23  import org.apache.commons.csv.CSVParser;
24  import org.apache.commons.csv.CSVRecord;
25  import org.apache.commons.csv.QuoteMode;
26  import org.apache.commons.io.FileUtils;
27  import org.apache.commons.io.IOUtils;
28  import org.apache.commons.io.input.BOMInputStream;
29  import org.apache.commons.lang3.ObjectUtils;
30  import org.apache.commons.lang3.StringUtils;
31  import org.apache.commons.lang3.Validate;
32  import org.hl7.fhir.instance.model.api.IIdType;
33  import org.hl7.fhir.r4.model.CodeSystem;
34  import org.hl7.fhir.r4.model.ConceptMap;
35  import org.hl7.fhir.r4.model.Enumerations;
36  import org.hl7.fhir.r4.model.ValueSet;
37  import org.jetbrains.annotations.NotNull;
38  import org.springframework.beans.factory.annotation.Autowired;
39  
40  import java.io.*;
41  import java.util.*;
42  import java.util.Map.Entry;
43  import java.util.zip.ZipEntry;
44  import java.util.zip.ZipInputStream;
45  
46  import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.*;
47  import static org.apache.commons.lang3.StringUtils.isNotBlank;
48  
49  /*
50   * #%L
51   * HAPI FHIR JPA Server
52   * %%
53   * Copyright (C) 2014 - 2019 University Health Network
54   * %%
55   * Licensed under the Apache License, Version 2.0 (the "License");
56   * you may not use this file except in compliance with the License.
57   * You may obtain a copy of the License at
58   *
59   *      http://www.apache.org/licenses/LICENSE-2.0
60   *
61   * Unless required by applicable law or agreed to in writing, software
62   * distributed under the License is distributed on an "AS IS" BASIS,
63   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
64   * See the License for the specific language governing permissions and
65   * limitations under the License.
66   * #L%
67   */
68  
69  public class TerminologyLoaderSvcImpl implements IHapiTerminologyLoaderSvc {
70  	public static final String SCT_FILE_CONCEPT = "Terminology/sct2_Concept_Full_"; // SNOMED CT concept file; used as a filename fragment (partial match) by processSnomedCtFiles
71  	public static final String SCT_FILE_DESCRIPTION = "Terminology/sct2_Description_Full-en"; // SNOMED CT description file; filename fragment (partial match)
72  	public static final String SCT_FILE_RELATIONSHIP = "Terminology/sct2_Relationship_Full"; // SNOMED CT relationship file; filename fragment (partial match)
73  
74  	public static final String IMGTHLA_HLA_NOM_TXT = "hla_nom.txt"; // IMGT/HLA file; matched as the exact name or as a "/"-prefixed path suffix
75  	public static final String IMGTHLA_HLA_XML = "hla.xml"; // IMGT/HLA file; matched as the exact name or as a "/"-prefixed path suffix
76  
77  	public static final String CUSTOM_CONCEPTS_FILE = "concepts.csv"; // custom upload: concept list, always read by loadCustom
78  	public static final String CUSTOM_HIERARCHY_FILE = "hierarchy.csv"; // custom upload: hierarchy, read by loadCustom only when hasFile(...) reports it
79  	public static final String CUSTOM_CODESYSTEM_JSON = "codesystem.json"; // custom upload: optional CodeSystem resource (JSON), looked up by loadCustom
80  	public static final String CUSTOM_CODESYSTEM_XML = "codesystem.xml"; // custom upload: optional CodeSystem resource (XML), looked up by loadCustom
81  
82  	private static final int LOG_INCREMENT = 1000; // number of records between progress log lines in iterateOverZipFile
83  	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(TerminologyLoaderSvcImpl.class);
84  
85  	@Autowired
86  	private IHapiTerminologySvc myTermSvc; // terminology service that storeCodeSystem hands the loaded content to; replaceable via setTermSvcForUnitTests
87  
88  	// FYI: Hardcoded to R4 because that's what the term svc uses internally
89  	private final FhirContext myCtx = FhirContext.forR4(); // used by loadCustom to create the parser for an uploaded CodeSystem
90  
91  	private void dropCircularRefs(TermConcept theConcept, ArrayList<String> theChain, Map<String, TermConcept> theCode2concept, Counter theCircularCounter) {
92  
93  		theChain.add(theConcept.getCode());
94  		for (Iterator<TermConceptParentChildLink> childIter = theConcept.getChildren().iterator(); childIter.hasNext(); ) {
95  			TermConceptParentChildLink next = childIter.next();
96  			TermConcept nextChild = next.getChild();
97  			if (theChain.contains(nextChild.getCode())) {
98  
99  				StringBuilder b = new StringBuilder();
100 				b.append("Removing circular reference code ");
101 				b.append(nextChild.getCode());
102 				b.append(" from parent ");
103 				b.append(next.getParent().getCode());
104 				b.append(". Chain was: ");
105 				for (String nextInChain : theChain) {
106 					TermConcept nextCode = theCode2concept.get(nextInChain);
107 					b.append(nextCode.getCode());
108 					b.append('[');
109 					b.append(StringUtils.substring(nextCode.getDisplay(), 0, 20).replace("[", "").replace("]", "").trim());
110 					b.append("] ");
111 				}
112 				ourLog.info(b.toString(), theConcept.getCode());
113 				childIter.remove();
114 				nextChild.getParents().remove(next);
115 
116 			} else {
117 				dropCircularRefs(nextChild, theChain, theCode2concept, theCircularCounter);
118 			}
119 		}
120 		theChain.remove(theChain.size() - 1);
121 
122 	}
123 
124 	private void iterateOverZipFile(LoadedFileDescriptors theDescriptors, String theFileNamePart, IRecordHandler theHandler, char theDelimiter, QuoteMode theQuoteMode, boolean theIsPartialFilename) {
125 
126 		boolean foundMatch = false;
127 		for (FileDescriptor nextZipBytes : theDescriptors.getUncompressedFileDescriptors()) {
128 			String nextFilename = nextZipBytes.getFilename();
129 			boolean matches;
130 			if (theIsPartialFilename) {
131 				matches = nextFilename.contains(theFileNamePart);
132 			} else {
133 				matches = nextFilename.endsWith("/" + theFileNamePart) || nextFilename.equals(theFileNamePart);
134 			}
135 
136 			if (matches) {
137 				ourLog.info("Processing file {}", nextFilename);
138 				foundMatch = true;
139 
140 				Reader reader;
141 				CSVParser parsed;
142 				try {
143 					reader = new InputStreamReader(nextZipBytes.getInputStream(), Charsets.UTF_8);
144 
145 					if (ourLog.isTraceEnabled()) {
146 						String contents = IOUtils.toString(reader);
147 						ourLog.info("File contents for: {}\n{}", nextFilename, contents);
148 						reader = new StringReader(contents);
149 					}
150 
151 					CSVFormat format = CSVFormat.newFormat(theDelimiter).withFirstRecordAsHeader();
152 					if (theQuoteMode != null) {
153 						format = format.withQuote('"').withQuoteMode(theQuoteMode);
154 					}
155 					parsed = new CSVParser(reader, format);
156 					Iterator<CSVRecord> iter = parsed.iterator();
157 					ourLog.debug("Header map: {}", parsed.getHeaderMap());
158 
159 					int count = 0;
160 					int nextLoggedCount = 0;
161 					while (iter.hasNext()) {
162 						CSVRecord nextRecord = iter.next();
163 						if (nextRecord.isConsistent() == false) {
164 							continue;
165 						}
166 						theHandler.accept(nextRecord);
167 						count++;
168 						if (count >= nextLoggedCount) {
169 							ourLog.info(" * Processed {} records in {}", count, nextFilename);
170 							nextLoggedCount += LOG_INCREMENT;
171 						}
172 					}
173 
174 				} catch (IOException e) {
175 					throw new InternalErrorException(e);
176 				}
177 			}
178 
179 		}
180 
181 		if (!foundMatch) {
182 			throw new InvalidRequestException("Did not find file matching " + theFileNamePart);
183 		}
184 
185 	}
186 
187 	@Override
188 	public UploadStatistics loadImgthla(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
189 		LoadedFileDescriptors descriptors = null;
190 		try {
191 			descriptors = new LoadedFileDescriptors(theFiles);
192 			List<String> mandatoryFilenameFragments = Arrays.asList(
193 				IMGTHLA_HLA_NOM_TXT,
194 				IMGTHLA_HLA_XML
195 			);
196 			descriptors.verifyMandatoryFilesExist(mandatoryFilenameFragments);
197 
198 			ourLog.info("Beginning IMGTHLA processing");
199 
200 			return processImgthlaFiles(descriptors, theRequestDetails);
201 		} finally {
202 			IOUtils.closeQuietly(descriptors);
203 		}
204 	}
205 
206 	@Override
207 	public UploadStatistics loadLoinc(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
208 		try (LoadedFileDescriptors descriptors = new LoadedFileDescriptors(theFiles)) {
209 			List<String> loincUploadPropertiesFragment = Arrays.asList(
210 				LOINC_UPLOAD_PROPERTIES_FILE.getCode()
211 			);
212 			descriptors.verifyMandatoryFilesExist(loincUploadPropertiesFragment);
213 
214 			Properties uploadProperties = getProperties(descriptors, LOINC_UPLOAD_PROPERTIES_FILE.getCode());
215 
216 			List<String> mandatoryFilenameFragments = Arrays.asList(
217 				uploadProperties.getProperty(LOINC_ANSWERLIST_FILE.getCode(), LOINC_ANSWERLIST_FILE_DEFAULT.getCode()),
218 				uploadProperties.getProperty(LOINC_ANSWERLIST_LINK_FILE.getCode(), LOINC_ANSWERLIST_LINK_FILE_DEFAULT.getCode()),
219 				uploadProperties.getProperty(LOINC_DOCUMENT_ONTOLOGY_FILE.getCode(), LOINC_DOCUMENT_ONTOLOGY_FILE_DEFAULT.getCode()),
220 				uploadProperties.getProperty(LOINC_FILE.getCode(), LOINC_FILE_DEFAULT.getCode()),
221 				uploadProperties.getProperty(LOINC_HIERARCHY_FILE.getCode(), LOINC_HIERARCHY_FILE_DEFAULT.getCode()),
222 				uploadProperties.getProperty(LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE.getCode(), LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE_DEFAULT.getCode()),
223 				uploadProperties.getProperty(LOINC_IMAGING_DOCUMENT_CODES_FILE.getCode(), LOINC_IMAGING_DOCUMENT_CODES_FILE_DEFAULT.getCode()),
224 				uploadProperties.getProperty(LOINC_PART_FILE.getCode(), LOINC_PART_FILE_DEFAULT.getCode()),
225 				uploadProperties.getProperty(LOINC_PART_LINK_FILE.getCode(), LOINC_PART_LINK_FILE_DEFAULT.getCode()),
226 				uploadProperties.getProperty(LOINC_PART_RELATED_CODE_MAPPING_FILE.getCode(), LOINC_PART_RELATED_CODE_MAPPING_FILE_DEFAULT.getCode()),
227 				uploadProperties.getProperty(LOINC_RSNA_PLAYBOOK_FILE.getCode(), LOINC_RSNA_PLAYBOOK_FILE_DEFAULT.getCode()),
228 				uploadProperties.getProperty(LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE.getCode(), LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE_DEFAULT.getCode()),
229 				uploadProperties.getProperty(LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE.getCode(), LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE_DEFAULT.getCode()),
230 				uploadProperties.getProperty(LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE.getCode(), LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE_DEFAULT.getCode())
231 			);
232 			descriptors.verifyMandatoryFilesExist(mandatoryFilenameFragments);
233 
234 			List<String> optionalFilenameFragments = Arrays.asList(
235 				uploadProperties.getProperty(LOINC_GROUP_FILE.getCode(), LOINC_GROUP_FILE_DEFAULT.getCode()),
236 				uploadProperties.getProperty(LOINC_GROUP_TERMS_FILE.getCode(), LOINC_GROUP_TERMS_FILE_DEFAULT.getCode()),
237 				uploadProperties.getProperty(LOINC_PARENT_GROUP_FILE.getCode(), LOINC_PARENT_GROUP_FILE_DEFAULT.getCode())
238 			);
239 			descriptors.verifyOptionalFilesExist(optionalFilenameFragments);
240 
241 			ourLog.info("Beginning LOINC processing");
242 
243 			return processLoincFiles(descriptors, theRequestDetails, uploadProperties);
244 		}
245 	}
246 
247 	@NotNull
248 	private Properties getProperties(LoadedFileDescriptors theDescriptors, String thePropertiesFile) {
249 		Properties retVal = new Properties();
250 		for (FileDescriptor next : theDescriptors.getUncompressedFileDescriptors()) {
251 			if (next.getFilename().endsWith(thePropertiesFile)) {
252 				try {
253 					try (InputStream inputStream = next.getInputStream()) {
254 						retVal.load(inputStream);
255 					}
256 				} catch (IOException e) {
257 					throw new InternalErrorException("Failed to read " + thePropertiesFile, e);
258 				}
259 			}
260 		}
261 		return retVal;
262 	}
263 
264 	@Override
265 	public UploadStatistics loadSnomedCt(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
266 		try (LoadedFileDescriptors descriptors = new LoadedFileDescriptors(theFiles)) {
267 
268 			List<String> expectedFilenameFragments = Arrays.asList(
269 				SCT_FILE_DESCRIPTION,
270 				SCT_FILE_RELATIONSHIP,
271 				SCT_FILE_CONCEPT);
272 			descriptors.verifyMandatoryFilesExist(expectedFilenameFragments);
273 
274 			ourLog.info("Beginning SNOMED CT processing");
275 
276 			return processSnomedCtFiles(descriptors, theRequestDetails);
277 		}
278 	}
279 
280 	@Override
281 	public UploadStatistics loadCustom(String theSystem, List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
282 		try (LoadedFileDescriptors descriptors = new LoadedFileDescriptors(theFiles)) {
283 			final Map<String, TermConcept> code2concept = new HashMap<>();
284 			IRecordHandler handler;
285 
286 			Optional<String> codeSystemContent = loadFile(descriptors, CUSTOM_CODESYSTEM_JSON, CUSTOM_CODESYSTEM_XML);
287 			CodeSystem codeSystem;
288 			if (codeSystemContent.isPresent()) {
289 				codeSystem = EncodingEnum
290 					.detectEncoding(codeSystemContent.get())
291 					.newParser(myCtx)
292 					.parseResource(CodeSystem.class, codeSystemContent.get());
293 				ValidateUtil.isTrueOrThrowInvalidRequest(theSystem.equalsIgnoreCase(codeSystem.getUrl()), "CodeSystem.url does not match the supplied system: %s", theSystem);
294 				ValidateUtil.isTrueOrThrowInvalidRequest(CodeSystem.CodeSystemContentMode.NOTPRESENT.equals(codeSystem.getContent()), "CodeSystem.content does not match the expected value: %s", CodeSystem.CodeSystemContentMode.NOTPRESENT.toCode());
295 			} else {
296 				codeSystem = new CodeSystem();
297 				codeSystem.setUrl(theSystem);
298 				codeSystem.setContent(CodeSystem.CodeSystemContentMode.NOTPRESENT);
299 			}
300 
301 			TermCodeSystemVersion csv = new TermCodeSystemVersion();
302 
303 			// Concept File
304 			handler = new ConceptHandler(code2concept, csv);
305 			iterateOverZipFile(descriptors, CUSTOM_CONCEPTS_FILE, handler, ',', QuoteMode.NON_NUMERIC, false);
306 
307 			// Hierarchy
308 			if (descriptors.hasFile(CUSTOM_HIERARCHY_FILE)) {
309 				handler = new HierarchyHandler(code2concept);
310 				iterateOverZipFile(descriptors, CUSTOM_HIERARCHY_FILE, handler, ',', QuoteMode.NON_NUMERIC, false);
311 			}
312 
313 			// Add root concepts to CodeSystemVersion
314 			for (TermConcept nextConcept : code2concept.values()) {
315 				if (nextConcept.getParents().isEmpty()) {
316 					csv.getConcepts().add(nextConcept);
317 				}
318 			}
319 
320 			IIdType target = storeCodeSystem(theRequestDetails, csv, codeSystem, null, null);
321 			return new UploadStatistics(code2concept.size(), target);
322 		}
323 	}
324 
325 	private Optional<String> loadFile(LoadedFileDescriptors theDescriptors, String... theFilenames) {
326 		for (FileDescriptor next : theDescriptors.getUncompressedFileDescriptors()) {
327 			for (String nextFilename : theFilenames) {
328 				if (next.getFilename().endsWith(nextFilename)) {
329 					try {
330 						String contents = IOUtils.toString(next.getInputStream(), Charsets.UTF_8);
331 						return Optional.of(contents);
332 					} catch (IOException e) {
333 						throw new InternalErrorException(e);
334 					}
335 				}
336 			}
337 		}
338 		return Optional.empty();
339 	}
340 
341 	UploadStatistics processImgthlaFiles(LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails) {
342 		final TermCodeSystemVersionermCodeSystemVersion">TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
343 		final Map<String, TermConcept> code2concept = new HashMap<>();
344 		final List<ValueSet> valueSets = new ArrayList<>();
345 		final List<ConceptMap> conceptMaps = new ArrayList<>();
346 
347 		CodeSystem imgthlaCs;
348 		try {
349 			String imgthlaCsString = IOUtils.toString(BaseHapiTerminologySvcImpl.class.getResourceAsStream("/ca/uhn/fhir/jpa/term/imgthla/imgthla.xml"), Charsets.UTF_8);
350 			imgthlaCs = FhirContext.forR4().newXmlParser().parseResource(CodeSystem.class, imgthlaCsString);
351 		} catch (IOException e) {
352 			throw new InternalErrorException("Failed to load imgthla.xml", e);
353 		}
354 
355 		Map<String, CodeSystem.PropertyType> propertyNamesToTypes = new HashMap<>();
356 		for (CodeSystem.PropertyComponent nextProperty : imgthlaCs.getProperty()) {
357 			String nextPropertyCode = nextProperty.getCode();
358 			CodeSystem.PropertyType nextPropertyType = nextProperty.getType();
359 			if (isNotBlank(nextPropertyCode)) {
360 				propertyNamesToTypes.put(nextPropertyCode, nextPropertyType);
361 			}
362 		}
363 
364 		boolean foundHlaNom = false;
365 		boolean foundHlaXml = false;
366 		for (FileDescriptor nextZipBytes : theDescriptors.getUncompressedFileDescriptors()) {
367 			String nextFilename = nextZipBytes.getFilename();
368 
369 			if (!IMGTHLA_HLA_NOM_TXT.equals(nextFilename) && !nextFilename.endsWith("/" + IMGTHLA_HLA_NOM_TXT)
370 				&& !IMGTHLA_HLA_XML.equals(nextFilename) && !nextFilename.endsWith("/" + IMGTHLA_HLA_XML)) {
371 				ourLog.info("Skipping unexpected file {}", nextFilename);
372 				continue;
373 			}
374 
375 			if (IMGTHLA_HLA_NOM_TXT.equals(nextFilename) || nextFilename.endsWith("/" + IMGTHLA_HLA_NOM_TXT)) {
376 				// process colon-delimited hla_nom.txt file
377 				ourLog.info("Processing file {}", nextFilename);
378 
379 //				IRecordHandler handler = new HlaNomTxtHandler(codeSystemVersion, code2concept, propertyNamesToTypes);
380 //				AntigenSource antigenSource = new WmdaAntigenSource(hlaNomFilename, relSerSerFilename, relDnaSerFilename);
381 
382 				Reader reader = null;
383 				try {
384 					reader = new InputStreamReader(nextZipBytes.getInputStream(), Charsets.UTF_8);
385 
386 					if (ourLog.isTraceEnabled()) {
387 						String contents = IOUtils.toString(reader);
388 						ourLog.info("File contents for: {}\n{}", nextFilename, contents);
389 						reader = new StringReader(contents);
390 					}
391 
392 					LineNumberReader lnr = new LineNumberReader(reader);
393 					while (lnr.readLine() != null) {
394 					}
395 					ourLog.warn("Lines read from {}:  {}", nextFilename, lnr.getLineNumber());
396 
397 				} catch (IOException e) {
398 					throw new InternalErrorException(e);
399 				} finally {
400 					IOUtils.closeQuietly(reader);
401 				}
402 
403 				foundHlaNom = true;
404 			}
405 
406 			if (IMGTHLA_HLA_XML.equals(nextFilename) || nextFilename.endsWith("/" + IMGTHLA_HLA_XML)) {
407 				// process hla.xml file
408 				ourLog.info("Processing file {}", nextFilename);
409 
410 //				IRecordHandler handler = new HlaXmlHandler(codeSystemVersion, code2concept, propertyNamesToTypes);
411 //				AlleleSource alleleSource = new HlaXmlAlleleSource(hlaXmlFilename);
412 
413 				Reader reader = null;
414 				try {
415 					reader = new InputStreamReader(nextZipBytes.getInputStream(), Charsets.UTF_8);
416 
417 					if (ourLog.isTraceEnabled()) {
418 						String contents = IOUtils.toString(reader);
419 						ourLog.info("File contents for: {}\n{}", nextFilename, contents);
420 						reader = new StringReader(contents);
421 					}
422 
423 					LineNumberReader lnr = new LineNumberReader(reader);
424 					while (lnr.readLine() != null) {
425 					}
426 					ourLog.warn("Lines read from {}:  {}", nextFilename, lnr.getLineNumber());
427 
428 				} catch (IOException e) {
429 					throw new InternalErrorException(e);
430 				} finally {
431 					IOUtils.closeQuietly(reader);
432 				}
433 
434 				foundHlaXml = true;
435 			}
436 
437 		}
438 
439 		if (!foundHlaNom) {
440 			throw new InvalidRequestException("Did not find file matching " + IMGTHLA_HLA_NOM_TXT);
441 		}
442 
443 		if (!foundHlaXml) {
444 			throw new InvalidRequestException("Did not find file matching " + IMGTHLA_HLA_XML);
445 		}
446 
447 		int valueSetCount = valueSets.size();
448 		int rootConceptCount = codeSystemVersion.getConcepts().size();
449 		int conceptCount = code2concept.size();
450 		ourLog.info("Have {} total concepts, {} root concepts, {} ValueSets", conceptCount, rootConceptCount, valueSetCount);
451 
452 		// remove this when fully implemented ...
453 		throw new InternalErrorException("HLA nomenclature terminology upload not yet fully implemented.");
454 
455 //		IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, imgthlaCs, valueSets, conceptMaps);
456 //
457 //		return new UploadStatistics(conceptCount, target);
458 	}
459 
460 	UploadStatistics processLoincFiles(LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails, Properties theUploadProperties) {
461 		final TermCodeSystemVersionermCodeSystemVersion">TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
462 		final Map<String, TermConcept> code2concept = new HashMap<>();
463 		final List<ValueSet> valueSets = new ArrayList<>();
464 		final List<ConceptMap> conceptMaps = new ArrayList<>();
465 
466 		CodeSystem loincCs;
467 		try {
468 			String loincCsString = IOUtils.toString(BaseHapiTerminologySvcImpl.class.getResourceAsStream("/ca/uhn/fhir/jpa/term/loinc/loinc.xml"), Charsets.UTF_8);
469 			loincCs = FhirContext.forR4().newXmlParser().parseResource(CodeSystem.class, loincCsString);
470 		} catch (IOException e) {
471 			throw new InternalErrorException("Failed to load loinc.xml", e);
472 		}
473 
474 		Map<String, CodeSystem.PropertyType> propertyNamesToTypes = new HashMap<>();
475 		for (CodeSystem.PropertyComponent nextProperty : loincCs.getProperty()) {
476 			String nextPropertyCode = nextProperty.getCode();
477 			CodeSystem.PropertyType nextPropertyType = nextProperty.getType();
478 			if (isNotBlank(nextPropertyCode)) {
479 				propertyNamesToTypes.put(nextPropertyCode, nextPropertyType);
480 			}
481 		}
482 
483 		IRecordHandler handler;
484 
485 		// Part
486 		handler = new LoincPartHandler(codeSystemVersion, code2concept);
487 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_PART_FILE.getCode(), LOINC_PART_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
488 		Map<PartTypeAndPartName, String> partTypeAndPartNameToPartNumber = ((LoincPartHandler) handler).getPartTypeAndPartNameToPartNumber();
489 
490 		// LOINC codes
491 		handler = new LoincHandler(codeSystemVersion, code2concept, propertyNamesToTypes, partTypeAndPartNameToPartNumber);
492 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_FILE.getCode(), LOINC_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
493 
494 		// LOINC hierarchy
495 		handler = new LoincHierarchyHandler(codeSystemVersion, code2concept);
496 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_HIERARCHY_FILE.getCode(), LOINC_HIERARCHY_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
497 
498 		// Answer lists (ValueSets of potential answers/values for LOINC "questions")
499 		handler = new LoincAnswerListHandler(codeSystemVersion, code2concept, valueSets, conceptMaps, theUploadProperties);
500 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_ANSWERLIST_FILE.getCode(), LOINC_ANSWERLIST_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
501 
502 		// Answer list links (connects LOINC observation codes to answer list codes)
503 		handler = new LoincAnswerListLinkHandler(code2concept, valueSets);
504 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_ANSWERLIST_LINK_FILE.getCode(), LOINC_ANSWERLIST_LINK_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
505 
506 		// RSNA playbook
507 		// Note that this should come before the "Part Related Code Mapping"
508 		// file because there are some duplicate mappings between these
509 		// two files, and the RSNA Playbook file has more metadata
510 		handler = new LoincRsnaPlaybookHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
511 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_RSNA_PLAYBOOK_FILE.getCode(), LOINC_RSNA_PLAYBOOK_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
512 
513 		// Part link
514 		handler = new LoincPartLinkHandler(codeSystemVersion, code2concept);
515 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_PART_LINK_FILE.getCode(), LOINC_PART_LINK_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
516 
517 		// Part related code mapping
518 		handler = new LoincPartRelatedCodeMappingHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
519 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_PART_RELATED_CODE_MAPPING_FILE.getCode(), LOINC_PART_RELATED_CODE_MAPPING_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
520 
521 		// Document ontology
522 		handler = new LoincDocumentOntologyHandler(code2concept, propertyNamesToTypes, valueSets, conceptMaps, theUploadProperties);
523 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_DOCUMENT_ONTOLOGY_FILE.getCode(), LOINC_DOCUMENT_ONTOLOGY_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
524 
525 		// Top 2000 codes - US
526 		handler = new LoincTop2000LabResultsUsHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
527 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE.getCode(), LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
528 
529 		// Top 2000 codes - SI
530 		handler = new LoincTop2000LabResultsSiHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
531 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE.getCode(), LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
532 
533 		// Universal lab order ValueSet
534 		handler = new LoincUniversalOrderSetHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
535 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE.getCode(), LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
536 
537 		// IEEE medical device codes
538 		handler = new LoincIeeeMedicalDeviceCodeHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
539 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE.getCode(), LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
540 
541 		// Imaging document codes
542 		handler = new LoincImagingDocumentCodeHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
543 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_IMAGING_DOCUMENT_CODES_FILE.getCode(), LOINC_IMAGING_DOCUMENT_CODES_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
544 
545 		// Group
546 		handler = new LoincGroupFileHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
547 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_GROUP_FILE.getCode(), LOINC_GROUP_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
548 
549 		// Group terms
550 		handler = new LoincGroupTermsFileHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
551 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_GROUP_TERMS_FILE.getCode(), LOINC_GROUP_TERMS_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
552 
553 		// Parent group
554 		handler = new LoincParentGroupFileHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
555 		iterateOverZipFile(theDescriptors, theUploadProperties.getProperty(LOINC_PARENT_GROUP_FILE.getCode(), LOINC_PARENT_GROUP_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
556 
557 		IOUtils.closeQuietly(theDescriptors);
558 
559 		valueSets.add(getValueSetLoincAll());
560 
561 		for (Entry<String, TermConcept> next : code2concept.entrySet()) {
562 			TermConcept nextConcept = next.getValue();
563 			if (nextConcept.getParents().isEmpty()) {
564 				codeSystemVersion.getConcepts().add(nextConcept);
565 			}
566 		}
567 
568 		int valueSetCount = valueSets.size();
569 		int rootConceptCount = codeSystemVersion.getConcepts().size();
570 		int conceptCount = code2concept.size();
571 		ourLog.info("Have {} total concepts, {} root concepts, {} ValueSets", conceptCount, rootConceptCount, valueSetCount);
572 
573 		IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, loincCs, valueSets, conceptMaps);
574 
575 		return new UploadStatistics(conceptCount, target);
576 	}
577 
578 	private ValueSet getValueSetLoincAll() {
579 		ValueSet retVal = new ValueSet();
580 
581 		retVal.setId("loinc-all");
582 		retVal.setUrl("http://loinc.org/vs");
583 		retVal.setVersion("1.0.0");
584 		retVal.setName("All LOINC codes");
585 		retVal.setStatus(Enumerations.PublicationStatus.ACTIVE);
586 		retVal.setDate(new Date());
587 		retVal.setPublisher("Regenstrief Institute, Inc.");
588 		retVal.setDescription("A value set that includes all LOINC codes");
589 		retVal.setCopyright("This content from LOINC® is copyright © 1995 Regenstrief Institute, Inc. and the LOINC Committee, and available at no cost under the license at https://loinc.org/license/");
590 		retVal.getCompose().addInclude().setSystem(IHapiTerminologyLoaderSvc.LOINC_URI);
591 
592 		return retVal;
593 	}
594 
595 	private UploadStatistics processSnomedCtFiles(LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails) {
596 		final TermCodeSystemVersionermCodeSystemVersion">TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
597 		final Map<String, TermConcept> id2concept = new HashMap<>();
598 		final Map<String, TermConcept> code2concept = new HashMap<>();
599 		final Set<String> validConceptIds = new HashSet<>();
600 
601 		IRecordHandler handler = new SctHandlerConcept(validConceptIds);
602 		iterateOverZipFile(theDescriptors, SCT_FILE_CONCEPT, handler, '\t', null, true);
603 
604 		ourLog.info("Have {} valid concept IDs", validConceptIds.size());
605 
606 		handler = new SctHandlerDescription(validConceptIds, code2concept, id2concept, codeSystemVersion);
607 		iterateOverZipFile(theDescriptors, SCT_FILE_DESCRIPTION, handler, '\t', null, true);
608 
609 		ourLog.info("Got {} concepts, cloning map", code2concept.size());
610 		final HashMap<String, TermConcept> rootConcepts = new HashMap<>(code2concept);
611 
612 		handler = new SctHandlerRelationship(codeSystemVersion, rootConcepts, code2concept);
613 		iterateOverZipFile(theDescriptors, SCT_FILE_RELATIONSHIP, handler, '\t', null, true);
614 
615 		IOUtils.closeQuietly(theDescriptors);
616 
617 		ourLog.info("Looking for root codes");
618 		rootConcepts
619 			.entrySet()
620 			.removeIf(theStringTermConceptEntry -> theStringTermConceptEntry.getValue().getParents().isEmpty() == false);
621 
622 		ourLog.info("Done loading SNOMED CT files - {} root codes, {} total codes", rootConcepts.size(), code2concept.size());
623 
624 		Counter circularCounter = new Counter();
625 		for (TermConcept next : rootConcepts.values()) {
626 			long count = circularCounter.getThenAdd();
627 			float pct = ((float) count / rootConcepts.size()) * 100.0f;
628 			ourLog.info(" * Scanning for circular refs - have scanned {} / {} codes ({}%)", count, rootConcepts.size(), pct);
629 			dropCircularRefs(next, new ArrayList<>(), code2concept, circularCounter);
630 		}
631 
632 		codeSystemVersion.getConcepts().addAll(rootConcepts.values());
633 
634 		CodeSystem cs = new org.hl7.fhir.r4.model.CodeSystem();
635 		cs.setUrl(SCT_URI);
636 		cs.setName("SNOMED CT");
637 		cs.setContent(CodeSystem.CodeSystemContentMode.NOTPRESENT);
638 		IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, cs, null, null);
639 
640 		return new UploadStatistics(code2concept.size(), target);
641 	}
642 
643 	@VisibleForTesting
644 	void setTermSvcForUnitTests(IHapiTerminologySvc theTermSvc) {
645 		myTermSvc = theTermSvc;
646 	}
647 
648 	private IIdType storeCodeSystem(RequestDetails theRequestDetails, final TermCodeSystemVersion theCodeSystemVersion, CodeSystem theCodeSystem, List<ValueSet> theValueSets, List<ConceptMap> theConceptMaps) {
649 		Validate.isTrue(theCodeSystem.getContent() == CodeSystem.CodeSystemContentMode.NOTPRESENT);
650 
651 		List<ValueSet> valueSets = ObjectUtils.defaultIfNull(theValueSets, Collections.emptyList());
652 		List<ConceptMap> conceptMaps = ObjectUtils.defaultIfNull(theConceptMaps, Collections.emptyList());
653 
654 		IIdType retVal;
655 		myTermSvc.setProcessDeferred(false);
656 		retVal = myTermSvc.storeNewCodeSystemVersion(theCodeSystem, theCodeSystemVersion, theRequestDetails, valueSets, conceptMaps);
657 		myTermSvc.setProcessDeferred(true);
658 
659 		return retVal;
660 	}
661 
662 	static class LoadedFileDescriptors implements Closeable {
663 
664 		private List<File> myTemporaryFiles = new ArrayList<>();
665 		private List<IHapiTerminologyLoaderSvc.FileDescriptor> myUncompressedFileDescriptors = new ArrayList<>();
666 
667 		LoadedFileDescriptors(List<IHapiTerminologyLoaderSvc.FileDescriptor> theFileDescriptors) {
668 			try {
669 				for (FileDescriptor next : theFileDescriptors) {
670 					if (next.getFilename().toLowerCase().endsWith(".zip")) {
671 						ourLog.info("Uncompressing {} into temporary files", next.getFilename());
672 						try (InputStream inputStream = next.getInputStream()) {
673 							ZipInputStream zis = new ZipInputStream(new BufferedInputStream(inputStream));
674 							for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null; ) {
675 								BOMInputStream fis = new BOMInputStream(zis);
676 								File nextTemporaryFile = File.createTempFile("hapifhir", ".tmp");
677 								nextTemporaryFile.deleteOnExit();
678 								FileOutputStream fos = new FileOutputStream(nextTemporaryFile, false);
679 								IOUtils.copy(fis, fos);
680 								String nextEntryFileName = nextEntry.getName();
681 								myUncompressedFileDescriptors.add(new FileDescriptor() {
682 									@Override
683 									public String getFilename() {
684 										return nextEntryFileName;
685 									}
686 
687 									@Override
688 									public InputStream getInputStream() {
689 										try {
690 											return new FileInputStream(nextTemporaryFile);
691 										} catch (FileNotFoundException e) {
692 											throw new InternalErrorException(e);
693 										}
694 									}
695 								});
696 								myTemporaryFiles.add(nextTemporaryFile);
697 							}
698 						}
699 					} else {
700 						myUncompressedFileDescriptors.add(next);
701 					}
702 
703 				}
704 			} catch (Exception e) {
705 				close();
706 				throw new InternalErrorException(e);
707 			}
708 		}
709 
710 		boolean hasFile(String theFilename) {
711 			return myUncompressedFileDescriptors
712 				.stream()
713 				.map(t -> t.getFilename().replaceAll(".*[\\\\/]", "")) // Strip the path from the filename
714 				.anyMatch(t -> t.equals(theFilename));
715 		}
716 
717 		@Override
718 		public void close() {
719 			for (File next : myTemporaryFiles) {
720 				FileUtils.deleteQuietly(next);
721 			}
722 		}
723 
724 		List<IHapiTerminologyLoaderSvc.FileDescriptor> getUncompressedFileDescriptors() {
725 			return myUncompressedFileDescriptors;
726 		}
727 
728 		private List<String> notFound(List<String> theExpectedFilenameFragments) {
729 			Set<String> foundFragments = new HashSet<>();
730 			for (String nextExpected : theExpectedFilenameFragments) {
731 				for (FileDescriptor next : myUncompressedFileDescriptors) {
732 					if (next.getFilename().contains(nextExpected)) {
733 						foundFragments.add(nextExpected);
734 						break;
735 					}
736 				}
737 			}
738 
739 			ArrayList<String> notFoundFileNameFragments = new ArrayList<>(theExpectedFilenameFragments);
740 			notFoundFileNameFragments.removeAll(foundFragments);
741 			return notFoundFileNameFragments;
742 		}
743 
744 		private void verifyMandatoryFilesExist(List<String> theExpectedFilenameFragments) {
745 			List<String> notFound = notFound(theExpectedFilenameFragments);
746 			if (!notFound.isEmpty()) {
747 				throw new UnprocessableEntityException("Could not find the following mandatory files in input: " + notFound);
748 			}
749 		}
750 
751 		private void verifyOptionalFilesExist(List<String> theExpectedFilenameFragments) {
752 			List<String> notFound = notFound(theExpectedFilenameFragments);
753 			if (!notFound.isEmpty()) {
754 				ourLog.warn("Could not find the following optional files: " + notFound);
755 			}
756 		}
757 
758 
759 	}
760 
761 	public static String firstNonBlank(String... theStrings) {
762 		String retVal = "";
763 		for (String nextString : theStrings) {
764 			if (isNotBlank(nextString)) {
765 				retVal = nextString;
766 				break;
767 			}
768 		}
769 		return retVal;
770 	}
771 
772 	public static TermConcept getOrCreateConcept(TermCodeSystemVersion codeSystemVersion, Map<String, TermConcept> id2concept, String id) {
773 		TermConcept concept = id2concept.get(id);
774 		if (concept == null) {
775 			concept = new TermConcept();
776 			id2concept.put(id, concept);
777 			concept.setCodeSystemVersion(codeSystemVersion);
778 		}
779 		return concept;
780 	}
781 }