001/*
002 * #%L
003 * HAPI FHIR JPA Server
004 * %%
005 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.jpa.term;
021
022import ca.uhn.fhir.context.FhirContext;
023import ca.uhn.fhir.i18n.Msg;
024import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
025import ca.uhn.fhir.jpa.entity.TermConcept;
026import ca.uhn.fhir.jpa.entity.TermConceptParentChildLink;
027import ca.uhn.fhir.jpa.entity.TermConceptProperty;
028import ca.uhn.fhir.jpa.term.api.ITermCodeSystemStorageSvc;
029import ca.uhn.fhir.jpa.term.api.ITermDeferredStorageSvc;
030import ca.uhn.fhir.jpa.term.api.ITermLoaderSvc;
031import ca.uhn.fhir.jpa.term.custom.CustomTerminologySet;
032import ca.uhn.fhir.jpa.term.icd10.Icd10Loader;
033import ca.uhn.fhir.jpa.term.icd10cm.Icd10CmLoader;
034import ca.uhn.fhir.jpa.term.loinc.LoincAnswerListHandler;
035import ca.uhn.fhir.jpa.term.loinc.LoincAnswerListLinkHandler;
036import ca.uhn.fhir.jpa.term.loinc.LoincCodingPropertiesHandler;
037import ca.uhn.fhir.jpa.term.loinc.LoincConsumerNameHandler;
038import ca.uhn.fhir.jpa.term.loinc.LoincDocumentOntologyHandler;
039import ca.uhn.fhir.jpa.term.loinc.LoincGroupFileHandler;
040import ca.uhn.fhir.jpa.term.loinc.LoincGroupTermsFileHandler;
041import ca.uhn.fhir.jpa.term.loinc.LoincHandler;
042import ca.uhn.fhir.jpa.term.loinc.LoincHierarchyHandler;
043import ca.uhn.fhir.jpa.term.loinc.LoincIeeeMedicalDeviceCodeHandler;
044import ca.uhn.fhir.jpa.term.loinc.LoincImagingDocumentCodeHandler;
045import ca.uhn.fhir.jpa.term.loinc.LoincLinguisticVariantHandler;
046import ca.uhn.fhir.jpa.term.loinc.LoincLinguisticVariantsHandler;
047import ca.uhn.fhir.jpa.term.loinc.LoincMapToHandler;
048import ca.uhn.fhir.jpa.term.loinc.LoincParentGroupFileHandler;
049import ca.uhn.fhir.jpa.term.loinc.LoincPartHandler;
050import ca.uhn.fhir.jpa.term.loinc.LoincPartLinkHandler;
051import ca.uhn.fhir.jpa.term.loinc.LoincPartRelatedCodeMappingHandler;
052import ca.uhn.fhir.jpa.term.loinc.LoincRsnaPlaybookHandler;
053import ca.uhn.fhir.jpa.term.loinc.LoincTop2000LabResultsSiHandler;
054import ca.uhn.fhir.jpa.term.loinc.LoincTop2000LabResultsUsHandler;
055import ca.uhn.fhir.jpa.term.loinc.LoincUniversalOrderSetHandler;
056import ca.uhn.fhir.jpa.term.loinc.LoincXmlFileZipContentsHandler;
057import ca.uhn.fhir.jpa.term.loinc.PartTypeAndPartName;
058import ca.uhn.fhir.jpa.term.snomedct.SctHandlerConcept;
059import ca.uhn.fhir.jpa.term.snomedct.SctHandlerDescription;
060import ca.uhn.fhir.jpa.term.snomedct.SctHandlerRelationship;
061import ca.uhn.fhir.jpa.util.Counter;
062import ca.uhn.fhir.rest.api.EncodingEnum;
063import ca.uhn.fhir.rest.api.server.RequestDetails;
064import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
065import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
066import ca.uhn.fhir.util.ValidateUtil;
067import com.google.common.annotations.VisibleForTesting;
068import com.google.common.base.Charsets;
069import jakarta.annotation.Nonnull;
070import org.apache.commons.csv.CSVFormat;
071import org.apache.commons.csv.CSVParser;
072import org.apache.commons.csv.CSVRecord;
073import org.apache.commons.csv.QuoteMode;
074import org.apache.commons.io.IOUtils;
075import org.apache.commons.lang3.ObjectUtils;
076import org.apache.commons.lang3.StringUtils;
077import org.apache.commons.lang3.Validate;
078import org.hl7.fhir.instance.model.api.IIdType;
079import org.hl7.fhir.r4.model.CodeSystem;
080import org.hl7.fhir.r4.model.ConceptMap;
081import org.hl7.fhir.r4.model.Enumerations;
082import org.hl7.fhir.r4.model.ValueSet;
083import org.springframework.aop.support.AopUtils;
084import org.springframework.beans.factory.annotation.Autowired;
085import org.xml.sax.SAXException;
086
087import java.io.IOException;
088import java.io.InputStream;
089import java.io.InputStreamReader;
090import java.io.LineNumberReader;
091import java.io.Reader;
092import java.util.ArrayList;
093import java.util.Arrays;
094import java.util.Collections;
095import java.util.Date;
096import java.util.HashMap;
097import java.util.HashSet;
098import java.util.Iterator;
099import java.util.List;
100import java.util.Locale;
101import java.util.Map;
102import java.util.Map.Entry;
103import java.util.Optional;
104import java.util.Properties;
105import java.util.Set;
106import java.util.stream.Collectors;
107
108import static ca.uhn.fhir.jpa.term.api.ITermCodeSystemStorageSvc.MAKE_LOADING_VERSION_CURRENT;
109import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_ANSWERLIST_FILE;
110import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_ANSWERLIST_FILE_DEFAULT;
111import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_ANSWERLIST_LINK_FILE;
112import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_ANSWERLIST_LINK_FILE_DEFAULT;
113import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_CODESYSTEM_MAKE_CURRENT;
114import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_CODESYSTEM_VERSION;
115import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_CONSUMER_NAME_FILE;
116import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_CONSUMER_NAME_FILE_DEFAULT;
117import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_DOCUMENT_ONTOLOGY_FILE;
118import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_DOCUMENT_ONTOLOGY_FILE_DEFAULT;
119import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_FILE;
120import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_FILE_DEFAULT;
121import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_GROUP_FILE;
122import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_GROUP_FILE_DEFAULT;
123import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_GROUP_TERMS_FILE;
124import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_GROUP_TERMS_FILE_DEFAULT;
125import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_HIERARCHY_FILE;
126import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_HIERARCHY_FILE_DEFAULT;
127import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE;
128import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE_DEFAULT;
129import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_IMAGING_DOCUMENT_CODES_FILE;
130import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_IMAGING_DOCUMENT_CODES_FILE_DEFAULT;
131import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_LINGUISTIC_VARIANTS_FILE;
132import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_LINGUISTIC_VARIANTS_FILE_DEFAULT;
133import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_LINGUISTIC_VARIANTS_PATH;
134import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_LINGUISTIC_VARIANTS_PATH_DEFAULT;
135import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_MAPTO_FILE;
136import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_MAPTO_FILE_DEFAULT;
137import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PARENT_GROUP_FILE;
138import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PARENT_GROUP_FILE_DEFAULT;
139import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_FILE;
140import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_FILE_DEFAULT;
141import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_LINK_FILE;
142import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_LINK_FILE_DEFAULT;
143import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_LINK_FILE_PRIMARY;
144import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_LINK_FILE_PRIMARY_DEFAULT;
145import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_LINK_FILE_SUPPLEMENTARY;
146import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_LINK_FILE_SUPPLEMENTARY_DEFAULT;
147import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_RELATED_CODE_MAPPING_FILE;
148import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_PART_RELATED_CODE_MAPPING_FILE_DEFAULT;
149import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_RSNA_PLAYBOOK_FILE;
150import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_RSNA_PLAYBOOK_FILE_DEFAULT;
151import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE;
152import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE_DEFAULT;
153import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE;
154import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE_DEFAULT;
155import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE;
156import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE_DEFAULT;
157import static ca.uhn.fhir.jpa.term.loinc.LoincUploadPropertiesEnum.LOINC_UPLOAD_PROPERTIES_FILE;
158import static org.apache.commons.lang3.StringUtils.isBlank;
159import static org.apache.commons.lang3.StringUtils.isNotBlank;
160import static org.hl7.fhir.common.hapi.validation.support.ValidationConstants.LOINC_ALL_VALUESET_ID;
161
162public class TermLoaderSvcImpl implements ITermLoaderSvc {
163        public static final String CUSTOM_CONCEPTS_FILE = "concepts.csv";
164        public static final String CUSTOM_HIERARCHY_FILE = "hierarchy.csv";
165        public static final String CUSTOM_PROPERTIES_FILE = "properties.csv";
166        static final String IMGTHLA_HLA_NOM_TXT = "hla_nom.txt";
167        static final String IMGTHLA_HLA_XML = "hla.xml";
168        static final String CUSTOM_CODESYSTEM_JSON = "codesystem.json";
169        private static final String SCT_FILE_CONCEPT = "Terminology/sct2_Concept_Full_";
170        private static final String SCT_FILE_DESCRIPTION = "Terminology/sct2_Description_Full";
171        private static final String SCT_FILE_RELATIONSHIP = "Terminology/sct2_Relationship_Full";
172        private static final String CUSTOM_CODESYSTEM_XML = "codesystem.xml";
173
174        private static final int LOG_INCREMENT = 1000;
175        private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(TermLoaderSvcImpl.class);
176        // FYI: Hardcoded to R4 because that's what the term svc uses internally
177        private final FhirContext myCtx = FhirContext.forR4Cached();
178        private final ITermDeferredStorageSvc myDeferredStorageSvc;
179        private final ITermCodeSystemStorageSvc myCodeSystemStorageSvc;
180
181        @Autowired
182        public TermLoaderSvcImpl(
183                        ITermDeferredStorageSvc theDeferredStorageSvc, ITermCodeSystemStorageSvc theCodeSystemStorageSvc) {
184                this(theDeferredStorageSvc, theCodeSystemStorageSvc, true);
185        }
186
187        private TermLoaderSvcImpl(
188                        ITermDeferredStorageSvc theDeferredStorageSvc,
189                        ITermCodeSystemStorageSvc theCodeSystemStorageSvc,
190                        boolean theProxyCheck) {
191                if (theProxyCheck) {
192                        // If these validations start failing, it likely means a cyclic dependency has been introduced into the
193                        // Spring Application
194                        // Context that is preventing the Spring auto-proxy bean post-processor from being able to proxy these
195                        // beans.  Check
196                        // for recent changes to the Spring @Configuration that may have caused this.
197                        Validate.isTrue(
198                                        AopUtils.isAopProxy(theDeferredStorageSvc),
199                                        theDeferredStorageSvc.getClass().getName()
200                                                        + " is not a proxy.  @Transactional annotations will be ignored.");
201                        Validate.isTrue(
202                                        AopUtils.isAopProxy(theCodeSystemStorageSvc),
203                                        theCodeSystemStorageSvc.getClass().getName()
204                                                        + " is not a proxy.  @Transactional annotations will be ignored.");
205                }
206                myDeferredStorageSvc = theDeferredStorageSvc;
207                myCodeSystemStorageSvc = theCodeSystemStorageSvc;
208        }
209
210        @VisibleForTesting
211        public static TermLoaderSvcImpl withoutProxyCheck(
212                        ITermDeferredStorageSvc theTermDeferredStorageSvc, ITermCodeSystemStorageSvc theTermCodeSystemStorageSvc) {
213                return new TermLoaderSvcImpl(theTermDeferredStorageSvc, theTermCodeSystemStorageSvc, false);
214        }
215
216        @Override
217        public UploadStatistics loadImgthla(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
218                try (LoadedFileDescriptors descriptors = getLoadedFileDescriptors(theFiles)) {
219                        List<String> mandatoryFilenameFragments = Arrays.asList(IMGTHLA_HLA_NOM_TXT, IMGTHLA_HLA_XML);
220                        descriptors.verifyMandatoryFilesExist(mandatoryFilenameFragments);
221
222                        ourLog.info("Beginning IMGTHLA processing");
223
224                        return processImgthlaFiles(descriptors, theRequestDetails);
225                }
226        }
227
228        @VisibleForTesting
229        LoadedFileDescriptors getLoadedFileDescriptors(List<FileDescriptor> theFiles) {
230                return new LoadedFileDescriptors(theFiles);
231        }
232
233        @Override
234        public UploadStatistics loadLoinc(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
235                try (LoadedFileDescriptors descriptors = getLoadedFileDescriptors(theFiles)) {
236                        Properties uploadProperties = getProperties(descriptors, LOINC_UPLOAD_PROPERTIES_FILE.getCode());
237
238                        String codeSystemVersionId = uploadProperties.getProperty(LOINC_CODESYSTEM_VERSION.getCode());
239                        boolean isMakeCurrentVersion =
240                                        Boolean.parseBoolean(uploadProperties.getProperty(LOINC_CODESYSTEM_MAKE_CURRENT.getCode(), "true"));
241
242                        if (StringUtils.isBlank(codeSystemVersionId) && !isMakeCurrentVersion) {
243                                throw new InvalidRequestException(
244                                                Msg.code(864) + "'" + LOINC_CODESYSTEM_VERSION.getCode() + "' property is required when '"
245                                                                + LOINC_CODESYSTEM_MAKE_CURRENT.getCode() + "' property is 'false'");
246                        }
247
248                        List<String> mandatoryFilenameFragments = Arrays.asList(
249                                        uploadProperties.getProperty(
250                                                        LOINC_ANSWERLIST_FILE.getCode(), LOINC_ANSWERLIST_FILE_DEFAULT.getCode()),
251                                        uploadProperties.getProperty(
252                                                        LOINC_ANSWERLIST_LINK_FILE.getCode(), LOINC_ANSWERLIST_LINK_FILE_DEFAULT.getCode()),
253                                        uploadProperties.getProperty(
254                                                        LOINC_DOCUMENT_ONTOLOGY_FILE.getCode(), LOINC_DOCUMENT_ONTOLOGY_FILE_DEFAULT.getCode()),
255                                        uploadProperties.getProperty(LOINC_FILE.getCode(), LOINC_FILE_DEFAULT.getCode()),
256                                        uploadProperties.getProperty(
257                                                        LOINC_HIERARCHY_FILE.getCode(), LOINC_HIERARCHY_FILE_DEFAULT.getCode()),
258                                        uploadProperties.getProperty(
259                                                        LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE.getCode(),
260                                                        LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE_DEFAULT.getCode()),
261                                        uploadProperties.getProperty(
262                                                        LOINC_IMAGING_DOCUMENT_CODES_FILE.getCode(),
263                                                        LOINC_IMAGING_DOCUMENT_CODES_FILE_DEFAULT.getCode()),
264                                        uploadProperties.getProperty(LOINC_PART_FILE.getCode(), LOINC_PART_FILE_DEFAULT.getCode()),
265                                        uploadProperties.getProperty(
266                                                        LOINC_PART_RELATED_CODE_MAPPING_FILE.getCode(),
267                                                        LOINC_PART_RELATED_CODE_MAPPING_FILE_DEFAULT.getCode()),
268                                        uploadProperties.getProperty(
269                                                        LOINC_RSNA_PLAYBOOK_FILE.getCode(), LOINC_RSNA_PLAYBOOK_FILE_DEFAULT.getCode()),
270                                        uploadProperties.getProperty(
271                                                        LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE.getCode(),
272                                                        LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE_DEFAULT.getCode()));
273                        descriptors.verifyMandatoryFilesExist(mandatoryFilenameFragments);
274
275                        List<String> splitPartLinkFilenameFragments = Arrays.asList(
276                                        uploadProperties.getProperty(
277                                                        LOINC_PART_LINK_FILE_PRIMARY.getCode(), LOINC_PART_LINK_FILE_PRIMARY_DEFAULT.getCode()),
278                                        uploadProperties.getProperty(
279                                                        LOINC_PART_LINK_FILE_SUPPLEMENTARY.getCode(),
280                                                        LOINC_PART_LINK_FILE_SUPPLEMENTARY_DEFAULT.getCode()));
281                        descriptors.verifyPartLinkFilesExist(
282                                        splitPartLinkFilenameFragments,
283                                        uploadProperties.getProperty(
284                                                        LOINC_PART_LINK_FILE.getCode(), LOINC_PART_LINK_FILE_DEFAULT.getCode()));
285
286                        List<String> optionalFilenameFragments = Arrays.asList(
287                                        uploadProperties.getProperty(LOINC_GROUP_FILE.getCode(), LOINC_GROUP_FILE_DEFAULT.getCode()),
288                                        uploadProperties.getProperty(
289                                                        LOINC_GROUP_TERMS_FILE.getCode(), LOINC_GROUP_TERMS_FILE_DEFAULT.getCode()),
290                                        uploadProperties.getProperty(
291                                                        LOINC_PARENT_GROUP_FILE.getCode(), LOINC_PARENT_GROUP_FILE_DEFAULT.getCode()),
292                                        uploadProperties.getProperty(
293                                                        LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE.getCode(),
294                                                        LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE_DEFAULT.getCode()),
295                                        uploadProperties.getProperty(
296                                                        LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE.getCode(),
297                                                        LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE_DEFAULT.getCode()),
298                                        uploadProperties.getProperty(LOINC_MAPTO_FILE.getCode(), LOINC_MAPTO_FILE_DEFAULT.getCode()),
299
300                                        // -- optional consumer name
301                                        uploadProperties.getProperty(
302                                                        LOINC_CONSUMER_NAME_FILE.getCode(), LOINC_CONSUMER_NAME_FILE_DEFAULT.getCode()),
303                                        uploadProperties.getProperty(
304                                                        LOINC_LINGUISTIC_VARIANTS_FILE.getCode(),
305                                                        LOINC_LINGUISTIC_VARIANTS_FILE_DEFAULT.getCode()));
306
307                        descriptors.verifyOptionalFilesExist(optionalFilenameFragments);
308
309                        ourLog.info("Beginning LOINC processing");
310
311                        if (isMakeCurrentVersion) {
312                                if (codeSystemVersionId != null) {
313                                        processLoincFiles(descriptors, theRequestDetails, uploadProperties, false);
314                                        uploadProperties.remove(LOINC_CODESYSTEM_VERSION.getCode());
315                                }
316                                ourLog.info("Uploading CodeSystem and making it current version");
317
318                        } else {
319                                ourLog.info("Uploading CodeSystem without updating current version");
320                        }
321
322                        theRequestDetails.getUserData().put(MAKE_LOADING_VERSION_CURRENT, isMakeCurrentVersion);
323                        return processLoincFiles(descriptors, theRequestDetails, uploadProperties, true);
324                }
325        }
326
327        @Override
328        public UploadStatistics loadSnomedCt(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
329                try (LoadedFileDescriptors descriptors = getLoadedFileDescriptors(theFiles)) {
330
331                        List<String> expectedFilenameFragments =
332                                        Arrays.asList(SCT_FILE_DESCRIPTION, SCT_FILE_RELATIONSHIP, SCT_FILE_CONCEPT);
333                        descriptors.verifyMandatoryFilesExist(expectedFilenameFragments);
334
335                        ourLog.info("Beginning SNOMED CT processing");
336
337                        return processSnomedCtFiles(descriptors, theRequestDetails);
338                }
339        }
340
341        @Override
342        public UploadStatistics loadIcd10(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
343                ourLog.info("Beginning ICD-10 processing");
344
345                CodeSystem codeSystem = new CodeSystem();
346                codeSystem.setUrl(ICD10_URI);
347                codeSystem.setContent(CodeSystem.CodeSystemContentMode.NOTPRESENT);
348                codeSystem.setStatus(Enumerations.PublicationStatus.ACTIVE);
349
350                TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
351                int count = 0;
352
353                try (LoadedFileDescriptors compressedDescriptors = getLoadedFileDescriptors(theFiles)) {
354                        for (FileDescriptor nextDescriptor : compressedDescriptors.getUncompressedFileDescriptors()) {
355                                if (nextDescriptor.getFilename().toLowerCase(Locale.US).endsWith(".xml")) {
356                                        try (InputStream inputStream = nextDescriptor.getInputStream();
357                                                        InputStreamReader reader = new InputStreamReader(inputStream, Charsets.UTF_8)) {
358                                                Icd10Loader loader = new Icd10Loader(codeSystem, codeSystemVersion);
359                                                loader.load(reader);
360                                                count += loader.getConceptCount();
361                                        }
362                                }
363                        }
364                } catch (IOException | SAXException e) {
365                        throw new InternalErrorException(Msg.code(2135) + e);
366                }
367
368                codeSystem.setVersion(codeSystemVersion.getCodeSystemVersionId());
369
370                IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, codeSystem, null, null);
371                return new UploadStatistics(count, target);
372        }
373
374        @Override
375        public UploadStatistics loadIcd10cm(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
376                ourLog.info("Beginning ICD-10-cm processing");
377
378                CodeSystem cs = new CodeSystem();
379                cs.setUrl(ICD10CM_URI);
380                cs.setName("ICD-10-CM");
381                cs.setContent(CodeSystem.CodeSystemContentMode.NOTPRESENT);
382                cs.setStatus(Enumerations.PublicationStatus.ACTIVE);
383
384                TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
385                int count = 0;
386
387                try (LoadedFileDescriptors compressedDescriptors = getLoadedFileDescriptors(theFiles)) {
388                        for (FileDescriptor nextDescriptor : compressedDescriptors.getUncompressedFileDescriptors()) {
389                                if (nextDescriptor.getFilename().toLowerCase(Locale.US).endsWith(".xml")) {
390                                        try (InputStream inputStream = nextDescriptor.getInputStream();
391                                                        InputStreamReader reader = new InputStreamReader(inputStream, Charsets.UTF_8)) {
392                                                Icd10CmLoader loader = new Icd10CmLoader(codeSystemVersion);
393                                                loader.load(reader);
394                                                count += loader.getConceptCount();
395                                        }
396                                }
397                        }
398                } catch (IOException | SAXException e) {
399                        throw new InternalErrorException(Msg.code(865) + e);
400                }
401
402                cs.setVersion(codeSystemVersion.getCodeSystemVersionId());
403
404                IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, cs, null, null);
405                return new UploadStatistics(count, target);
406        }
407
408        @Override
409        public UploadStatistics loadCustom(
410                        String theSystem, List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
411                try (LoadedFileDescriptors descriptors = getLoadedFileDescriptors(theFiles)) {
412                        Optional<String> codeSystemContent = loadFile(descriptors, CUSTOM_CODESYSTEM_JSON, CUSTOM_CODESYSTEM_XML);
413                        CodeSystem codeSystem;
414                        if (codeSystemContent.isPresent()) {
415                                codeSystem = EncodingEnum.detectEncoding(codeSystemContent.get())
416                                                .newParser(myCtx)
417                                                .parseResource(CodeSystem.class, codeSystemContent.get());
418                                ValidateUtil.isTrueOrThrowInvalidRequest(
419                                                theSystem.equalsIgnoreCase(codeSystem.getUrl()),
420                                                "CodeSystem.url does not match the supplied system: %s",
421                                                theSystem);
422                                ValidateUtil.isTrueOrThrowInvalidRequest(
423                                                CodeSystem.CodeSystemContentMode.NOTPRESENT.equals(codeSystem.getContent()),
424                                                "CodeSystem.content does not match the expected value: %s",
425                                                CodeSystem.CodeSystemContentMode.NOTPRESENT.toCode());
426                        } else {
427                                codeSystem = new CodeSystem();
428                                codeSystem.setUrl(theSystem);
429                                codeSystem.setContent(CodeSystem.CodeSystemContentMode.NOTPRESENT);
430                        }
431
432                        CustomTerminologySet terminologySet = CustomTerminologySet.load(descriptors, false);
433                        TermCodeSystemVersion csv = terminologySet.toCodeSystemVersion();
434
435                        IIdType target = storeCodeSystem(theRequestDetails, csv, codeSystem, null, null);
436                        return new UploadStatistics(terminologySet.getSize(), target);
437                }
438        }
439
440        @Override
441        public UploadStatistics loadDeltaAdd(
442                        String theSystem, List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
443                ourLog.info(
444                                "Processing terminology delta ADD for system[{}] with files: {}",
445                                theSystem,
446                                theFiles.stream().map(FileDescriptor::getFilename).collect(Collectors.toList()));
447                try (LoadedFileDescriptors descriptors = getLoadedFileDescriptors(theFiles)) {
448                        CustomTerminologySet terminologySet = CustomTerminologySet.load(descriptors, false);
449                        return myCodeSystemStorageSvc.applyDeltaCodeSystemsAdd(theSystem, terminologySet);
450                }
451        }
452
453        @Override
454        public UploadStatistics loadDeltaRemove(
455                        String theSystem, List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
456                ourLog.info(
457                                "Processing terminology delta REMOVE for system[{}] with files: {}",
458                                theSystem,
459                                theFiles.stream().map(FileDescriptor::getFilename).collect(Collectors.toList()));
460                try (LoadedFileDescriptors descriptors = getLoadedFileDescriptors(theFiles)) {
461                        CustomTerminologySet terminologySet = CustomTerminologySet.load(descriptors, true);
462                        return myCodeSystemStorageSvc.applyDeltaCodeSystemsRemove(theSystem, terminologySet);
463                }
464        }
465
466        private void dropCircularRefs(
467                        TermConcept theConcept, ArrayList<String> theChain, Map<String, TermConcept> theCode2concept) {
468
469                theChain.add(theConcept.getCode());
470                for (Iterator<TermConceptParentChildLink> childIter =
471                                                theConcept.getChildren().iterator();
472                                childIter.hasNext(); ) {
473                        TermConceptParentChildLink next = childIter.next();
474                        TermConcept nextChild = next.getChild();
475                        if (theChain.contains(nextChild.getCode())) {
476
477                                StringBuilder b = new StringBuilder();
478                                b.append("Removing circular reference code ");
479                                b.append(nextChild.getCode());
480                                b.append(" from parent ");
481                                b.append(next.getParent().getCode());
482                                b.append(". Chain was: ");
483                                for (String nextInChain : theChain) {
484                                        TermConcept nextCode = theCode2concept.get(nextInChain);
485                                        b.append(nextCode.getCode());
486                                        b.append('[');
487                                        b.append(StringUtils.substring(nextCode.getDisplay(), 0, 20)
488                                                        .replace("[", "")
489                                                        .replace("]", "")
490                                                        .trim());
491                                        b.append("] ");
492                                }
493                                ourLog.info(b.toString(), theConcept.getCode());
494                                childIter.remove();
495                                nextChild.getParents().remove(next);
496
497                        } else {
498                                dropCircularRefs(nextChild, theChain, theCode2concept);
499                        }
500                }
501                theChain.remove(theChain.size() - 1);
502        }
503
504        @VisibleForTesting
505        @Nonnull
506        Properties getProperties(LoadedFileDescriptors theDescriptors, String thePropertiesFile) {
507                Properties retVal = new Properties();
508
509                try (InputStream propertyStream = ca.uhn.fhir.jpa.term.TermLoaderSvcImpl.class.getResourceAsStream(
510                                "/ca/uhn/fhir/jpa/term/loinc/loincupload.properties")) {
511                        retVal.load(propertyStream);
512                } catch (IOException e) {
513                        throw new InternalErrorException(Msg.code(866) + "Failed to process loinc.properties", e);
514                }
515
516                for (FileDescriptor next : theDescriptors.getUncompressedFileDescriptors()) {
517                        if (next.getFilename().endsWith(thePropertiesFile)) {
518                                try {
519                                        try (InputStream inputStream = next.getInputStream()) {
520                                                retVal.load(inputStream);
521                                        }
522                                } catch (IOException e) {
523                                        throw new InternalErrorException(Msg.code(867) + "Failed to read " + thePropertiesFile, e);
524                                }
525                        }
526                }
527                return retVal;
528        }
529
530        private Optional<String> loadFile(LoadedFileDescriptors theDescriptors, String... theFilenames) {
531                for (FileDescriptor next : theDescriptors.getUncompressedFileDescriptors()) {
532                        for (String nextFilename : theFilenames) {
533                                if (next.getFilename().endsWith(nextFilename)) {
534                                        try {
535                                                String contents = IOUtils.toString(next.getInputStream(), Charsets.UTF_8);
536                                                return Optional.of(contents);
537                                        } catch (IOException e) {
538                                                throw new InternalErrorException(Msg.code(868) + e);
539                                        }
540                                }
541                        }
542                }
543                return Optional.empty();
544        }
545
546        private UploadStatistics processImgthlaFiles(
547                        LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails) {
548                final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
549                final List<ValueSet> valueSets = new ArrayList<>();
550                final List<ConceptMap> conceptMaps = new ArrayList<>();
551
552                CodeSystem imgthlaCs;
553                try {
554                        String imgthlaCsString = IOUtils.toString(
555                                        TermReadSvcImpl.class.getResourceAsStream("/ca/uhn/fhir/jpa/term/imgthla/imgthla.xml"),
556                                        Charsets.UTF_8);
557                        imgthlaCs = FhirContext.forR4Cached().newXmlParser().parseResource(CodeSystem.class, imgthlaCsString);
558                } catch (IOException e) {
559                        throw new InternalErrorException(Msg.code(869) + "Failed to load imgthla.xml", e);
560                }
561
562                boolean foundHlaNom = false;
563                boolean foundHlaXml = false;
564                for (FileDescriptor nextZipBytes : theDescriptors.getUncompressedFileDescriptors()) {
565                        String nextFilename = nextZipBytes.getFilename();
566
567                        if (!IMGTHLA_HLA_NOM_TXT.equals(nextFilename)
568                                        && !nextFilename.endsWith("/" + IMGTHLA_HLA_NOM_TXT)
569                                        && !IMGTHLA_HLA_XML.equals(nextFilename)
570                                        && !nextFilename.endsWith("/" + IMGTHLA_HLA_XML)) {
571                                ourLog.info("Skipping unexpected file {}", nextFilename);
572                                continue;
573                        }
574
575                        if (IMGTHLA_HLA_NOM_TXT.equals(nextFilename) || nextFilename.endsWith("/" + IMGTHLA_HLA_NOM_TXT)) {
576                                // process colon-delimited hla_nom.txt file
577                                ourLog.info("Processing file {}", nextFilename);
578
579                                //                              IRecordHandler handler = new HlaNomTxtHandler(codeSystemVersion, code2concept,
580                                // propertyNamesToTypes);
581                                //                              AntigenSource antigenSource = new WmdaAntigenSource(hlaNomFilename, relSerSerFilename,
582                                // relDnaSerFilename);
583
584                                Reader reader = null;
585                                try {
586                                        reader = new InputStreamReader(nextZipBytes.getInputStream(), Charsets.UTF_8);
587
588                                        LineNumberReader lnr = new LineNumberReader(reader);
589                                        while (lnr.readLine() != null) {}
590                                        ourLog.warn("Lines read from {}:  {}", nextFilename, lnr.getLineNumber());
591
592                                } catch (IOException e) {
593                                        throw new InternalErrorException(Msg.code(870) + e);
594                                } finally {
595                                        IOUtils.closeQuietly(reader);
596                                }
597
598                                foundHlaNom = true;
599                        }
600
601                        if (IMGTHLA_HLA_XML.equals(nextFilename) || nextFilename.endsWith("/" + IMGTHLA_HLA_XML)) {
602                                // process hla.xml file
603                                ourLog.info("Processing file {}", nextFilename);
604
605                                //                              IRecordHandler handler = new HlaXmlHandler(codeSystemVersion, code2concept, propertyNamesToTypes);
606                                //                              AlleleSource alleleSource = new HlaXmlAlleleSource(hlaXmlFilename);
607
608                                Reader reader = null;
609                                try {
610                                        reader = new InputStreamReader(nextZipBytes.getInputStream(), Charsets.UTF_8);
611
612                                        LineNumberReader lnr = new LineNumberReader(reader);
613                                        while (lnr.readLine() != null) {}
614                                        ourLog.warn("Lines read from {}:  {}", nextFilename, lnr.getLineNumber());
615
616                                } catch (IOException e) {
617                                        throw new InternalErrorException(Msg.code(871) + e);
618                                } finally {
619                                        IOUtils.closeQuietly(reader);
620                                }
621
622                                foundHlaXml = true;
623                        }
624                }
625
626                if (!foundHlaNom) {
627                        throw new InvalidRequestException(Msg.code(872) + "Did not find file matching " + IMGTHLA_HLA_NOM_TXT);
628                }
629
630                if (!foundHlaXml) {
631                        throw new InvalidRequestException(Msg.code(873) + "Did not find file matching " + IMGTHLA_HLA_XML);
632                }
633
634                int valueSetCount = valueSets.size();
635                int rootConceptCount = codeSystemVersion.getConcepts().size();
636                ourLog.info(
637                                "Have {} total concepts, {} root concepts, {} ValueSets",
638                                rootConceptCount,
639                                rootConceptCount,
640                                valueSetCount);
641
642                // remove this when fully implemented ...
643                throw new InternalErrorException(
644                                Msg.code(874) + "HLA nomenclature terminology upload not yet fully implemented.");
645
646                //              IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, imgthlaCs, valueSets, conceptMaps);
647                //
648                //              return new UploadStatistics(conceptCount, target);
649        }
650
651        UploadStatistics processLoincFiles(
652                        LoadedFileDescriptors theDescriptors,
653                        RequestDetails theRequestDetails,
654                        Properties theUploadProperties,
655                        Boolean theCloseFiles) {
656                final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
657                final Map<String, TermConcept> code2concept = new HashMap<>();
658                final List<ValueSet> valueSets = new ArrayList<>();
659                final List<ConceptMap> conceptMaps = new ArrayList<>();
660
661                final List<LoincLinguisticVariantsHandler.LinguisticVariant> linguisticVariants = new ArrayList<>();
662
663                LoincXmlFileZipContentsHandler loincXmlHandler = getLoincXmlFileZipContentsHandler();
664                iterateOverZipFile(theDescriptors, "loinc.xml", false, false, loincXmlHandler);
665                String loincCsString = loincXmlHandler.getContents();
666                if (isBlank(loincCsString)) {
667                        throw new InvalidRequestException(Msg.code(875) + "Did not find loinc.xml in the ZIP distribution.");
668                }
669
670                CodeSystem loincCs = FhirContext.forR4Cached().newXmlParser().parseResource(CodeSystem.class, loincCsString);
671                if (isNotBlank(loincCs.getVersion())) {
672                        throw new InvalidRequestException(
673                                        Msg.code(876) + "'loinc.xml' file must not have a version defined. To define a version use '"
674                                                        + LOINC_CODESYSTEM_VERSION.getCode() + "' property of 'loincupload.properties' file");
675                }
676
677                String codeSystemVersionId = theUploadProperties.getProperty(LOINC_CODESYSTEM_VERSION.getCode());
678                if (codeSystemVersionId != null) {
679                        loincCs.setVersion(codeSystemVersionId);
680                        loincCs.setId(loincCs.getId() + "-" + codeSystemVersionId);
681                }
682
683                Map<String, CodeSystem.PropertyType> propertyNamesToTypes = new HashMap<>();
684                for (CodeSystem.PropertyComponent nextProperty : loincCs.getProperty()) {
685                        String nextPropertyCode = nextProperty.getCode();
686                        CodeSystem.PropertyType nextPropertyType = nextProperty.getType();
687                        if (isNotBlank(nextPropertyCode)) {
688                                propertyNamesToTypes.put(nextPropertyCode, nextPropertyType);
689                        }
690                }
691
692                // TODO: DM 2019-09-13 - Manually add EXTERNAL_COPYRIGHT_NOTICE property until Regenstrief adds this to
693                // loinc.xml
694                if (!propertyNamesToTypes.containsKey("EXTERNAL_COPYRIGHT_NOTICE")) {
695                        String externalCopyRightNoticeCode = "EXTERNAL_COPYRIGHT_NOTICE";
696                        CodeSystem.PropertyType externalCopyRightNoticeType = CodeSystem.PropertyType.STRING;
697                        propertyNamesToTypes.put(externalCopyRightNoticeCode, externalCopyRightNoticeType);
698                }
699
700                IZipContentsHandlerCsv handler;
701
702                // Part
703                handler = new LoincPartHandler(codeSystemVersion, code2concept);
704                iterateOverZipFileCsv(
705                                theDescriptors,
706                                theUploadProperties.getProperty(LOINC_PART_FILE.getCode(), LOINC_PART_FILE_DEFAULT.getCode()),
707                                handler,
708                                ',',
709                                QuoteMode.NON_NUMERIC,
710                                false);
711                Map<PartTypeAndPartName, String> partTypeAndPartNameToPartNumber =
712                                ((LoincPartHandler) handler).getPartTypeAndPartNameToPartNumber();
713
714                // LOINC string properties
715                handler = new LoincHandler(
716                                codeSystemVersion, code2concept, propertyNamesToTypes, partTypeAndPartNameToPartNumber);
717                iterateOverZipFileCsv(
718                                theDescriptors,
719                                theUploadProperties.getProperty(LOINC_FILE.getCode(), LOINC_FILE_DEFAULT.getCode()),
720                                handler,
721                                ',',
722                                QuoteMode.NON_NUMERIC,
723                                false);
724
725                // LOINC hierarchy
726                handler = new LoincHierarchyHandler(codeSystemVersion, code2concept);
727                iterateOverZipFileCsv(
728                                theDescriptors,
729                                theUploadProperties.getProperty(LOINC_HIERARCHY_FILE.getCode(), LOINC_HIERARCHY_FILE_DEFAULT.getCode()),
730                                handler,
731                                ',',
732                                QuoteMode.NON_NUMERIC,
733                                false);
734
735                // Answer lists (ValueSets of potential answers/values for LOINC "questions")
736                handler = new LoincAnswerListHandler(
737                                codeSystemVersion, code2concept, valueSets, conceptMaps, theUploadProperties, loincCs.getCopyright());
738                iterateOverZipFileCsv(
739                                theDescriptors,
740                                theUploadProperties.getProperty(
741                                                LOINC_ANSWERLIST_FILE.getCode(), LOINC_ANSWERLIST_FILE_DEFAULT.getCode()),
742                                handler,
743                                ',',
744                                QuoteMode.NON_NUMERIC,
745                                false);
746
747                // Answer list links (connects LOINC observation codes to answer list codes)
748                handler = new LoincAnswerListLinkHandler(code2concept);
749                iterateOverZipFileCsv(
750                                theDescriptors,
751                                theUploadProperties.getProperty(
752                                                LOINC_ANSWERLIST_LINK_FILE.getCode(), LOINC_ANSWERLIST_LINK_FILE_DEFAULT.getCode()),
753                                handler,
754                                ',',
755                                QuoteMode.NON_NUMERIC,
756                                false);
757
758                // RSNA playbook
759                // Note that this should come before the "Part Related Code Mapping"
760                // file because there are some duplicate mappings between these
761                // two files, and the RSNA Playbook file has more metadata
762                handler = new LoincRsnaPlaybookHandler(
763                                code2concept, valueSets, conceptMaps, theUploadProperties, loincCs.getCopyright());
764                iterateOverZipFileCsv(
765                                theDescriptors,
766                                theUploadProperties.getProperty(
767                                                LOINC_RSNA_PLAYBOOK_FILE.getCode(), LOINC_RSNA_PLAYBOOK_FILE_DEFAULT.getCode()),
768                                handler,
769                                ',',
770                                QuoteMode.NON_NUMERIC,
771                                false);
772
773                // Part related code mapping
774                handler = new LoincPartRelatedCodeMappingHandler(
775                                code2concept, valueSets, conceptMaps, theUploadProperties, loincCs.getCopyright());
776                iterateOverZipFileCsv(
777                                theDescriptors,
778                                theUploadProperties.getProperty(
779                                                LOINC_PART_RELATED_CODE_MAPPING_FILE.getCode(),
780                                                LOINC_PART_RELATED_CODE_MAPPING_FILE_DEFAULT.getCode()),
781                                handler,
782                                ',',
783                                QuoteMode.NON_NUMERIC,
784                                false);
785
786                // Document ontology
787                handler = new LoincDocumentOntologyHandler(
788                                code2concept,
789                                propertyNamesToTypes,
790                                valueSets,
791                                conceptMaps,
792                                theUploadProperties,
793                                loincCs.getCopyright());
794                iterateOverZipFileCsv(
795                                theDescriptors,
796                                theUploadProperties.getProperty(
797                                                LOINC_DOCUMENT_ONTOLOGY_FILE.getCode(), LOINC_DOCUMENT_ONTOLOGY_FILE_DEFAULT.getCode()),
798                                handler,
799                                ',',
800                                QuoteMode.NON_NUMERIC,
801                                false);
802
803                // Top 2000 codes - US
804                handler = new LoincTop2000LabResultsUsHandler(
805                                code2concept, valueSets, conceptMaps, theUploadProperties, loincCs.getCopyright());
806                iterateOverZipFileCsvOptional(
807                                theDescriptors,
808                                theUploadProperties.getProperty(
809                                                LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE.getCode(),
810                                                LOINC_TOP2000_COMMON_LAB_RESULTS_US_FILE_DEFAULT.getCode()),
811                                handler,
812                                ',',
813                                QuoteMode.NON_NUMERIC,
814                                false);
815
816                // Top 2000 codes - SI
817                handler = new LoincTop2000LabResultsSiHandler(
818                                code2concept, valueSets, conceptMaps, theUploadProperties, loincCs.getCopyright());
819                iterateOverZipFileCsvOptional(
820                                theDescriptors,
821                                theUploadProperties.getProperty(
822                                                LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE.getCode(),
823                                                LOINC_TOP2000_COMMON_LAB_RESULTS_SI_FILE_DEFAULT.getCode()),
824                                handler,
825                                ',',
826                                QuoteMode.NON_NUMERIC,
827                                false);
828
829                // Universal lab order ValueSet
830                handler = new LoincUniversalOrderSetHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
831                iterateOverZipFileCsv(
832                                theDescriptors,
833                                theUploadProperties.getProperty(
834                                                LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE.getCode(),
835                                                LOINC_UNIVERSAL_LAB_ORDER_VALUESET_FILE_DEFAULT.getCode()),
836                                handler,
837                                ',',
838                                QuoteMode.NON_NUMERIC,
839                                false);
840
841                // IEEE medical device codes
842                handler = new LoincIeeeMedicalDeviceCodeHandler(
843                                code2concept, valueSets, conceptMaps, theUploadProperties, loincCs.getCopyright());
844                iterateOverZipFileCsv(
845                                theDescriptors,
846                                theUploadProperties.getProperty(
847                                                LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE.getCode(),
848                                                LOINC_IEEE_MEDICAL_DEVICE_CODE_MAPPING_TABLE_FILE_DEFAULT.getCode()),
849                                handler,
850                                ',',
851                                QuoteMode.NON_NUMERIC,
852                                false);
853
854                // Imaging document codes
855                handler = new LoincImagingDocumentCodeHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
856                iterateOverZipFileCsv(
857                                theDescriptors,
858                                theUploadProperties.getProperty(
859                                                LOINC_IMAGING_DOCUMENT_CODES_FILE.getCode(),
860                                                LOINC_IMAGING_DOCUMENT_CODES_FILE_DEFAULT.getCode()),
861                                handler,
862                                ',',
863                                QuoteMode.NON_NUMERIC,
864                                false);
865
866                // Group
867                handler = new LoincGroupFileHandler(
868                                code2concept, valueSets, conceptMaps, theUploadProperties, loincCs.getCopyright());
869                iterateOverZipFileCsv(
870                                theDescriptors,
871                                theUploadProperties.getProperty(LOINC_GROUP_FILE.getCode(), LOINC_GROUP_FILE_DEFAULT.getCode()),
872                                handler,
873                                ',',
874                                QuoteMode.NON_NUMERIC,
875                                false);
876
877                // Group terms
878                handler = new LoincGroupTermsFileHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
879                iterateOverZipFileCsv(
880                                theDescriptors,
881                                theUploadProperties.getProperty(
882                                                LOINC_GROUP_TERMS_FILE.getCode(), LOINC_GROUP_TERMS_FILE_DEFAULT.getCode()),
883                                handler,
884                                ',',
885                                QuoteMode.NON_NUMERIC,
886                                false);
887
888                // Parent group
889                handler = new LoincParentGroupFileHandler(code2concept, valueSets, conceptMaps, theUploadProperties);
890                iterateOverZipFileCsv(
891                                theDescriptors,
892                                theUploadProperties.getProperty(
893                                                LOINC_PARENT_GROUP_FILE.getCode(), LOINC_PARENT_GROUP_FILE_DEFAULT.getCode()),
894                                handler,
895                                ',',
896                                QuoteMode.NON_NUMERIC,
897                                false);
898
899                // Part link
900                handler = new LoincPartLinkHandler(codeSystemVersion, code2concept, propertyNamesToTypes);
901                iterateOverZipFileCsvOptional(
902                                theDescriptors,
903                                theUploadProperties.getProperty(LOINC_PART_LINK_FILE.getCode(), LOINC_PART_LINK_FILE_DEFAULT.getCode()),
904                                handler,
905                                ',',
906                                QuoteMode.NON_NUMERIC,
907                                false);
908                iterateOverZipFileCsvOptional(
909                                theDescriptors,
910                                theUploadProperties.getProperty(
911                                                LOINC_PART_LINK_FILE_PRIMARY.getCode(), LOINC_PART_LINK_FILE_PRIMARY_DEFAULT.getCode()),
912                                handler,
913                                ',',
914                                QuoteMode.NON_NUMERIC,
915                                false);
916                iterateOverZipFileCsvOptional(
917                                theDescriptors,
918                                theUploadProperties.getProperty(
919                                                LOINC_PART_LINK_FILE_SUPPLEMENTARY.getCode(),
920                                                LOINC_PART_LINK_FILE_SUPPLEMENTARY_DEFAULT.getCode()),
921                                handler,
922                                ',',
923                                QuoteMode.NON_NUMERIC,
924                                false);
925
926                // Consumer Name
927                handler = new LoincConsumerNameHandler(code2concept);
928                iterateOverZipFileCsvOptional(
929                                theDescriptors,
930                                theUploadProperties.getProperty(
931                                                LOINC_CONSUMER_NAME_FILE.getCode(), LOINC_CONSUMER_NAME_FILE_DEFAULT.getCode()),
932                                handler,
933                                ',',
934                                QuoteMode.NON_NUMERIC,
935                                false);
936
937                // LOINC coding properties (must run after all TermConcepts were created)
938                handler = new LoincCodingPropertiesHandler(code2concept, propertyNamesToTypes);
939                iterateOverZipFileCsv(
940                                theDescriptors,
941                                theUploadProperties.getProperty(LOINC_FILE.getCode(), LOINC_FILE_DEFAULT.getCode()),
942                                handler,
943                                ',',
944                                QuoteMode.NON_NUMERIC,
945                                false);
946
947                // Linguistic Variants
948                handler = new LoincLinguisticVariantsHandler(linguisticVariants);
949                iterateOverZipFileCsvOptional(
950                                theDescriptors,
951                                theUploadProperties.getProperty(
952                                                LOINC_LINGUISTIC_VARIANTS_FILE.getCode(), LOINC_LINGUISTIC_VARIANTS_FILE_DEFAULT.getCode()),
953                                handler,
954                                ',',
955                                QuoteMode.NON_NUMERIC,
956                                false);
957
958                String langFileName;
959                for (LoincLinguisticVariantsHandler.LinguisticVariant linguisticVariant : linguisticVariants) {
960                        handler = new LoincLinguisticVariantHandler(code2concept, linguisticVariant.getLanguageCode());
961                        langFileName = linguisticVariant.getLinguisticVariantFileName();
962                        iterateOverZipFileCsvOptional(
963                                        theDescriptors,
964                                        theUploadProperties.getProperty(
965                                                        LOINC_LINGUISTIC_VARIANTS_PATH.getCode() + langFileName,
966                                                        LOINC_LINGUISTIC_VARIANTS_PATH_DEFAULT.getCode() + langFileName),
967                                        handler,
968                                        ',',
969                                        QuoteMode.NON_NUMERIC,
970                                        false);
971                }
972
973                if (theDescriptors.isOptionalFilesExist(List.of(
974                                theUploadProperties.getProperty(LOINC_MAPTO_FILE.getCode(), LOINC_MAPTO_FILE_DEFAULT.getCode())))) {
975                        // LOINC MapTo codes (last to make sure that all concepts were added to code2concept map)
976                        handler = new LoincMapToHandler(code2concept);
977                        iterateOverZipFileCsv(
978                                        theDescriptors,
979                                        theUploadProperties.getProperty(LOINC_MAPTO_FILE.getCode(), LOINC_MAPTO_FILE_DEFAULT.getCode()),
980                                        handler,
981                                        ',',
982                                        QuoteMode.NON_NUMERIC,
983                                        false);
984                }
985
986                if (theCloseFiles) {
987                        IOUtils.closeQuietly(theDescriptors);
988                }
989
990                valueSets.add(getValueSetLoincAll(theUploadProperties, loincCs.getCopyright()));
991
992                for (Entry<String, TermConcept> next : code2concept.entrySet()) {
993                        TermConcept nextConcept = next.getValue();
994                        if (nextConcept.getParents().isEmpty()) {
995                                codeSystemVersion.getConcepts().add(nextConcept);
996                        }
997                }
998
999                int valueSetCount = valueSets.size();
1000                int rootConceptCount = codeSystemVersion.getConcepts().size();
1001                int conceptCount = code2concept.size();
1002                ourLog.info(
1003                                "Have {} total concepts, {} root concepts, {} ValueSets",
1004                                conceptCount,
1005                                rootConceptCount,
1006                                valueSetCount);
1007
1008                IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, loincCs, valueSets, conceptMaps);
1009
1010                return new UploadStatistics(conceptCount, target);
1011        }
1012
1013        @VisibleForTesting
1014        protected LoincXmlFileZipContentsHandler getLoincXmlFileZipContentsHandler() {
1015                return new LoincXmlFileZipContentsHandler();
1016        }
1017
1018        private ValueSet getValueSetLoincAll(Properties theUploadProperties, String theCopyrightStatement) {
1019                ValueSet retVal = new ValueSet();
1020
1021                String codeSystemVersionId = theUploadProperties.getProperty(LOINC_CODESYSTEM_VERSION.getCode());
1022                String valueSetId;
1023                if (codeSystemVersionId != null) {
1024                        valueSetId = LOINC_ALL_VALUESET_ID + "-" + codeSystemVersionId;
1025                } else {
1026                        valueSetId = LOINC_ALL_VALUESET_ID;
1027                }
1028                retVal.setId(valueSetId);
1029                retVal.setUrl("http://loinc.org/vs");
1030                retVal.setVersion(codeSystemVersionId);
1031                retVal.setName("All LOINC codes");
1032                retVal.setStatus(Enumerations.PublicationStatus.ACTIVE);
1033                retVal.setDate(new Date());
1034                retVal.setPublisher("Regenstrief Institute, Inc.");
1035                retVal.setDescription("A value set that includes all LOINC codes");
1036                retVal.setCopyright(theCopyrightStatement);
1037                retVal.getCompose().addInclude().setSystem(ITermLoaderSvc.LOINC_URI).setVersion(codeSystemVersionId);
1038
1039                return retVal;
1040        }
1041
1042        private UploadStatistics processSnomedCtFiles(
1043                        LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails) {
1044                final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
1045                final Map<String, TermConcept> id2concept = new HashMap<>();
1046                final Map<String, TermConcept> code2concept = new HashMap<>();
1047                final Set<String> validConceptIds = new HashSet<>();
1048
1049                IZipContentsHandlerCsv handler = new SctHandlerConcept(validConceptIds);
1050                iterateOverZipFileCsv(theDescriptors, SCT_FILE_CONCEPT, handler, '\t', null, true);
1051
1052                ourLog.info("Have {} valid concept IDs", validConceptIds.size());
1053
1054                handler = new SctHandlerDescription(validConceptIds, code2concept, id2concept, codeSystemVersion);
1055                iterateOverZipFileCsv(theDescriptors, SCT_FILE_DESCRIPTION, handler, '\t', null, true);
1056
1057                ourLog.info("Got {} concepts, cloning map", code2concept.size());
1058                final HashMap<String, TermConcept> rootConcepts = new HashMap<>(code2concept);
1059
1060                handler = new SctHandlerRelationship(codeSystemVersion, rootConcepts, code2concept);
1061                iterateOverZipFileCsv(theDescriptors, SCT_FILE_RELATIONSHIP, handler, '\t', null, true);
1062
1063                IOUtils.closeQuietly(theDescriptors);
1064
1065                ourLog.info("Looking for root codes");
1066                rootConcepts
1067                                .entrySet()
1068                                .removeIf(theStringTermConceptEntry ->
1069                                                !theStringTermConceptEntry.getValue().getParents().isEmpty());
1070
1071                ourLog.info(
1072                                "Done loading SNOMED CT files - {} root codes, {} total codes",
1073                                rootConcepts.size(),
1074                                code2concept.size());
1075
1076                Counter circularCounter = new Counter();
1077                for (TermConcept next : rootConcepts.values()) {
1078                        long count = circularCounter.getThenAdd();
1079                        float pct = ((float) count / rootConcepts.size()) * 100.0f;
1080                        ourLog.info(
1081                                        " * Scanning for circular refs - have scanned {} / {} codes ({}%)",
1082                                        count, rootConcepts.size(), pct);
1083                        dropCircularRefs(next, new ArrayList<>(), code2concept);
1084                }
1085
1086                codeSystemVersion.getConcepts().addAll(rootConcepts.values());
1087
1088                CodeSystem cs = new org.hl7.fhir.r4.model.CodeSystem();
1089                cs.setUrl(SCT_URI);
1090                cs.setName("SNOMED CT");
1091                cs.setContent(CodeSystem.CodeSystemContentMode.NOTPRESENT);
1092                cs.setStatus(Enumerations.PublicationStatus.ACTIVE);
1093                IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, cs, null, null);
1094
1095                return new UploadStatistics(code2concept.size(), target);
1096        }
1097
1098        private IIdType storeCodeSystem(
1099                        RequestDetails theRequestDetails,
1100                        final TermCodeSystemVersion theCodeSystemVersion,
1101                        CodeSystem theCodeSystem,
1102                        List<ValueSet> theValueSets,
1103                        List<ConceptMap> theConceptMaps) {
1104                Validate.isTrue(theCodeSystem.getContent() == CodeSystem.CodeSystemContentMode.NOTPRESENT);
1105
1106                List<ValueSet> valueSets = ObjectUtils.defaultIfNull(theValueSets, Collections.emptyList());
1107                List<ConceptMap> conceptMaps = ObjectUtils.defaultIfNull(theConceptMaps, Collections.emptyList());
1108
1109                IIdType retVal;
1110                myDeferredStorageSvc.setProcessDeferred(false);
1111                retVal = myCodeSystemStorageSvc.storeNewCodeSystemVersion(
1112                                theCodeSystem, theCodeSystemVersion, theRequestDetails, valueSets, conceptMaps);
1113                myDeferredStorageSvc.setProcessDeferred(true);
1114
1115                return retVal;
1116        }
1117
1118        public static void iterateOverZipFileCsv(
1119                        LoadedFileDescriptors theDescriptors,
1120                        String theFileNamePart,
1121                        IZipContentsHandlerCsv theHandler,
1122                        char theDelimiter,
1123                        QuoteMode theQuoteMode,
1124                        boolean theIsPartialFilename) {
1125                iterateOverZipFileCsv(
1126                                theDescriptors, theFileNamePart, theHandler, theDelimiter, theQuoteMode, theIsPartialFilename, true);
1127        }
1128
1129        public static void iterateOverZipFileCsvOptional(
1130                        LoadedFileDescriptors theDescriptors,
1131                        String theFileNamePart,
1132                        IZipContentsHandlerCsv theHandler,
1133                        char theDelimiter,
1134                        QuoteMode theQuoteMode,
1135                        boolean theIsPartialFilename) {
1136                iterateOverZipFileCsv(
1137                                theDescriptors, theFileNamePart, theHandler, theDelimiter, theQuoteMode, theIsPartialFilename, false);
1138        }
1139
1140        private static void iterateOverZipFileCsv(
1141                        LoadedFileDescriptors theDescriptors,
1142                        String theFileNamePart,
1143                        IZipContentsHandlerCsv theHandler,
1144                        char theDelimiter,
1145                        QuoteMode theQuoteMode,
1146                        boolean theIsPartialFilename,
1147                        boolean theRequireMatch) {
1148                IZipContentsHandler handler = (reader, filename) -> {
1149                        CSVParser parsed = newCsvRecords(theDelimiter, theQuoteMode, reader);
1150                        Iterator<CSVRecord> iter = parsed.iterator();
1151                        ourLog.debug("Header map: {}", parsed.getHeaderMap());
1152
1153                        int count = 0;
1154                        int nextLoggedCount = 0;
1155                        while (iter.hasNext()) {
1156                                CSVRecord nextRecord = iter.next();
1157                                if (!nextRecord.isConsistent()) {
1158                                        continue;
1159                                }
1160                                theHandler.accept(nextRecord);
1161                                count++;
1162                                if (count >= nextLoggedCount) {
1163                                        ourLog.info(" * Processed {} records in {}", count, filename);
1164                                        nextLoggedCount += LOG_INCREMENT;
1165                                }
1166                        }
1167                };
1168
1169                iterateOverZipFile(theDescriptors, theFileNamePart, theIsPartialFilename, theRequireMatch, handler);
1170        }
1171
1172        private static void iterateOverZipFile(
1173                        LoadedFileDescriptors theDescriptors,
1174                        String theFileNamePart,
1175                        boolean theIsPartialFilename,
1176                        boolean theRequireMatch,
1177                        IZipContentsHandler theHandler) {
1178                boolean foundMatch = false;
1179                for (FileDescriptor nextZipBytes : theDescriptors.getUncompressedFileDescriptors()) {
1180                        String nextFilename = nextZipBytes.getFilename();
1181                        boolean matches;
1182                        if (theIsPartialFilename) {
1183                                matches = nextFilename.contains(theFileNamePart);
1184                        } else {
1185                                matches = nextFilename.endsWith("/" + theFileNamePart) || nextFilename.equals(theFileNamePart);
1186                        }
1187
1188                        if (matches) {
1189                                ourLog.info("Processing file {}", nextFilename);
1190                                foundMatch = true;
1191
1192                                try {
1193
1194                                        Reader reader = new InputStreamReader(nextZipBytes.getInputStream(), Charsets.UTF_8);
1195                                        theHandler.handle(reader, nextFilename);
1196
1197                                } catch (IOException e) {
1198                                        throw new InternalErrorException(Msg.code(877) + e);
1199                                }
1200                        }
1201                }
1202
1203                if (!foundMatch && theRequireMatch) {
1204                        throw new InvalidRequestException(Msg.code(878) + "Did not find file matching " + theFileNamePart);
1205                }
1206        }
1207
1208        @Nonnull
1209        private static CSVParser newCsvRecords(char theDelimiter, QuoteMode theQuoteMode, Reader theReader)
1210                        throws IOException {
1211                CSVParser parsed;
1212                CSVFormat format =
1213                                CSVFormat.newFormat(theDelimiter).withFirstRecordAsHeader().withTrim();
1214                if (theQuoteMode != null) {
1215                        format = format.withQuote('"').withQuoteMode(theQuoteMode);
1216                }
1217                parsed = new CSVParser(theReader, format);
1218                return parsed;
1219        }
1220
1221        public static String firstNonBlank(String... theStrings) {
1222                String retVal = "";
1223                for (String nextString : theStrings) {
1224                        if (isNotBlank(nextString)) {
1225                                retVal = nextString;
1226                                break;
1227                        }
1228                }
1229                return retVal;
1230        }
1231
1232        public static TermConcept getOrCreateConcept(Map<String, TermConcept> id2concept, String id) {
1233                TermConcept concept = id2concept.get(id);
1234                if (concept == null) {
1235                        concept = new TermConcept();
1236                        id2concept.put(id, concept);
1237                }
1238                return concept;
1239        }
1240
1241        public static TermConceptProperty getOrCreateConceptProperty(
1242                        Map<String, List<TermConceptProperty>> code2Properties, String code, String key) {
1243                List<TermConceptProperty> termConceptProperties = code2Properties.get(code);
1244                if (termConceptProperties == null) return new TermConceptProperty();
1245                Optional<TermConceptProperty> termConceptProperty = termConceptProperties.stream()
1246                                .filter(property -> key.equals(property.getKey()))
1247                                .findFirst();
1248                return termConceptProperty.orElseGet(TermConceptProperty::new);
1249        }
1250}