
package ca.uhn.fhir.jpa.model.search;

/*-
 * #%L
 * HAPI FHIR JPA Model
 * %%
 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import org.hibernate.search.engine.backend.document.DocumentElement;
import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaElement;
import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaObjectField;
import org.hibernate.search.engine.backend.types.Aggregable;
import org.hibernate.search.engine.backend.types.ObjectStructure;
import org.hibernate.search.engine.backend.types.Projectable;
import org.hibernate.search.engine.backend.types.Searchable;
import org.hibernate.search.engine.backend.types.Sortable;
import org.hibernate.search.engine.backend.types.dsl.IndexFieldTypeFactory;
import org.hibernate.search.engine.backend.types.dsl.StandardIndexFieldTypeOptionsStep;
import org.hibernate.search.engine.backend.types.dsl.StringIndexFieldTypeOptionsStep;
import org.hibernate.search.mapper.pojo.bridge.PropertyBridge;
import org.hibernate.search.mapper.pojo.bridge.binding.PropertyBindingContext;
import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.PropertyBinder;
import org.hibernate.search.mapper.pojo.bridge.runtime.PropertyBridgeWriteContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.Instant;

import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_EXACT;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_LOWER;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_NORMALIZED;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_TEXT;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.NUMBER_VALUE;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_CODE;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_CODE_NORM;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_SYSTEM;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_VALUE;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_VALUE_NORM;
import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.URI_VALUE;

/**
 * Allows hibernate search to index
 * <p>
 * CodeableConcept.text
 * Coding.display
 * Identifier.type.text
 * <p>
 * This class acts both as the {@link PropertyBinder} (it declares the index schema and registers the bridge
 * in {@link #bind(PropertyBindingContext)}) and as the {@link PropertyBridge} itself (it writes the
 * {@link ExtendedHSearchIndexData} into the document in
 * {@link #write(DocumentElement, ExtendedHSearchIndexData, PropertyBridgeWriteContext)}).
 */
public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBridge<ExtendedHSearchIndexData> {
	private static final Logger ourLog = LoggerFactory.getLogger(SearchParamTextPropertyBinder.class);

	/** Prefix of the top-level dynamic fields matched by the "SearchParamText" field template. */
	public static final String SEARCH_PARAM_TEXT_PREFIX = "text-";
	/** Name of the normalizer applied to the lower-cased string sub-field. */
	public static final String LOWERCASE_ASCIIFOLDING_NORMALIZER = "lowercaseAsciifoldingNormalizer";

	/**
	 * Declares the properties this bridge depends on, defines the index field templates, and registers
	 * this instance as the bridge for {@link ExtendedHSearchIndexData}.
	 *
	 * @param thePropertyBindingContext the binding context supplied by Hibernate Search
	 */
	@Override
	public void bind(PropertyBindingContext thePropertyBindingContext) {
		// TODO Is it safe to use object identity of the Map to track dirty?
		// N.B. GGG I would hazard that it is not, we could potentially use Version of the resource.
		thePropertyBindingContext.dependencies()
			.use("mySearchParamStrings")
			.use("mySearchParamQuantities");

		defineIndexingTemplate(thePropertyBindingContext);

		thePropertyBindingContext.bridge(ExtendedHSearchIndexData.class, this);
	}

	/**
	 * Declares every static field and dynamic field template used by the extended search index.
	 * <p>
	 * The declaration order of the templates below is significant: the catch-all {@code "*"} object
	 * templates are intentionally declared last (see the comments at the end of the method).
	 *
	 * @param thePropertyBindingContext the binding context that provides the schema element and type factory
	 */
	private void defineIndexingTemplate(PropertyBindingContext thePropertyBindingContext) {
		IndexSchemaElement indexSchemaElement = thePropertyBindingContext.indexSchemaElement();

		// In order to support dynamic fields, we have to use field templates. We _must_ define the template at bootstrap time and cannot
		// create them adhoc. https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#mapper-orm-bridge-index-field-dsl-dynamic
		// I _think_ I'm doing the right thing here by indicating that everything matching this template uses this analyzer.
		IndexFieldTypeFactory indexFieldTypeFactory = thePropertyBindingContext.typeFactory();
		// TODO mb Once Ken finishes extracting a common base, we can share these constants with HapiElasticsearchAnalysisConfigurer and HapiLuceneAnalysisConfigurer
		StringIndexFieldTypeOptionsStep<?> standardAnalyzer = indexFieldTypeFactory.asString()
			.analyzer("standardAnalyzer")
			.projectable(Projectable.NO);

		StringIndexFieldTypeOptionsStep<?> lowerCaseNormalizer =
			indexFieldTypeFactory.asString()
				.normalizer(LOWERCASE_ASCIIFOLDING_NORMALIZER)
				.sortable(Sortable.YES)
				.projectable(Projectable.YES);

		StringIndexFieldTypeOptionsStep<?> exactAnalyzer =
			indexFieldTypeFactory.asString()
				.analyzer("exactAnalyzer") // default max-length is 256.  Is that enough for code system uris?
				.projectable(Projectable.NO);

		StringIndexFieldTypeOptionsStep<?> normStringAnalyzer = indexFieldTypeFactory.asString()
			.analyzer("normStringAnalyzer")
			.projectable(Projectable.NO);

		StringIndexFieldTypeOptionsStep<?> keywordFieldType = indexFieldTypeFactory.asString()
			// TODO JB: may have to add normalizer to support case insensitive searches depending on token flags
			.projectable(Projectable.NO)
			.sortable(Sortable.YES)
			.aggregable(Aggregable.YES);

		StandardIndexFieldTypeOptionsStep<?, Instant> dateTimeFieldType = indexFieldTypeFactory.asInstant()
			.projectable(Projectable.NO)
			.sortable(Sortable.YES);

		StandardIndexFieldTypeOptionsStep<?, Integer> dateTimeOrdinalFieldType = indexFieldTypeFactory.asInteger()
			.projectable(Projectable.NO)
			.sortable(Sortable.YES);

		// NOTE: despite the variable name, this type is backed by a Double index field.
		StandardIndexFieldTypeOptionsStep<?, Double> bigDecimalFieldType = indexFieldTypeFactory.asDouble()
			.projectable(Projectable.NO)
			.sortable(Sortable.YES);

		StringIndexFieldTypeOptionsStep<?> forcedIdType = indexFieldTypeFactory.asString()
			.projectable(Projectable.YES)
			.aggregable(Aggregable.NO);

		// type to store payload fields that do not participate in search, only results
		StringIndexFieldTypeOptionsStep<?> stringStorageType = indexFieldTypeFactory.asString()
			.searchable(Searchable.NO)
			.projectable(Projectable.YES)
			.aggregable(Aggregable.NO);

		// the old style for _text and _contains
		indexSchemaElement
			.fieldTemplate("SearchParamText", standardAnalyzer)
			.matchingPathGlob(SEARCH_PARAM_TEXT_PREFIX + "*");


		indexSchemaElement.field("myForcedId", forcedIdType).toReference();

		indexSchemaElement.field("myRawResource", stringStorageType).toReference();

		// The following section is a bit ugly.  We need to enforce order and dependency or the object matches will be too big.
		{
			IndexSchemaObjectField spfield = indexSchemaElement.objectField(HSearchIndexWriter.SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED);
			spfield.toReference();
			IndexSchemaObjectField nestedSpField = indexSchemaElement.objectField(HSearchIndexWriter.NESTED_SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED);
			nestedSpField.toReference();

			// TODO MB: the lucene/elastic independent api is hurting a bit here.
			// For lucene, we need a separate field for each analyzer.  So we'll add string (for :exact), and text (for :text).
			// They aren't marked stored, so there's no space cost beyond the index for each.
			// But for elastic, I'd rather have a single field defined, with multi-field sub-fields.  The index cost is the same,
			// but elastic will actually store all fields in the source document.

			// So triplicate the storage for now. :-(
			String stringPathGlob = "*.string";
			spfield.objectFieldTemplate("stringIndex", ObjectStructure.FLATTENED).matchingPathGlob(stringPathGlob);
			spfield.fieldTemplate("string-norm", normStringAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_NORMALIZED).multiValued();
			spfield.fieldTemplate("string-exact", exactAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_EXACT).multiValued();
			spfield.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued();
			spfield.fieldTemplate("string-lower", lowerCaseNormalizer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_LOWER).multiValued();

			nestedSpField.objectFieldTemplate("nestedStringIndex", ObjectStructure.FLATTENED).matchingPathGlob(stringPathGlob);
			nestedSpField.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued();

			// token
			// Ideally, we'd store a single code-system string and use a custom tokenizer to
			// generate "system|" "|code" and "system|code" tokens to support all three.
			// But the standard tokenizers aren't that flexible.  As second best, it would be nice to use elastic multi-fields
			// to apply three different tokenizers to a single value.
			// Instead, just be simple and expand into three full fields for now
			String tokenPathGlob = "*.token";
			spfield.objectFieldTemplate("tokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob);
			spfield.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued();
			spfield.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued();
			spfield.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued();

			nestedSpField.objectFieldTemplate("nestedTokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob);
			nestedSpField.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued();
			nestedSpField.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued();
			nestedSpField.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued();

			// reference
			spfield.fieldTemplate("reference-value", keywordFieldType).matchingPathGlob("*.reference.value").multiValued();

			// uri
			spfield.fieldTemplate("uriValueTemplate", keywordFieldType).matchingPathGlob("*." + URI_VALUE).multiValued();

			// number
			spfield.fieldTemplate("numberValueTemplate", bigDecimalFieldType).matchingPathGlob("*." + NUMBER_VALUE);

			// quantity
			String quantityPathGlob = "*.quantity";
			nestedSpField.objectFieldTemplate("quantityTemplate", ObjectStructure.FLATTENED).matchingPathGlob(quantityPathGlob);
			nestedSpField.fieldTemplate(QTY_SYSTEM, keywordFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_SYSTEM);
			nestedSpField.fieldTemplate(QTY_CODE, keywordFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_CODE);
			nestedSpField.fieldTemplate(QTY_VALUE, bigDecimalFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_VALUE);
			nestedSpField.fieldTemplate(QTY_CODE_NORM, keywordFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_CODE_NORM);
			nestedSpField.fieldTemplate(QTY_VALUE_NORM, bigDecimalFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_VALUE_NORM);

			// date
			String dateTimePathGlob = "*.dt";
			spfield.objectFieldTemplate("datetimeIndex", ObjectStructure.FLATTENED).matchingPathGlob(dateTimePathGlob);
			spfield.fieldTemplate("datetime-lower-ordinal", dateTimeOrdinalFieldType).matchingPathGlob(dateTimePathGlob + ".lower-ord");
			spfield.fieldTemplate("datetime-lower-value", dateTimeFieldType).matchingPathGlob(dateTimePathGlob + ".lower");
			spfield.fieldTemplate("datetime-upper-ordinal", dateTimeOrdinalFieldType).matchingPathGlob(dateTimePathGlob + ".upper-ord");
			spfield.fieldTemplate("datetime-upper-value", dateTimeFieldType).matchingPathGlob(dateTimePathGlob + ".upper");

			// last, since the globs are matched in declaration order, and * matches even nested nodes.
			spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*");

			// we use nested search params for the autocomplete search.
			nestedSpField.objectFieldTemplate("nestedSpObject", ObjectStructure.NESTED).matchingPathGlob("*").multiValued();
		}
	}

	/**
	 * Writes the supplied index data into the target document by delegating to
	 * {@link ExtendedHSearchIndexData#writeIndexElements(DocumentElement)}. Does nothing when the index data is
	 * {@code null}.
	 *
	 * @param theDocument                   the document element to populate
	 * @param theIndexData                  the extended index data to write; may be {@code null}
	 * @param thePropertyBridgeWriteContext the write context supplied by Hibernate Search (not used here)
	 */
	@Override
	public void write(DocumentElement theDocument, ExtendedHSearchIndexData theIndexData, PropertyBridgeWriteContext thePropertyBridgeWriteContext) {
		if (theIndexData != null) {
			ourLog.trace("Writing index data for {}", theIndexData);
			theIndexData.writeIndexElements(theDocument);
		}
	}

}