001/*- 002 * #%L 003 * HAPI FHIR JPA Model 004 * %% 005 * Copyright (C) 2014 - 2024 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.jpa.model.search; 021 022import org.hibernate.search.engine.backend.document.DocumentElement; 023import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaElement; 024import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaObjectField; 025import org.hibernate.search.engine.backend.types.Aggregable; 026import org.hibernate.search.engine.backend.types.ObjectStructure; 027import org.hibernate.search.engine.backend.types.Projectable; 028import org.hibernate.search.engine.backend.types.Searchable; 029import org.hibernate.search.engine.backend.types.Sortable; 030import org.hibernate.search.engine.backend.types.dsl.IndexFieldTypeFactory; 031import org.hibernate.search.engine.backend.types.dsl.StandardIndexFieldTypeOptionsStep; 032import org.hibernate.search.engine.backend.types.dsl.StringIndexFieldTypeOptionsStep; 033import org.hibernate.search.mapper.pojo.bridge.PropertyBridge; 034import org.hibernate.search.mapper.pojo.bridge.binding.PropertyBindingContext; 035import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.PropertyBinder; 036import org.hibernate.search.mapper.pojo.bridge.runtime.PropertyBridgeWriteContext; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040import java.time.Instant; 041 042import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_EXACT; 043import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_LOWER; 044import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_NORMALIZED; 045import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_TEXT; 046import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.NUMBER_VALUE; 047import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_CODE; 048import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_CODE_NORM; 049import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_SYSTEM; 050import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_VALUE; 051import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_VALUE_NORM; 052import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.URI_VALUE; 053 054/** 055 * Allows hibernate search to index 056 * <p> 057 * CodeableConcept.text 058 * Coding.display 059 * Identifier.type.text 060 */ 061public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBridge<ExtendedHSearchIndexData> { 062 private static final Logger ourLog = LoggerFactory.getLogger(SearchParamTextPropertyBinder.class); 063 064 public static final String SEARCH_PARAM_TEXT_PREFIX = "text-"; 065 public static final String LOWERCASE_ASCIIFOLDING_NORMALIZER = "lowercaseAsciifoldingNormalizer"; 066 067 @Override 068 public void bind(PropertyBindingContext thePropertyBindingContext) { 069 // TODO Is it safe to use object identity of the Map to track dirty? 070 // N.B. GGG I would hazard that it is not, we could potentially use Version of the resource. 071 thePropertyBindingContext.dependencies().use("mySearchParamStrings").use("mySearchParamQuantities"); 072 073 defineIndexingTemplate(thePropertyBindingContext); 074 075 thePropertyBindingContext.bridge(ExtendedHSearchIndexData.class, this); 076 } 077 078 private void defineIndexingTemplate(PropertyBindingContext thePropertyBindingContext) { 079 IndexSchemaElement indexSchemaElement = thePropertyBindingContext.indexSchemaElement(); 080 081 // In order to support dynamic fields, we have to use field templates. We _must_ define the template at 082 // bootstrap time and cannot 083 // create them adhoc. 084 // https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#mapper-orm-bridge-index-field-dsl-dynamic 085 // I _think_ im doing the right thing here by indicating that everything matching this template uses this 086 // analyzer. 087 IndexFieldTypeFactory indexFieldTypeFactory = thePropertyBindingContext.typeFactory(); 088 // TODO mb Once Ken finishes extracting a common base, we can share these constants with 089 // HapiElasticsearchAnalysisConfigurer and HapiLuceneAnalysisConfigurer 090 StringIndexFieldTypeOptionsStep<?> standardAnalyzer = 091 indexFieldTypeFactory.asString().analyzer("standardAnalyzer").projectable(Projectable.NO); 092 093 StringIndexFieldTypeOptionsStep<?> lowerCaseNormalizer = indexFieldTypeFactory 094 .asString() 095 .normalizer(LOWERCASE_ASCIIFOLDING_NORMALIZER) 096 .sortable(Sortable.YES) 097 .projectable(Projectable.YES); 098 099 StringIndexFieldTypeOptionsStep<?> exactAnalyzer = indexFieldTypeFactory 100 .asString() 101 .analyzer("exactAnalyzer") // default max-length is 256. Is that enough for code system uris? 102 .projectable(Projectable.NO); 103 104 StringIndexFieldTypeOptionsStep<?> normStringAnalyzer = 105 indexFieldTypeFactory.asString().analyzer("normStringAnalyzer").projectable(Projectable.NO); 106 107 StringIndexFieldTypeOptionsStep<?> keywordFieldType = indexFieldTypeFactory 108 .asString() 109 // TODO JB: may have to add normalizer to support case insensitive searches depending on token flags 110 .projectable(Projectable.NO) 111 .sortable(Sortable.YES) 112 .aggregable(Aggregable.YES); 113 114 StandardIndexFieldTypeOptionsStep<?, Instant> dateTimeFieldType = 115 indexFieldTypeFactory.asInstant().projectable(Projectable.NO).sortable(Sortable.YES); 116 117 StandardIndexFieldTypeOptionsStep<?, Integer> dateTimeOrdinalFieldType = 118 indexFieldTypeFactory.asInteger().projectable(Projectable.NO).sortable(Sortable.YES); 119 120 StandardIndexFieldTypeOptionsStep<?, Double> bigDecimalFieldType = 121 indexFieldTypeFactory.asDouble().projectable(Projectable.NO).sortable(Sortable.YES); 122 123 StringIndexFieldTypeOptionsStep<?> forcedIdType = 124 indexFieldTypeFactory.asString().projectable(Projectable.YES).aggregable(Aggregable.NO); 125 126 // type to store payload fields that do not participate in search, only results 127 StringIndexFieldTypeOptionsStep<?> stringStorageType = indexFieldTypeFactory 128 .asString() 129 .searchable(Searchable.NO) 130 .projectable(Projectable.YES) 131 .aggregable(Aggregable.NO); 132 133 // the old style for _text and _contains 134 indexSchemaElement 135 .fieldTemplate("SearchParamText", standardAnalyzer) 136 .matchingPathGlob(SEARCH_PARAM_TEXT_PREFIX + "*"); 137 138 indexSchemaElement.field("myForcedId", forcedIdType).toReference(); 139 140 indexSchemaElement.field("myRawResource", stringStorageType).toReference(); 141 142 // The following section is a bit ugly. We need to enforce order and dependency or the object matches will be 143 // too big. 144 { 145 IndexSchemaObjectField spfield = 146 indexSchemaElement.objectField(HSearchIndexWriter.SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED); 147 spfield.toReference(); 148 IndexSchemaObjectField nestedSpField = indexSchemaElement.objectField( 149 HSearchIndexWriter.NESTED_SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED); 150 nestedSpField.toReference(); 151 152 // Note: the lucene/elastic independent api is hurting a bit here. 153 // For lucene, we need a separate field for each analyzer. So we'll add string (for :exact), and text (for 154 // :text). 155 // They aren't marked stored, so there's no space cost beyond the index for each. 156 // But for elastic, we'd rather have a single field defined, with multi-field sub-fields. The index cost is 157 // the same, 158 // but elastic will actually store all fields in the source document and consume disk. 159 160 // So triplicate the storage for now. :-( 161 String stringPathGlob = "*.string"; 162 spfield.objectFieldTemplate("stringIndex", ObjectStructure.FLATTENED) 163 .matchingPathGlob(stringPathGlob); 164 spfield.fieldTemplate("string-norm", normStringAnalyzer) 165 .matchingPathGlob(stringPathGlob + "." + IDX_STRING_NORMALIZED) 166 .multiValued(); 167 spfield.fieldTemplate("string-exact", exactAnalyzer) 168 .matchingPathGlob(stringPathGlob + "." + IDX_STRING_EXACT) 169 .multiValued(); 170 spfield.fieldTemplate("string-text", standardAnalyzer) 171 .matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT) 172 .multiValued(); 173 spfield.fieldTemplate("string-lower", lowerCaseNormalizer) 174 .matchingPathGlob(stringPathGlob + "." + IDX_STRING_LOWER) 175 .multiValued(); 176 177 nestedSpField 178 .objectFieldTemplate("nestedStringIndex", ObjectStructure.FLATTENED) 179 .matchingPathGlob(stringPathGlob); 180 nestedSpField 181 .fieldTemplate("string-norm", normStringAnalyzer) 182 .matchingPathGlob(stringPathGlob + "." + IDX_STRING_NORMALIZED) 183 .multiValued(); 184 nestedSpField 185 .fieldTemplate("string-text", standardAnalyzer) 186 .matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT) 187 .multiValued(); 188 189 // token 190 // Ideally, we'd store a single code-system string and use a custom tokenizer to 191 // generate "system|" "|code" and "system|code" tokens to support all three. 192 // But the standard tokenizers aren't that flexible. As second best, it would be nice to use elastic 193 // multi-fields 194 // to apply three different tokenizers to a single value. 195 // Instead, just be simple and expand into three full fields for now 196 String tokenPathGlob = "*.token"; 197 spfield.objectFieldTemplate("tokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob); 198 spfield.fieldTemplate("token-code", keywordFieldType) 199 .matchingPathGlob(tokenPathGlob + ".code") 200 .multiValued(); 201 spfield.fieldTemplate("token-code-system", keywordFieldType) 202 .matchingPathGlob(tokenPathGlob + ".code-system") 203 .multiValued(); 204 spfield.fieldTemplate("token-system", keywordFieldType) 205 .matchingPathGlob(tokenPathGlob + ".system") 206 .multiValued(); 207 208 nestedSpField 209 .objectFieldTemplate("nestedTokenIndex", ObjectStructure.FLATTENED) 210 .matchingPathGlob(tokenPathGlob); 211 nestedSpField 212 .fieldTemplate("token-code", keywordFieldType) 213 .matchingPathGlob(tokenPathGlob + ".code") 214 .multiValued(); 215 nestedSpField 216 .fieldTemplate("token-code-system", keywordFieldType) 217 .matchingPathGlob(tokenPathGlob + ".code-system") 218 .multiValued(); 219 nestedSpField 220 .fieldTemplate("token-system", keywordFieldType) 221 .matchingPathGlob(tokenPathGlob + ".system") 222 .multiValued(); 223 224 // reference 225 spfield.fieldTemplate("reference-value", keywordFieldType) 226 .matchingPathGlob("*.reference.value") 227 .multiValued(); 228 229 // uri 230 spfield.fieldTemplate("uriValueTemplate", keywordFieldType) 231 .matchingPathGlob("*." + URI_VALUE) 232 .multiValued(); 233 nestedSpField 234 .fieldTemplate("uriValueTemplate", keywordFieldType) 235 .matchingPathGlob("*." + URI_VALUE) 236 .multiValued(); 237 238 // number 239 spfield.fieldTemplate("numberValueTemplate", bigDecimalFieldType).matchingPathGlob("*." + NUMBER_VALUE); 240 nestedSpField 241 .fieldTemplate("numberValueTemplate", bigDecimalFieldType) 242 .matchingPathGlob("*." + NUMBER_VALUE); 243 244 // quantity 245 String quantityPathGlob = "*.quantity"; 246 nestedSpField 247 .objectFieldTemplate("quantityTemplate", ObjectStructure.FLATTENED) 248 .matchingPathGlob(quantityPathGlob); 249 nestedSpField 250 .fieldTemplate(QTY_SYSTEM, keywordFieldType) 251 .matchingPathGlob(quantityPathGlob + "." + QTY_SYSTEM); 252 nestedSpField.fieldTemplate(QTY_CODE, keywordFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_CODE); 253 nestedSpField 254 .fieldTemplate(QTY_VALUE, bigDecimalFieldType) 255 .matchingPathGlob(quantityPathGlob + "." + QTY_VALUE); 256 nestedSpField 257 .fieldTemplate(QTY_CODE_NORM, keywordFieldType) 258 .matchingPathGlob(quantityPathGlob + "." + QTY_CODE_NORM); 259 nestedSpField 260 .fieldTemplate(QTY_VALUE_NORM, bigDecimalFieldType) 261 .matchingPathGlob(quantityPathGlob + "." + QTY_VALUE_NORM); 262 263 // date 264 String dateTimePathGlob = "*.dt"; 265 spfield.objectFieldTemplate("datetimeIndex", ObjectStructure.FLATTENED) 266 .matchingPathGlob(dateTimePathGlob); 267 spfield.fieldTemplate("datetime-lower-ordinal", dateTimeOrdinalFieldType) 268 .matchingPathGlob(dateTimePathGlob + ".lower-ord") 269 .multiValued(); 270 spfield.fieldTemplate("datetime-lower-value", dateTimeFieldType) 271 .matchingPathGlob(dateTimePathGlob + ".lower") 272 .multiValued(); 273 spfield.fieldTemplate("datetime-upper-ordinal", dateTimeOrdinalFieldType) 274 .matchingPathGlob(dateTimePathGlob + ".upper-ord") 275 .multiValued(); 276 spfield.fieldTemplate("datetime-upper-value", dateTimeFieldType) 277 .matchingPathGlob(dateTimePathGlob + ".upper") 278 .multiValued(); 279 280 nestedSpField 281 .objectFieldTemplate("nestedDatetimeIndex", ObjectStructure.FLATTENED) 282 .matchingPathGlob(dateTimePathGlob); 283 nestedSpField 284 .fieldTemplate("datetime-lower-ordinal", dateTimeOrdinalFieldType) 285 .matchingPathGlob(dateTimePathGlob + ".lower-ord") 286 .multiValued(); 287 nestedSpField 288 .fieldTemplate("datetime-lower-value", dateTimeFieldType) 289 .matchingPathGlob(dateTimePathGlob + ".lower") 290 .multiValued(); 291 nestedSpField 292 .fieldTemplate("datetime-upper-ordinal", dateTimeOrdinalFieldType) 293 .matchingPathGlob(dateTimePathGlob + ".upper-ord") 294 .multiValued(); 295 nestedSpField 296 .fieldTemplate("datetime-upper-value", dateTimeFieldType) 297 .matchingPathGlob(dateTimePathGlob + ".upper") 298 .multiValued(); 299 300 // last, since the globs are matched in declaration order, and * matches even nested nodes. 301 spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*"); 302 303 // we use nested search params for the autocomplete search. 304 nestedSpField 305 .objectFieldTemplate("nestedSpSubObject", ObjectStructure.FLATTENED) 306 .matchingPathGlob("*.*") 307 .multiValued(); 308 nestedSpField 309 .objectFieldTemplate("nestedSpObject", ObjectStructure.NESTED) 310 .matchingPathGlob("*") 311 .multiValued(); 312 } 313 } 314 315 @Override 316 public void write( 317 DocumentElement theDocument, 318 ExtendedHSearchIndexData theIndexData, 319 PropertyBridgeWriteContext thePropertyBridgeWriteContext) { 320 if (theIndexData != null) { 321 ourLog.trace("Writing index data for {}", theIndexData); 322 theIndexData.writeIndexElements(theDocument); 323 } 324 } 325}