001package ca.uhn.fhir.jpa.model.search;
002
003/*-
004 * #%L
005 * HAPI FHIR JPA Model
006 * %%
007 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023import org.hibernate.search.engine.backend.document.DocumentElement;
024import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaElement;
025import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaObjectField;
026import org.hibernate.search.engine.backend.types.Aggregable;
027import org.hibernate.search.engine.backend.types.ObjectStructure;
028import org.hibernate.search.engine.backend.types.Projectable;
029import org.hibernate.search.engine.backend.types.Searchable;
030import org.hibernate.search.engine.backend.types.Sortable;
031import org.hibernate.search.engine.backend.types.dsl.IndexFieldTypeFactory;
032import org.hibernate.search.engine.backend.types.dsl.StandardIndexFieldTypeOptionsStep;
033import org.hibernate.search.engine.backend.types.dsl.StringIndexFieldTypeOptionsStep;
034import org.hibernate.search.mapper.pojo.bridge.PropertyBridge;
035import org.hibernate.search.mapper.pojo.bridge.binding.PropertyBindingContext;
036import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.PropertyBinder;
037import org.hibernate.search.mapper.pojo.bridge.runtime.PropertyBridgeWriteContext;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import java.time.Instant;
042
043import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_EXACT;
044import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_LOWER;
045import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_NORMALIZED;
046import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_TEXT;
047import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.NUMBER_VALUE;
048import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_CODE;
049import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_CODE_NORM;
050import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_SYSTEM;
051import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_VALUE;
052import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_VALUE_NORM;
053import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.URI_VALUE;
054
055/**
056 * Allows hibernate search to index
057 * <p>
058 * CodeableConcept.text
059 * Coding.display
060 * Identifier.type.text
061 */
062public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBridge<ExtendedHSearchIndexData> {
063        private static final Logger ourLog = LoggerFactory.getLogger(SearchParamTextPropertyBinder.class);
064
065        public static final String SEARCH_PARAM_TEXT_PREFIX = "text-";
066        public static final String LOWERCASE_ASCIIFOLDING_NORMALIZER = "lowercaseAsciifoldingNormalizer";
067
068        @Override
069        public void bind(PropertyBindingContext thePropertyBindingContext) {
070                // TODO Is it safe to use object identity of the Map to track dirty?
071                // N.B. GGG I would hazard that it is not, we could potentially use Version of the resource.
072                thePropertyBindingContext.dependencies()
073                        .use("mySearchParamStrings")
074                        .use("mySearchParamQuantities");
075
076                defineIndexingTemplate(thePropertyBindingContext);
077
078                thePropertyBindingContext.bridge(ExtendedHSearchIndexData.class, this);
079        }
080
081        private void defineIndexingTemplate(PropertyBindingContext thePropertyBindingContext) {
082                IndexSchemaElement indexSchemaElement = thePropertyBindingContext.indexSchemaElement();
083
084                //In order to support dynamic fields, we have to use field templates. We _must_ define the template at bootstrap time and cannot
085                //create them adhoc. https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#mapper-orm-bridge-index-field-dsl-dynamic
086                //I _think_ im doing the right thing here by indicating that everything matching this template uses this analyzer.
087                IndexFieldTypeFactory indexFieldTypeFactory = thePropertyBindingContext.typeFactory();
088                // TODO mb Once Ken finishes extracting a common base, we can share these constants with HapiElasticsearchAnalysisConfigurer and HapiLuceneAnalysisConfigurer
089                StringIndexFieldTypeOptionsStep<?> standardAnalyzer = indexFieldTypeFactory.asString()
090                        .analyzer("standardAnalyzer")
091                        .projectable(Projectable.NO);
092
093                StringIndexFieldTypeOptionsStep<?> lowerCaseNormalizer =
094                        indexFieldTypeFactory.asString()
095                                .normalizer(LOWERCASE_ASCIIFOLDING_NORMALIZER)
096                                .sortable(Sortable.YES)
097                                .projectable(Projectable.YES);
098
099                StringIndexFieldTypeOptionsStep<?> exactAnalyzer =
100                        indexFieldTypeFactory.asString()
101                                .analyzer("exactAnalyzer") // default max-length is 256.  Is that enough for code system uris?
102                                .projectable(Projectable.NO);
103
104                StringIndexFieldTypeOptionsStep<?> normStringAnalyzer = indexFieldTypeFactory.asString()
105                        .analyzer("normStringAnalyzer")
106                        .projectable(Projectable.NO);
107
108                StringIndexFieldTypeOptionsStep<?> keywordFieldType = indexFieldTypeFactory.asString()
109                // TODO JB: may have to add normalizer to support case insensitive searches depending on token flags
110                        .projectable(Projectable.NO)
111                        .sortable(Sortable.YES)
112                        .aggregable(Aggregable.YES);
113
114                StandardIndexFieldTypeOptionsStep<?, Instant> dateTimeFieldType = indexFieldTypeFactory.asInstant()
115                        .projectable(Projectable.NO)
116                        .sortable(Sortable.YES);
117
118                StandardIndexFieldTypeOptionsStep<?, Integer> dateTimeOrdinalFieldType = indexFieldTypeFactory.asInteger()
119                        .projectable(Projectable.NO)
120                        .sortable(Sortable.YES);
121
122                StandardIndexFieldTypeOptionsStep<?, Double> bigDecimalFieldType = indexFieldTypeFactory.asDouble()
123                        .projectable(Projectable.NO)
124                        .sortable(Sortable.YES);
125
126                StringIndexFieldTypeOptionsStep<?> forcedIdType = indexFieldTypeFactory.asString()
127                        .projectable(Projectable.YES)
128                        .aggregable(Aggregable.NO);
129
130                // type to store payload fields that do not participate in search, only results
131                StringIndexFieldTypeOptionsStep<?> stringStorageType = indexFieldTypeFactory.asString()
132                        .searchable(Searchable.NO)
133                        .projectable(Projectable.YES)
134                        .aggregable(Aggregable.NO);
135
136                // the old style for _text and _contains
137                indexSchemaElement
138                        .fieldTemplate("SearchParamText", standardAnalyzer)
139                        .matchingPathGlob(SEARCH_PARAM_TEXT_PREFIX + "*");
140
141
142                indexSchemaElement.field("myForcedId", forcedIdType).toReference();
143
144                indexSchemaElement.field("myRawResource", stringStorageType).toReference();
145
146                // The following section is a bit ugly.  We need to enforce order and dependency or the object matches will be too big.
147                {
148                        IndexSchemaObjectField spfield = indexSchemaElement.objectField(HSearchIndexWriter.SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED);
149                        spfield.toReference();
150                        IndexSchemaObjectField nestedSpField = indexSchemaElement.objectField(HSearchIndexWriter.NESTED_SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED);
151                        nestedSpField.toReference();
152
153                        // TODO MB: the lucene/elastic independent api is hurting a bit here.
154                        // For lucene, we need a separate field for each analyzer.  So we'll add string (for :exact), and text (for :text).
155                        // They aren't marked stored, so there's no space cost beyond the index for each.
156                        // But for elastic, I'd rather have a single field defined, with multi-field sub-fields.  The index cost is the same,
157                        // but elastic will actually store all fields in the source document.
158
159                        // So triplicate the storage for now. :-(
160                        String stringPathGlob = "*.string";
161                        spfield.objectFieldTemplate("stringIndex", ObjectStructure.FLATTENED).matchingPathGlob(stringPathGlob);
162                        spfield.fieldTemplate("string-norm", normStringAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_NORMALIZED).multiValued();
163                        spfield.fieldTemplate("string-exact", exactAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_EXACT).multiValued();
164                        spfield.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued();
165                        spfield.fieldTemplate("string-lower", lowerCaseNormalizer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_LOWER).multiValued();
166
167                        nestedSpField.objectFieldTemplate("nestedStringIndex", ObjectStructure.FLATTENED).matchingPathGlob(stringPathGlob);
168                        nestedSpField.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued();
169
170                        // token
171                        // Ideally, we'd store a single code-system string and use a custom tokenizer to
172                        // generate "system|" "|code" and "system|code" tokens to support all three.
173                        // But the standard tokenizers aren't that flexible.  As second best, it would be nice to use elastic multi-fields
174                        // to apply three different tokenizers to a single value.
175                        // Instead, just be simple and expand into three full fields for now
176                        String tokenPathGlob = "*.token";
177                        spfield.objectFieldTemplate("tokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob);
178                        spfield.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued();
179                        spfield.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued();
180                        spfield.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued();
181
182                        nestedSpField.objectFieldTemplate("nestedTokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob);
183                        nestedSpField.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued();
184                        nestedSpField.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued();
185                        nestedSpField.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued();
186
187                        // reference
188                        spfield.fieldTemplate("reference-value", keywordFieldType).matchingPathGlob("*.reference.value").multiValued();
189
190                        // uri
191                        spfield.fieldTemplate("uriValueTemplate", keywordFieldType).matchingPathGlob("*." + URI_VALUE).multiValued();
192
193                        // number
194                        spfield.fieldTemplate("numberValueTemplate", bigDecimalFieldType).matchingPathGlob("*." + NUMBER_VALUE);
195
196                        //quantity
197                        String quantityPathGlob = "*.quantity";
198                        nestedSpField.objectFieldTemplate("quantityTemplate", ObjectStructure.FLATTENED).matchingPathGlob(quantityPathGlob);
199                        nestedSpField.fieldTemplate(QTY_SYSTEM, keywordFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_SYSTEM);
200                        nestedSpField.fieldTemplate(QTY_CODE, keywordFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_CODE);
201                        nestedSpField.fieldTemplate(QTY_VALUE, bigDecimalFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_VALUE);
202                        nestedSpField.fieldTemplate(QTY_CODE_NORM, keywordFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_CODE_NORM);
203                        nestedSpField.fieldTemplate(QTY_VALUE_NORM, bigDecimalFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_VALUE_NORM);
204
205                        // date
206                        String dateTimePathGlob = "*.dt";
207                        spfield.objectFieldTemplate("datetimeIndex", ObjectStructure.FLATTENED).matchingPathGlob(dateTimePathGlob);
208                        spfield.fieldTemplate("datetime-lower-ordinal", dateTimeOrdinalFieldType).matchingPathGlob(dateTimePathGlob + ".lower-ord");
209                        spfield.fieldTemplate("datetime-lower-value", dateTimeFieldType).matchingPathGlob(dateTimePathGlob + ".lower");
210                        spfield.fieldTemplate("datetime-upper-ordinal", dateTimeOrdinalFieldType).matchingPathGlob(dateTimePathGlob + ".upper-ord");
211                        spfield.fieldTemplate("datetime-upper-value", dateTimeFieldType).matchingPathGlob(dateTimePathGlob + ".upper");
212
213                        // last, since the globs are matched in declaration order, and * matches even nested nodes.
214                        spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*");
215
216                        // we use nested search params for the autocomplete search.
217                        nestedSpField.objectFieldTemplate("nestedSpObject", ObjectStructure.NESTED).matchingPathGlob("*").multiValued();
218                }
219        }
220
221        @Override
222        public void write(DocumentElement theDocument, ExtendedHSearchIndexData theIndexData, PropertyBridgeWriteContext thePropertyBridgeWriteContext) {
223                if (theIndexData != null) {
224                        ourLog.trace("Writing index data for {}", theIndexData);
225                        theIndexData.writeIndexElements(theDocument);
226                }
227        }
228
229}