package ca.uhn.fhir.jpa.search.elastic;

/*-
 * #%L
 * HAPI FHIR JPA Server
 * %%
 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import ca.uhn.fhir.jpa.search.HapiLuceneAnalysisConfigurer;
import org.hibernate.search.backend.elasticsearch.analysis.ElasticsearchAnalysisConfigurationContext;
import org.hibernate.search.backend.elasticsearch.analysis.ElasticsearchAnalysisConfigurer;

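/**
 * Configures the custom analyzers, tokenizers, and token filters used by the
 * Hibernate Search Elasticsearch backend. This is the Elasticsearch counterpart
 * of {@link HapiLuceneAnalysisConfigurer}.
 */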
public class HapiElasticsearchAnalysisConfigurer implements ElasticsearchAnalysisConfigurer {

	@Override
	public void configure(ElasticsearchAnalysisConfigurationContext theConfigCtx) {

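		// Whole-string autocomplete: the "pattern_all" tokenizer keeps the entire input
		// as a single token, and edge n-grams of 3-50 characters are emitted for prefix matching.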
		theConfigCtx.analyzer("autocompleteEdgeAnalyzer").custom()
			.tokenizer("pattern_all")
			.tokenFilters("lowercase", "stop", "edgengram_3_50");

		theConfigCtx.tokenizer("pattern_all")
			.type("pattern")
			.param("pattern", "(.*)")
			.param("group", "1");

		theConfigCtx.tokenFilter("edgengram_3_50")
			.type("edgeNGram")
			.param("min_gram", "3")
			.param("max_gram", "50");

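		// Word-level autocomplete: standard tokenization, then 3-20 character edge n-grams
		// per word (note the filter name says 3_50 but max_gram is 20).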
		theConfigCtx.analyzer("autocompleteWordEdgeAnalyzer").custom()
			.tokenizer("standard")
			.tokenFilters("lowercase", "stop", "wordedgengram_3_50");

		theConfigCtx.tokenFilter("wordedgengram_3_50")
			.type("edgeNGram")
			.param("min_gram", "3")
			.param("max_gram", "20");

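		// "Phonetic" autocomplete: stop-word removal and English snowball stemming on standard tokens.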
		theConfigCtx.analyzer("autocompletePhoneticAnalyzer").custom()
			.tokenizer("standard")
			.tokenFilters("stop", "snowball_english");

		theConfigCtx.tokenFilter("snowball_english")
			.type("snowball")
			.param("language", "English");

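		// N-gram autocomplete: word_delimiter splitting, lowercasing, and 3-20 character
		// n-grams to support matching in the middle of words.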
		theConfigCtx.analyzer("autocompleteNGramAnalyzer").custom()
			.tokenizer("standard")
			.tokenFilters("word_delimiter", "lowercase", "ngram_3_20");

		theConfigCtx.tokenFilter("ngram_3_20")
			.type("nGram")
			.param("min_gram", "3")
			.param("max_gram", "20");

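		// Default text analyzer: standard tokenization with lowercasing and ASCII folding.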
		theConfigCtx.analyzer(HapiLuceneAnalysisConfigurer.STANDARD_ANALYZER).custom()
			.tokenizer("standard")
			.tokenFilters("lowercase", "asciifolding");

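		// Normalized-string analyzer: case- and accent-insensitive matching on the whole value.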
		theConfigCtx.analyzer(HapiLuceneAnalysisConfigurer.NORM_STRING_ANALYZER).custom()
			.tokenizer("keyword") // We need the whole string to match, including whitespace.
			.tokenFilters("lowercase", "asciifolding");

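		// Exact analyzer: the entire value is kept as a single token, so matches must be exact.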
		theConfigCtx.analyzer("exactAnalyzer")
			.custom()
			.tokenizer("keyword")
			.tokenFilters("unique");

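		// Whitespace tokenization splits the space-separated list of parent concept PIDs
		// into individual PID tokens.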
		theConfigCtx.analyzer("conceptParentPidsAnalyzer").custom()
			.tokenizer("whitespace");

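		// Term concept property values are tokenized on whitespace only, with no other filtering.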
		theConfigCtx.analyzer("termConceptPropertyAnalyzer").custom()
			.tokenizer("whitespace");
	}
}