001package ca.uhn.fhir.jpa.search;
002
003/*-
004 * #%L
005 * HAPI FHIR JPA Server
006 * %%
007 * Copyright (C) 2014 - 2021 Smile CDR, Inc.
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
024import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
025import org.apache.lucene.analysis.core.StopFilterFactory;
026import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
027import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory;
028import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory;
029import org.apache.lucene.analysis.ngram.NGramFilterFactory;
030import org.apache.lucene.analysis.pattern.PatternTokenizerFactory;
031import org.apache.lucene.analysis.phonetic.PhoneticFilterFactory;
032import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
033import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
034import org.hibernate.search.backend.lucene.analysis.LuceneAnalysisConfigurationContext;
035import org.hibernate.search.backend.lucene.analysis.LuceneAnalysisConfigurer;
036import org.springframework.stereotype.Component;
037
038/**
039 * Factory for defining the analysers.
040 */
041@Component
042public class HapiLuceneAnalysisConfigurer implements LuceneAnalysisConfigurer {
043
044        @Override
045        public void configure(LuceneAnalysisConfigurationContext theLuceneCtx) {
046                theLuceneCtx.analyzer("autocompleteEdgeAnalyzer").custom()
047                        .tokenizer(PatternTokenizerFactory.class).param("pattern", "(.*)").param("group", "1")
048                        .tokenFilter(LowerCaseFilterFactory.class)
049                        .tokenFilter(StopFilterFactory.class)
050                        .tokenFilter(EdgeNGramFilterFactory.class)
051                        .param("minGramSize", "3")
052                        .param("maxGramSize", "50");
053
054                theLuceneCtx.analyzer("autocompletePhoneticAnalyzer").custom()
055                        .tokenizer(StandardTokenizerFactory.class)
056                        .tokenFilter(StopFilterFactory.class)
057                        .tokenFilter(PhoneticFilterFactory.class).param("encoder", "DoubleMetaphone")
058                        .tokenFilter(SnowballPorterFilterFactory.class).param("language", "English");
059
060                theLuceneCtx.analyzer("autocompleteNGramAnalyzer").custom()
061                        .tokenizer(StandardTokenizerFactory.class)
062                        .tokenFilter(WordDelimiterFilterFactory.class)
063                        .tokenFilter(LowerCaseFilterFactory.class)
064                        .tokenFilter(NGramFilterFactory.class)
065                        .param("minGramSize", "3")
066                        .param("maxGramSize", "20");
067
068                theLuceneCtx.analyzer("autocompleteWordEdgeAnalyzer").custom()
069                        .tokenizer(StandardTokenizerFactory.class)
070                        .tokenFilter(LowerCaseFilterFactory.class)
071                        .tokenFilter(StopFilterFactory.class)
072                        .tokenFilter(EdgeNGramFilterFactory.class)
073                        .param("minGramSize", "3")
074                        .param("maxGramSize", "20");
075
076                theLuceneCtx.analyzer("standardAnalyzer").custom()
077                        .tokenizer(StandardTokenizerFactory.class)
078                        .tokenFilter(LowerCaseFilterFactory.class);
079
080                theLuceneCtx.analyzer("exactAnalyzer").custom()
081                        .tokenizer(KeywordTokenizerFactory.class);
082
083                theLuceneCtx.analyzer("conceptParentPidsAnalyzer").custom()
084                        .tokenizer(WhitespaceTokenizerFactory.class);
085
086                theLuceneCtx.analyzer("termConceptPropertyAnalyzer").custom()
087                        .tokenizer(WhitespaceTokenizerFactory.class);
088        }
089}