001/*-
002 * #%L
003 * HAPI FHIR JPA Server
004 * %%
005 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.jpa.dao.search;
021
022import org.apache.commons.lang3.StringUtils;
023
024import java.util.Arrays;
025import java.util.Set;
026import java.util.stream.Collectors;
027
028public class TermHelper {
029
030        /** characters which indicate the string parameter is a simple query string */
031        private static final char[] simpleQuerySyntaxCharacters = new char[] {'+', '|', '"', '(', ')', '~'};
032
033        /**
034         * Each input set element is:
035         *   _ copied to the output set unchanged if it contains a '*' character or is quoted
036         *   _ trimmed, tokenized by spaces, and suffixed by ' *', and each resulting string copied to the output set
037         */
038        public static Set<String> makePrefixSearchTerm(Set<String> theStringSet) {
039                return theStringSet.stream()
040                                .map(s -> isToLeftUntouched(s) || isQuoted(s) ? s : suffixTokensWithStar(s))
041                                .collect(Collectors.toSet());
042        }
043
044        private static String suffixTokensWithStar(String theStr) {
045                StringBuilder sb = new StringBuilder();
046
047                Arrays.stream(theStr.trim().split(" ")).forEach(s -> sb.append(s).append("* "));
048
049                return sb.toString().trim();
050        }
051
052        private static boolean isQuoted(String theS) {
053                return (theS.startsWith("\"") && theS.endsWith("\"")) || (theS.startsWith("'") && theS.endsWith("'"));
054        }
055
056        /**
057         * Returns true when the input string is recognized as Lucene Simple Query Syntax
058         * @see "https://lucene.apache.org/core/8_11_2/queryparser/org/apache/lucene/queryparser/simple/SimpleQueryParser.html"
059         */
060        static boolean isToLeftUntouched(String theString) {
061                // remove backslashed * and - characters from string before checking, as those shouldn't be considered
062                if (theString.startsWith("-")) {
063                        return true;
064                } // it is SimpleQuerySyntax
065
066                if (theString.endsWith("*")) {
067                        return true;
068                } // it is SimpleQuerySyntax
069
070                return StringUtils.containsAny(theString, simpleQuerySyntaxCharacters);
071        }
072}