001/*- 002 * #%L 003 * HAPI FHIR JPA Server 004 * %% 005 * Copyright (C) 2014 - 2024 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.jpa.dao.search; 021 022import org.apache.commons.lang3.StringUtils; 023 024import java.util.Arrays; 025import java.util.Set; 026import java.util.stream.Collectors; 027 028public class TermHelper { 029 030 /** characters which indicate the string parameter is a simple query string */ 031 private static final char[] simpleQuerySyntaxCharacters = new char[] {'+', '|', '"', '(', ')', '~'}; 032 033 /** 034 * Each input set element is: 035 * _ copied to the output set unchanged if it contains a '*' character or is quoted 036 * _ trimmed, tokenized by spaces, and suffixed by ' *', and each resulting string copied to the output set 037 */ 038 public static Set<String> makePrefixSearchTerm(Set<String> theStringSet) { 039 return theStringSet.stream() 040 .map(s -> isToLeftUntouched(s) || isQuoted(s) ? s : suffixTokensWithStar(s)) 041 .collect(Collectors.toSet()); 042 } 043 044 private static String suffixTokensWithStar(String theStr) { 045 StringBuilder sb = new StringBuilder(); 046 047 Arrays.stream(theStr.trim().split(" ")).forEach(s -> sb.append(s).append("* ")); 048 049 return sb.toString().trim(); 050 } 051 052 private static boolean isQuoted(String theS) { 053 return (theS.startsWith("\"") && theS.endsWith("\"")) || (theS.startsWith("'") && theS.endsWith("'")); 054 } 055 056 /** 057 * Returns true when the input string is recognized as Lucene Simple Query Syntax 058 * @see "https://lucene.apache.org/core/8_11_2/queryparser/org/apache/lucene/queryparser/simple/SimpleQueryParser.html" 059 */ 060 static boolean isToLeftUntouched(String theString) { 061 // remove backslashed * and - characters from string before checking, as those shouldn't be considered 062 if (theString.startsWith("-")) { 063 return true; 064 } // it is SimpleQuerySyntax 065 066 if (theString.endsWith("*")) { 067 return true; 068 } // it is SimpleQuerySyntax 069 070 return StringUtils.containsAny(theString, simpleQuerySyntaxCharacters); 071 } 072}