View Javadoc
1   package ca.uhn.fhir.jpa.dao;
2   
3   /*
4    * #%L
5    * HAPI FHIR JPA Server
6    * %%
7    * Copyright (C) 2014 - 2019 University Health Network
8    * %%
9    * Licensed under the Apache License, Version 2.0 (the "License");
10   * you may not use this file except in compliance with the License.
11   * You may obtain a copy of the License at
12   * 
13   *      http://www.apache.org/licenses/LICENSE-2.0
14   * 
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   * #L%
21   */
22  
23  import ca.uhn.fhir.jpa.dao.data.IForcedIdDao;
24  import ca.uhn.fhir.jpa.dao.index.IdHelperService;
25  import ca.uhn.fhir.jpa.model.entity.ResourceTable;
26  import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
27  import ca.uhn.fhir.model.api.IQueryParameterType;
28  import ca.uhn.fhir.rest.api.Constants;
29  import ca.uhn.fhir.rest.param.StringParam;
30  import ca.uhn.fhir.rest.param.TokenParam;
31  import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
32  import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
33  import com.google.common.collect.Lists;
34  import com.google.common.collect.Sets;
35  import org.apache.commons.lang3.StringUtils;
36  import org.apache.commons.lang3.Validate;
37  import org.apache.lucene.analysis.Analyzer;
38  import org.apache.lucene.search.Query;
39  import org.apache.lucene.search.highlight.Formatter;
40  import org.apache.lucene.search.highlight.*;
41  import org.hibernate.search.jpa.FullTextEntityManager;
42  import org.hibernate.search.jpa.FullTextQuery;
43  import org.hibernate.search.query.dsl.BooleanJunction;
44  import org.hibernate.search.query.dsl.QueryBuilder;
45  import org.hl7.fhir.instance.model.api.IAnyResource;
46  import org.springframework.beans.factory.annotation.Autowired;
47  import org.springframework.transaction.PlatformTransactionManager;
48  import org.springframework.transaction.annotation.Transactional;
49  import org.springframework.transaction.support.TransactionTemplate;
50  
51  import javax.persistence.EntityManager;
52  import javax.persistence.PersistenceContext;
53  import javax.persistence.PersistenceContextType;
54  import java.util.*;
55  
56  import static org.apache.commons.lang3.StringUtils.isNotBlank;
57  
58  public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
	// Class-wide SLF4J logger
	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FulltextSearchSvcImpl.class);

	// Transaction-scoped JPA entity manager; the Hibernate Search FullTextEntityManager is derived from it
	@PersistenceContext(type = PersistenceContextType.TRANSACTION)
	private EntityManager myEntityManager;
	// Used by isDisabled() to run its availability probe inside a transaction
	@Autowired
	private PlatformTransactionManager myTxManager;

	// Not referenced by the code visible in this class; protected, so presumably available to subclasses
	@Autowired
	protected IForcedIdDao myForcedIdDao;

	// Not referenced by the code visible in this class
	@Autowired
	private DaoConfig myDaoConfig;

	// Translates a resource type + ID into the resource's persistent ID (PID)
	@Autowired
	private IdHelperService myIdHelperService;

	// Cached result of isDisabled(); null until the first probe has run.
	// NOTE: despite the "our" prefix this is an instance field, and it is neither volatile nor synchronized.
	private Boolean ourDisabled;
76  
77  	/**
78  	 * Constructor
79  	 */
80  	public FulltextSearchSvcImpl() {
81  		super();
82  	}
83  
84  	private void addTextSearch(QueryBuilder theQueryBuilder, BooleanJunction<?> theBoolean, List<List<? extends IQueryParameterType>> theTerms, String theFieldName, String theFieldNameEdgeNGram, String theFieldNameNGram) {
85  		if (theTerms == null) {
86  			return;
87  		}
88  		for (List<? extends IQueryParameterType> nextAnd : theTerms) {
89  			Set<String> terms = new HashSet<>();
90  			for (IQueryParameterType nextOr : nextAnd) {
91  				StringParam nextOrString = (StringParam) nextOr;
92  				String nextValueTrimmed = StringUtils.defaultString(nextOrString.getValue()).trim();
93  				if (isNotBlank(nextValueTrimmed)) {
94  					terms.add(nextValueTrimmed);
95  				}
96  			}
97  			if (terms.isEmpty() == false) {
98  				if (terms.size() == 1) {
99  					//@formatter:off
100 					Query textQuery = theQueryBuilder
101 						.phrase()
102 						.withSlop(2)
103 						.onField(theFieldName).boostedTo(4.0f)
104 //						.andField(theFieldNameEdgeNGram).boostedTo(2.0f)
105 //						.andField(theFieldNameNGram).boostedTo(1.0f)
106 						.sentence(terms.iterator().next().toLowerCase()).createQuery();
107 					//@formatter:on
108 
109 					theBoolean.must(textQuery);
110 				} else {
111 					String joinedTerms = StringUtils.join(terms, ' ');
112 					theBoolean.must(theQueryBuilder.keyword().onField(theFieldName).matching(joinedTerms).createQuery());
113 				}
114 			}
115 		}
116 	}
117 
118 	private List<Long> doSearch(String theResourceName, SearchParameterMap theParams, Long theReferencingPid) {
119 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
120 
121 		List<Long> pids = null;
122 		
123 		/*
124 		 * Handle textual params
125 		 */
126 		/*
127 		for (String nextParamName : theParams.keySet()) {
128 			for (List<? extends IQueryParameterType> nextAndList : theParams.get(nextParamName)) {
129 				for (Iterator<? extends IQueryParameterType> orIterator = nextAndList.iterator(); orIterator.hasNext();) {
130 					IQueryParameterType nextParam = orIterator.next();
131 					if (nextParam instanceof TokenParam) {
132 						TokenParam nextTokenParam = (TokenParam) nextParam;
133 						if (nextTokenParam.isText()) {
134 							orIterator.remove();
135 							QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceIndexedSearchParamString.class).get();
136 							BooleanJunction<?> bool = qb.bool();
137 
138 							bool.must(qb.keyword().onField("myParamName").matching(nextParamName).createQuery());
139 							if (isNotBlank(theResourceName)) {
140 								bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
141 							}
142 //							
143 							//@formatter:off
144 							String value = nextTokenParam.getValue().toLowerCase();
145 							bool.must(qb.keyword().onField("myValueTextEdgeNGram").matching(value).createQuery());
146 							
147 							//@formatter:on
148 							
149 							FullTextQuery ftq = em.createFullTextQuery(bool.createQuery(), ResourceIndexedSearchParamString.class);
150 
151 							List<?> resultList = ftq.getResultList();
152 							pids = new ArrayList<Long>();
153 							for (Object next : resultList) {
154 								ResourceIndexedSearchParamString nextAsArray = (ResourceIndexedSearchParamString) next;
155 								pids.add(nextAsArray.getResourcePid());
156 							}
157 						}
158 					}
159 				}
160 			}
161 		}
162 		
163 		if (pids != null && pids.isEmpty()) {
164 			return pids;
165 		}
166 		*/
167 
168 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
169 		BooleanJunction<?> bool = qb.bool();
170 
171 		/*
172 		 * Handle _content parameter (resource body content)
173 		 */
174 		List<List<? extends IQueryParameterType>> contentAndTerms = theParams.remove(Constants.PARAM_CONTENT);
175 		addTextSearch(qb, bool, contentAndTerms, "myContentText", "myContentTextEdgeNGram", "myContentTextNGram");
176 
177 		/*
178 		 * Handle _text parameter (resource narrative content)
179 		 */
180 		List<List<? extends IQueryParameterType>> textAndTerms = theParams.remove(Constants.PARAM_TEXT);
181 		addTextSearch(qb, bool, textAndTerms, "myNarrativeText", "myNarrativeTextEdgeNGram", "myNarrativeTextNGram");
182 
183 		if (theReferencingPid != null) {
184 			bool.must(qb.keyword().onField("myResourceLinksField").matching(theReferencingPid.toString()).createQuery());
185 		}
186 
187 		if (bool.isEmpty()) {
188 			return pids;
189 		}
190 
191 		if (isNotBlank(theResourceName)) {
192 			bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
193 		}
194 
195 		Query luceneQuery = bool.createQuery();
196 
197 		// wrap Lucene query in a javax.persistence.Query
198 		FullTextQuery jpaQuery = em.createFullTextQuery(luceneQuery, ResourceTable.class);
199 		jpaQuery.setProjection("myId");
200 
201 		// execute search
202 		List<?> result = jpaQuery.getResultList();
203 
204 		ArrayList<Long> retVal = new ArrayList<>();
205 		for (Object object : result) {
206 			Object[] nextArray = (Object[]) object;
207 			Long next = (Long) nextArray[0];
208 			if (next != null) {
209 				retVal.add(next);
210 			}
211 		}
212 
213 		return retVal;
214 	}
215 
216 	@Override
217 	public List<Long> everything(String theResourceName, SearchParameterMap theParams) {
218 
219 		Long pid = null;
220 		if (theParams.get(IAnyResource.SP_RES_ID) != null) {
221 			String idParamValue;
222 			IQueryParameterType idParam = theParams.get(IAnyResource.SP_RES_ID).get(0).get(0);
223 			if (idParam instanceof TokenParam) {
224 				TokenParam idParm = (TokenParam) idParam;
225 				idParamValue = idParm.getValue();
226 			} else {
227 				StringParam idParm = (StringParam) idParam;
228 				idParamValue = idParm.getValue();
229 			}
230 			pid = myIdHelperService.translateForcedIdToPid(theResourceName, idParamValue);
231 		}
232 
233 		Long referencingPid = pid;
234 		List<Long> retVal = doSearch(null, theParams, referencingPid);
235 		if (referencingPid != null) {
236 			retVal.add(referencingPid);
237 		}
238 		return retVal;
239 	}
240 
241 	@Override
242 	public boolean isDisabled() {
243 		Boolean retVal = ourDisabled;
244 
245 		if (retVal == null) {
246 			retVal = new TransactionTemplate(myTxManager).execute(t -> {
247 				try {
248 					FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
249 					em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
250 					return Boolean.FALSE;
251 				} catch (Exception e) {
252 					ourLog.trace("FullText test failed", e);
253 					ourLog.debug("Hibernate Search (Lucene) appears to be disabled on this server, fulltext will be disabled");
254 					return Boolean.TRUE;
255 				}
256 			});
257 			ourDisabled = retVal;
258 		}
259 
260 		assert retVal != null;
261 		return retVal;
262 	}
263 
264 	@Transactional()
265 	@Override
266 	public List<Long> search(String theResourceName, SearchParameterMap theParams) {
267 		return doSearch(theResourceName, theParams, null);
268 	}
269 
270 	@Transactional()
271 	@Override
272 	public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) {
273 		Validate.notBlank(theContext, "theContext must be provided");
274 		Validate.notBlank(theSearchParam, "theSearchParam must be provided");
275 		Validate.notBlank(theText, "theSearchParam must be provided");
276 
277 		long start = System.currentTimeMillis();
278 
279 		String[] contextParts = StringUtils.split(theContext, '/');
280 		if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) {
281 			throw new InvalidRequestException("Invalid context: " + theContext);
282 		}
283 		Long pid = myIdHelperService.translateForcedIdToPid(contextParts[0], contextParts[1]);
284 
285 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
286 
287 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
288 
289 		Query textQuery = qb
290 			.phrase()
291 			.withSlop(2)
292 			.onField("myContentText").boostedTo(4.0f)
293 			.andField("myContentTextEdgeNGram").boostedTo(2.0f)
294 			.andField("myContentTextNGram").boostedTo(1.0f)
295 			.andField("myContentTextPhonetic").boostedTo(0.5f)
296 			.sentence(theText.toLowerCase()).createQuery();
297 
298 		Query query = qb.bool()
299 //			.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery())
300 			.must(qb.keyword().onField("myResourceLinksField").matching(pid.toString()).createQuery())
301 			.must(textQuery)
302 			.createQuery();
303 
304 		FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class);
305 		ftq.setProjection("myContentText");
306 		ftq.setMaxResults(20);
307 
308 		List<?> resultList = ftq.getResultList();
309 		List<Suggestion> suggestions = Lists.newArrayList();
310 		for (Object next : resultList) {
311 			Object[] nextAsArray = (Object[]) next;
312 			String nextValue = (String) nextAsArray[0];
313 
314 			try {
315 				MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions);
316 				Scorer scorer = new QueryScorer(textQuery);
317 				Highlighter highlighter = new Highlighter(formatter, scorer);
318 				Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class);
319 
320 				formatter.setAnalyzer("myContentTextPhonetic");
321 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10);
322 
323 				formatter.setAnalyzer("myContentTextNGram");
324 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10);
325 
326 				formatter.setFindPhrasesWith();
327 				formatter.setAnalyzer("myContentTextEdgeNGram");
328 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
329 
330 			} catch (Exception e) {
331 				throw new InternalErrorException(e);
332 			}
333 
334 		}
335 
336 		Collections.sort(suggestions);
337 
338 		Set<String> terms = Sets.newHashSet();
339 		for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext(); ) {
340 			String nextTerm = iter.next().getTerm().toLowerCase();
341 			if (!terms.add(nextTerm)) {
342 				iter.remove();
343 			}
344 		}
345 
346 		long delay = System.currentTimeMillis() - start;
347 		ourLog.info("Provided {} suggestions for term {} in {} ms", terms.size(), theText, delay);
348 
349 		return suggestions;
350 	}
351 
352 	public class MySuggestionFormatter implements Formatter {
353 
354 		private List<Suggestion> mySuggestions;
355 		private String myAnalyzer;
356 		private ArrayList<String> myPartialMatchPhrases;
357 		private ArrayList<Float> myPartialMatchScores;
358 		private String myOriginalSearch;
359 
360 		MySuggestionFormatter(String theOriginalSearch, List<Suggestion> theSuggestions) {
361 			myOriginalSearch = theOriginalSearch;
362 			mySuggestions = theSuggestions;
363 		}
364 
365 		@Override
366 		public String highlightTerm(String theOriginalText, TokenGroup theTokenGroup) {
367 			ourLog.debug("{} Found {} with score {}", myAnalyzer, theOriginalText, theTokenGroup.getTotalScore());
368 			if (theTokenGroup.getTotalScore() > 0) {
369 				float score = theTokenGroup.getTotalScore();
370 				if (theOriginalText.equalsIgnoreCase(myOriginalSearch)) {
371 					score = score + 1.0f;
372 				}
373 				mySuggestions.add(new Suggestion(theOriginalText, score));
374 			} else if (myPartialMatchPhrases != null) {
375 				if (theOriginalText.length() < 100) {
376 					for (int i = 0; i < myPartialMatchPhrases.size(); i++) {
377 						if (theOriginalText.contains(myPartialMatchPhrases.get(i))) {
378 							mySuggestions.add(new Suggestion(theOriginalText, myPartialMatchScores.get(i) - 0.5f));
379 						}
380 					}
381 				}
382 			}
383 
384 			return null;
385 		}
386 
387 		void setAnalyzer(String theString) {
388 			myAnalyzer = theString;
389 		}
390 
391 		void setFindPhrasesWith() {
392 			myPartialMatchPhrases = new ArrayList<>();
393 			myPartialMatchScores = new ArrayList<>();
394 
395 			for (Suggestion next : mySuggestions) {
396 				myPartialMatchPhrases.add(' ' + next.myTerm);
397 				myPartialMatchScores.add(next.myScore);
398 			}
399 
400 			myPartialMatchPhrases.add(myOriginalSearch);
401 			myPartialMatchScores.add(1.0f);
402 		}
403 
404 	}
405 
406 	public static class Suggestion implements Comparable<Suggestion> {
407 		private String myTerm;
408 		private float myScore;
409 
410 		Suggestion(String theTerm, float theScore) {
411 			myTerm = theTerm;
412 			myScore = theScore;
413 		}
414 
415 		@Override
416 		public int compareTo(Suggestion theO) {
417 			return Float.compare(theO.myScore, myScore);
418 		}
419 
420 		public float getScore() {
421 			return myScore;
422 		}
423 
424 		public String getTerm() {
425 			return myTerm;
426 		}
427 
428 		@Override
429 		public String toString() {
430 			return "Suggestion[myTerm=" + myTerm + ", myScore=" + myScore + "]";
431 		}
432 	}
433 
434 }