View Javadoc
1   package ca.uhn.fhir.jpa.dao;
2   
3   /*
4    * #%L
5    * HAPI FHIR JPA Server
6    * %%
7    * Copyright (C) 2014 - 2019 University Health Network
8    * %%
9    * Licensed under the Apache License, Version 2.0 (the "License");
10   * you may not use this file except in compliance with the License.
11   * You may obtain a copy of the License at
12   *
13   *      http://www.apache.org/licenses/LICENSE-2.0
14   *
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   * #L%
21   */
22  
23  import ca.uhn.fhir.jpa.dao.data.IForcedIdDao;
24  import ca.uhn.fhir.jpa.dao.index.IdHelperService;
25  import ca.uhn.fhir.jpa.model.entity.ResourceTable;
26  import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
27  import ca.uhn.fhir.model.api.IQueryParameterType;
28  import ca.uhn.fhir.rest.api.Constants;
29  import ca.uhn.fhir.rest.api.server.RequestDetails;
30  import ca.uhn.fhir.rest.param.StringParam;
31  import ca.uhn.fhir.rest.param.TokenParam;
32  import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
33  import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
34  import com.google.common.collect.Lists;
35  import com.google.common.collect.Sets;
36  import org.apache.commons.lang3.StringUtils;
37  import org.apache.commons.lang3.Validate;
38  import org.apache.lucene.analysis.Analyzer;
39  import org.apache.lucene.search.Query;
40  import org.apache.lucene.search.highlight.Formatter;
41  import org.apache.lucene.search.highlight.*;
42  import org.hibernate.search.jpa.FullTextEntityManager;
43  import org.hibernate.search.jpa.FullTextQuery;
44  import org.hibernate.search.query.dsl.BooleanJunction;
45  import org.hibernate.search.query.dsl.QueryBuilder;
46  import org.hl7.fhir.instance.model.api.IAnyResource;
47  import org.springframework.beans.factory.annotation.Autowired;
48  import org.springframework.transaction.PlatformTransactionManager;
49  import org.springframework.transaction.annotation.Transactional;
50  import org.springframework.transaction.support.TransactionTemplate;
51  
52  import javax.persistence.EntityManager;
53  import javax.persistence.PersistenceContext;
54  import javax.persistence.PersistenceContextType;
55  import java.util.*;
56  
57  import static org.apache.commons.lang3.StringUtils.isNotBlank;
58  
59  public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
	/** Logger shared by this class and its nested formatter. */
	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FulltextSearchSvcImpl.class);

	/** Transaction-scoped JPA entity manager; the methods below wrap it in a Hibernate Search FullTextEntityManager. */
	@PersistenceContext(type = PersistenceContextType.TRANSACTION)
	private EntityManager myEntityManager;
	/** Transaction manager used by isDisabled() to run its probe inside a TransactionTemplate. */
	@Autowired
	private PlatformTransactionManager myTxManager;

	/** Not referenced by any method of this class. NOTE(review): presumably kept for subclasses since it is protected - confirm. */
	@Autowired
	protected IForcedIdDao myForcedIdDao;

	/** Not referenced by any method of this class. */
	@Autowired
	private DaoConfig myDaoConfig;

	/** Translates a resource type plus ID string into a persistent ID (PID) for everything() and suggestKeywords(). */
	@Autowired
	private IdHelperService myIdHelperService;

	/** Cached answer of isDisabled(); stays null until the first call has probed Hibernate Search. Instance-scoped despite the "our" prefix. */
	private Boolean ourDisabled;
77  
78  	/**
79  	 * Constructor
80  	 */
81  	public FulltextSearchSvcImpl() {
82  		super();
83  	}
84  
85  	private void addTextSearch(QueryBuilder theQueryBuilder, BooleanJunction<?> theBoolean, List<List<IQueryParameterType>> theTerms, String theFieldName, String theFieldNameEdgeNGram, String theFieldNameNGram) {
86  		if (theTerms == null) {
87  			return;
88  		}
89  		for (List<? extends IQueryParameterType> nextAnd : theTerms) {
90  			Set<String> terms = new HashSet<>();
91  			for (IQueryParameterType nextOr : nextAnd) {
92  				StringParam nextOrString = (StringParam) nextOr;
93  				String nextValueTrimmed = StringUtils.defaultString(nextOrString.getValue()).trim();
94  				if (isNotBlank(nextValueTrimmed)) {
95  					terms.add(nextValueTrimmed);
96  				}
97  			}
98  			if (terms.isEmpty() == false) {
99  				if (terms.size() == 1) {
100 					//@formatter:off
101 					Query textQuery = theQueryBuilder
102 						.phrase()
103 						.withSlop(2)
104 						.onField(theFieldName).boostedTo(4.0f)
105 //						.andField(theFieldNameEdgeNGram).boostedTo(2.0f)
106 //						.andField(theFieldNameNGram).boostedTo(1.0f)
107 						.sentence(terms.iterator().next().toLowerCase()).createQuery();
108 					//@formatter:on
109 
110 					theBoolean.must(textQuery);
111 				} else {
112 					String joinedTerms = StringUtils.join(terms, ' ');
113 					theBoolean.must(theQueryBuilder.keyword().onField(theFieldName).matching(joinedTerms).createQuery());
114 				}
115 			}
116 		}
117 	}
118 
119 	private List<Long> doSearch(String theResourceName, SearchParameterMap theParams, Long theReferencingPid) {
120 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
121 
122 		List<Long> pids = null;
123 		
124 		/*
125 		 * Handle textual params
126 		 */
127 		/*
128 		for (String nextParamName : theParams.keySet()) {
129 			for (List<? extends IQueryParameterType> nextAndList : theParams.get(nextParamName)) {
130 				for (Iterator<? extends IQueryParameterType> orIterator = nextAndList.iterator(); orIterator.hasNext();) {
131 					IQueryParameterType nextParam = orIterator.next();
132 					if (nextParam instanceof TokenParam) {
133 						TokenParam nextTokenParam = (TokenParam) nextParam;
134 						if (nextTokenParam.isText()) {
135 							orIterator.remove();
136 							QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceIndexedSearchParamString.class).get();
137 							BooleanJunction<?> bool = qb.bool();
138 
139 							bool.must(qb.keyword().onField("myParamName").matching(nextParamName).createQuery());
140 							if (isNotBlank(theResourceName)) {
141 								bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
142 							}
143 //							
144 							//@formatter:off
145 							String value = nextTokenParam.getValue().toLowerCase();
146 							bool.must(qb.keyword().onField("myValueTextEdgeNGram").matching(value).createQuery());
147 							
148 							//@formatter:on
149 							
150 							FullTextQuery ftq = em.createFullTextQuery(bool.createQuery(), ResourceIndexedSearchParamString.class);
151 
152 							List<?> resultList = ftq.getResultList();
153 							pids = new ArrayList<Long>();
154 							for (Object next : resultList) {
155 								ResourceIndexedSearchParamString nextAsArray = (ResourceIndexedSearchParamString) next;
156 								pids.add(nextAsArray.getResourcePid());
157 							}
158 						}
159 					}
160 				}
161 			}
162 		}
163 		
164 		if (pids != null && pids.isEmpty()) {
165 			return pids;
166 		}
167 		*/
168 
169 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
170 		BooleanJunction<?> bool = qb.bool();
171 
172 		/*
173 		 * Handle _content parameter (resource body content)
174 		 */
175 		List<List<IQueryParameterType>> contentAndTerms = theParams.remove(Constants.PARAM_CONTENT);
176 		addTextSearch(qb, bool, contentAndTerms, "myContentText", "myContentTextEdgeNGram", "myContentTextNGram");
177 
178 		/*
179 		 * Handle _text parameter (resource narrative content)
180 		 */
181 		List<List<IQueryParameterType>> textAndTerms = theParams.remove(Constants.PARAM_TEXT);
182 		addTextSearch(qb, bool, textAndTerms, "myNarrativeText", "myNarrativeTextEdgeNGram", "myNarrativeTextNGram");
183 
184 		if (theReferencingPid != null) {
185 			bool.must(qb.keyword().onField("myResourceLinksField").matching(theReferencingPid.toString()).createQuery());
186 		}
187 
188 		if (bool.isEmpty()) {
189 			return pids;
190 		}
191 
192 		if (isNotBlank(theResourceName)) {
193 			bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
194 		}
195 
196 		Query luceneQuery = bool.createQuery();
197 
198 		// wrap Lucene query in a javax.persistence.SqlQuery
199 		FullTextQuery jpaQuery = em.createFullTextQuery(luceneQuery, ResourceTable.class);
200 		jpaQuery.setProjection("myId");
201 
202 		// execute search
203 		List<?> result = jpaQuery.getResultList();
204 
205 		ArrayList<Long> retVal = new ArrayList<>();
206 		for (Object object : result) {
207 			Object[] nextArray = (Object[]) object;
208 			Long next = (Long) nextArray[0];
209 			if (next != null) {
210 				retVal.add(next);
211 			}
212 		}
213 
214 		return retVal;
215 	}
216 
217 	@Override
218 	public List<Long> everything(String theResourceName, SearchParameterMap theParams, RequestDetails theRequest) {
219 
220 		Long pid = null;
221 		if (theParams.get(IAnyResource.SP_RES_ID) != null) {
222 			String idParamValue;
223 			IQueryParameterType idParam = theParams.get(IAnyResource.SP_RES_ID).get(0).get(0);
224 			if (idParam instanceof TokenParam) {
225 				TokenParam idParm = (TokenParam) idParam;
226 				idParamValue = idParm.getValue();
227 			} else {
228 				StringParam idParm = (StringParam) idParam;
229 				idParamValue = idParm.getValue();
230 			}
231 			pid = myIdHelperService.translateForcedIdToPid(theResourceName, idParamValue, theRequest);
232 		}
233 
234 		Long referencingPid = pid;
235 		List<Long> retVal = doSearch(null, theParams, referencingPid);
236 		if (referencingPid != null) {
237 			retVal.add(referencingPid);
238 		}
239 		return retVal;
240 	}
241 
242 	@Override
243 	public boolean isDisabled() {
244 		Boolean retVal = ourDisabled;
245 
246 		if (retVal == null) {
247 			retVal = new TransactionTemplate(myTxManager).execute(t -> {
248 				try {
249 					FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
250 					em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
251 					return Boolean.FALSE;
252 				} catch (Exception e) {
253 					ourLog.trace("FullText test failed", e);
254 					ourLog.debug("Hibernate Search (Lucene) appears to be disabled on this server, fulltext will be disabled");
255 					return Boolean.TRUE;
256 				}
257 			});
258 			ourDisabled = retVal;
259 		}
260 
261 		assert retVal != null;
262 		return retVal;
263 	}
264 
265 	@Transactional()
266 	@Override
267 	public List<Long> search(String theResourceName, SearchParameterMap theParams) {
268 		return doSearch(theResourceName, theParams, null);
269 	}
270 
271 	@Transactional()
272 	@Override
273 	public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText, RequestDetails theRequest) {
274 		Validate.notBlank(theContext, "theContext must be provided");
275 		Validate.notBlank(theSearchParam, "theSearchParam must be provided");
276 		Validate.notBlank(theText, "theSearchParam must be provided");
277 
278 		long start = System.currentTimeMillis();
279 
280 		String[] contextParts = StringUtils.split(theContext, '/');
281 		if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) {
282 			throw new InvalidRequestException("Invalid context: " + theContext);
283 		}
284 		Long pid = myIdHelperService.translateForcedIdToPid(contextParts[0], contextParts[1], theRequest);
285 
286 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
287 
288 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
289 
290 		Query textQuery = qb
291 			.phrase()
292 			.withSlop(2)
293 			.onField("myContentText").boostedTo(4.0f)
294 			.andField("myContentTextEdgeNGram").boostedTo(2.0f)
295 			.andField("myContentTextNGram").boostedTo(1.0f)
296 			.andField("myContentTextPhonetic").boostedTo(0.5f)
297 			.sentence(theText.toLowerCase()).createQuery();
298 
299 		Query query = qb.bool()
300 //			.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery())
301 			.must(qb.keyword().onField("myResourceLinksField").matching(pid.toString()).createQuery())
302 			.must(textQuery)
303 			.createQuery();
304 
305 		FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class);
306 		ftq.setProjection("myContentText");
307 		ftq.setMaxResults(20);
308 
309 		List<?> resultList = ftq.getResultList();
310 		List<Suggestion> suggestions = Lists.newArrayList();
311 		for (Object next : resultList) {
312 			Object[] nextAsArray = (Object[]) next;
313 			String nextValue = (String) nextAsArray[0];
314 
315 			try {
316 				MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions);
317 				Scorer scorer = new QueryScorer(textQuery);
318 				Highlighter highlighter = new Highlighter(formatter, scorer);
319 				Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class);
320 
321 				formatter.setAnalyzer("myContentTextPhonetic");
322 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10);
323 
324 				formatter.setAnalyzer("myContentTextNGram");
325 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10);
326 
327 				formatter.setFindPhrasesWith();
328 				formatter.setAnalyzer("myContentTextEdgeNGram");
329 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
330 
331 			} catch (Exception e) {
332 				throw new InternalErrorException(e);
333 			}
334 
335 		}
336 
337 		Collections.sort(suggestions);
338 
339 		Set<String> terms = Sets.newHashSet();
340 		for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext(); ) {
341 			String nextTerm = iter.next().getTerm().toLowerCase();
342 			if (!terms.add(nextTerm)) {
343 				iter.remove();
344 			}
345 		}
346 
347 		long delay = System.currentTimeMillis() - start;
348 		ourLog.info("Provided {} suggestions for term {} in {} ms", terms.size(), theText, delay);
349 
350 		return suggestions;
351 	}
352 
353 	public class MySuggestionFormatter implements Formatter {
354 
355 		private List<Suggestion> mySuggestions;
356 		private String myAnalyzer;
357 		private ArrayList<String> myPartialMatchPhrases;
358 		private ArrayList<Float> myPartialMatchScores;
359 		private String myOriginalSearch;
360 
361 		MySuggestionFormatter(String theOriginalSearch, List<Suggestion> theSuggestions) {
362 			myOriginalSearch = theOriginalSearch;
363 			mySuggestions = theSuggestions;
364 		}
365 
366 		@Override
367 		public String highlightTerm(String theOriginalText, TokenGroup theTokenGroup) {
368 			ourLog.debug("{} Found {} with score {}", myAnalyzer, theOriginalText, theTokenGroup.getTotalScore());
369 			if (theTokenGroup.getTotalScore() > 0) {
370 				float score = theTokenGroup.getTotalScore();
371 				if (theOriginalText.equalsIgnoreCase(myOriginalSearch)) {
372 					score = score + 1.0f;
373 				}
374 				mySuggestions.add(new Suggestion(theOriginalText, score));
375 			} else if (myPartialMatchPhrases != null) {
376 				if (theOriginalText.length() < 100) {
377 					for (int i = 0; i < myPartialMatchPhrases.size(); i++) {
378 						if (theOriginalText.contains(myPartialMatchPhrases.get(i))) {
379 							mySuggestions.add(new Suggestion(theOriginalText, myPartialMatchScores.get(i) - 0.5f));
380 						}
381 					}
382 				}
383 			}
384 
385 			return null;
386 		}
387 
388 		void setAnalyzer(String theString) {
389 			myAnalyzer = theString;
390 		}
391 
392 		void setFindPhrasesWith() {
393 			myPartialMatchPhrases = new ArrayList<>();
394 			myPartialMatchScores = new ArrayList<>();
395 
396 			for (Suggestion next : mySuggestions) {
397 				myPartialMatchPhrases.add(' ' + next.myTerm);
398 				myPartialMatchScores.add(next.myScore);
399 			}
400 
401 			myPartialMatchPhrases.add(myOriginalSearch);
402 			myPartialMatchScores.add(1.0f);
403 		}
404 
405 	}
406 
407 	public static class Suggestion implements Comparable<Suggestion> {
408 		private String myTerm;
409 		private float myScore;
410 
411 		Suggestion(String theTerm, float theScore) {
412 			myTerm = theTerm;
413 			myScore = theScore;
414 		}
415 
416 		@Override
417 		public int compareTo(Suggestion theO) {
418 			return Float.compare(theO.myScore, myScore);
419 		}
420 
421 		public float getScore() {
422 			return myScore;
423 		}
424 
425 		public String getTerm() {
426 			return myTerm;
427 		}
428 
429 		@Override
430 		public String toString() {
431 			return "Suggestion[myTerm=" + myTerm + ", myScore=" + myScore + "]";
432 		}
433 	}
434 
435 }