View Javadoc
1   package ca.uhn.fhir.jpa.dao;
2   
3   /*
4    * #%L
5    * HAPI FHIR JPA Server
6    * %%
7    * Copyright (C) 2014 - 2018 University Health Network
8    * %%
9    * Licensed under the Apache License, Version 2.0 (the "License");
10   * you may not use this file except in compliance with the License.
11   * You may obtain a copy of the License at
12   * 
13   *      http://www.apache.org/licenses/LICENSE-2.0
14   * 
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   * #L%
21   */
22  
23  import ca.uhn.fhir.jpa.dao.data.IForcedIdDao;
24  import ca.uhn.fhir.jpa.entity.ResourceTable;
25  import ca.uhn.fhir.model.api.IQueryParameterType;
26  import ca.uhn.fhir.rest.api.Constants;
27  import ca.uhn.fhir.rest.param.StringParam;
28  import ca.uhn.fhir.rest.param.TokenParam;
29  import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
30  import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
31  import com.google.common.collect.Lists;
32  import com.google.common.collect.Sets;
33  import org.apache.commons.lang3.StringUtils;
34  import org.apache.commons.lang3.Validate;
35  import org.apache.lucene.analysis.Analyzer;
36  import org.apache.lucene.search.Query;
37  import org.apache.lucene.search.highlight.Formatter;
38  import org.apache.lucene.search.highlight.*;
39  import org.hibernate.search.jpa.FullTextEntityManager;
40  import org.hibernate.search.jpa.FullTextQuery;
41  import org.hibernate.search.query.dsl.BooleanJunction;
42  import org.hibernate.search.query.dsl.QueryBuilder;
43  import org.hl7.fhir.dstu3.model.BaseResource;
44  import org.hl7.fhir.instance.model.api.IBaseResource;
45  import org.springframework.beans.factory.annotation.Autowired;
46  import org.springframework.transaction.PlatformTransactionManager;
47  import org.springframework.transaction.annotation.Transactional;
48  import org.springframework.transaction.support.TransactionTemplate;
49  
50  import javax.persistence.EntityManager;
51  import javax.persistence.PersistenceContext;
52  import javax.persistence.PersistenceContextType;
53  import java.util.*;
54  
55  import static org.apache.commons.lang3.StringUtils.isNotBlank;
56  
57  public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
58  	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FulltextSearchSvcImpl.class);
59  
60  	@PersistenceContext(type = PersistenceContextType.TRANSACTION)
61  	private EntityManager myEntityManager;
62  	@Autowired
63  	private PlatformTransactionManager myTxManager;
64  
65  	@Autowired
66  	protected IForcedIdDao myForcedIdDao;
67  
68  	private Boolean ourDisabled;
69  
70  	/**
71  	 * Constructor
72  	 */
73  	public FulltextSearchSvcImpl() {
74  		super();
75  	}
76  
77  	private void addTextSearch(QueryBuilder theQueryBuilder, BooleanJunction<?> theBoolean, List<List<? extends IQueryParameterType>> theTerms, String theFieldName, String theFieldNameEdgeNGram, String theFieldNameNGram) {
78  		if (theTerms == null) {
79  			return;
80  		}
81  		for (List<? extends IQueryParameterType> nextAnd : theTerms) {
82  			Set<String> terms = new HashSet<>();
83  			for (IQueryParameterType nextOr : nextAnd) {
84  				StringParam nextOrString = (StringParam) nextOr;
85  				String nextValueTrimmed = StringUtils.defaultString(nextOrString.getValue()).trim();
86  				if (isNotBlank(nextValueTrimmed)) {
87  					terms.add(nextValueTrimmed);
88  				}
89  			}
90  			if (terms.isEmpty() == false) {
91  				if (terms.size() == 1) {
92  					//@formatter:off
93  					Query textQuery = theQueryBuilder
94  						.phrase()
95  						.withSlop(2)
96  						.onField(theFieldName).boostedTo(4.0f)
97  //						.andField(theFieldNameEdgeNGram).boostedTo(2.0f)
98  //						.andField(theFieldNameNGram).boostedTo(1.0f)
99  						.sentence(terms.iterator().next().toLowerCase()).createQuery();
100 					//@formatter:on
101 
102 					theBoolean.must(textQuery);
103 				} else {
104 					String joinedTerms = StringUtils.join(terms, ' ');
105 					theBoolean.must(theQueryBuilder.keyword().onField(theFieldName).matching(joinedTerms).createQuery());
106 				}
107 			}
108 		}
109 	}
110 
111 	private List<Long> doSearch(String theResourceName, SearchParameterMap theParams, Long theReferencingPid) {
112 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
113 
114 		List<Long> pids = null;
115 		
116 		/*
117 		 * Handle textual params
118 		 */
119 		/*
120 		for (String nextParamName : theParams.keySet()) {
121 			for (List<? extends IQueryParameterType> nextAndList : theParams.get(nextParamName)) {
122 				for (Iterator<? extends IQueryParameterType> orIterator = nextAndList.iterator(); orIterator.hasNext();) {
123 					IQueryParameterType nextParam = orIterator.next();
124 					if (nextParam instanceof TokenParam) {
125 						TokenParam nextTokenParam = (TokenParam) nextParam;
126 						if (nextTokenParam.isText()) {
127 							orIterator.remove();
128 							QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceIndexedSearchParamString.class).get();
129 							BooleanJunction<?> bool = qb.bool();
130 
131 							bool.must(qb.keyword().onField("myParamName").matching(nextParamName).createQuery());
132 							if (isNotBlank(theResourceName)) {
133 								bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
134 							}
135 //							
136 							//@formatter:off
137 							String value = nextTokenParam.getValue().toLowerCase();
138 							bool.must(qb.keyword().onField("myValueTextEdgeNGram").matching(value).createQuery());
139 							
140 							//@formatter:on
141 							
142 							FullTextQuery ftq = em.createFullTextQuery(bool.createQuery(), ResourceIndexedSearchParamString.class);
143 
144 							List<?> resultList = ftq.getResultList();
145 							pids = new ArrayList<Long>();
146 							for (Object next : resultList) {
147 								ResourceIndexedSearchParamString nextAsArray = (ResourceIndexedSearchParamString) next;
148 								pids.add(nextAsArray.getResourcePid());
149 							}
150 						}
151 					}
152 				}
153 			}
154 		}
155 		
156 		if (pids != null && pids.isEmpty()) {
157 			return pids;
158 		}
159 		*/
160 
161 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
162 		BooleanJunction<?> bool = qb.bool();
163 
164 		/*
165 		 * Handle _content parameter (resource body content)
166 		 */
167 		List<List<? extends IQueryParameterType>> contentAndTerms = theParams.remove(Constants.PARAM_CONTENT);
168 		addTextSearch(qb, bool, contentAndTerms, "myContentText", "myContentTextEdgeNGram", "myContentTextNGram");
169 
170 		/*
171 		 * Handle _text parameter (resource narrative content)
172 		 */
173 		List<List<? extends IQueryParameterType>> textAndTerms = theParams.remove(Constants.PARAM_TEXT);
174 		addTextSearch(qb, bool, textAndTerms, "myNarrativeText", "myNarrativeTextEdgeNGram", "myNarrativeTextNGram");
175 
176 		if (theReferencingPid != null) {
177 			bool.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(theReferencingPid).createQuery());
178 		}
179 
180 		if (bool.isEmpty()) {
181 			return pids;
182 		}
183 
184 		if (isNotBlank(theResourceName)) {
185 			bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
186 		}
187 
188 		Query luceneQuery = bool.createQuery();
189 
190 		// wrap Lucene query in a javax.persistence.Query
191 		FullTextQuery jpaQuery = em.createFullTextQuery(luceneQuery, ResourceTable.class);
192 		jpaQuery.setProjection("myId");
193 
194 		// execute search
195 		List<?> result = jpaQuery.getResultList();
196 
197 		HashSet<Long> pidsSet = pids != null ? new HashSet<Long>(pids) : null;
198 
199 		ArrayList<Long> retVal = new ArrayList<Long>();
200 		for (Object object : result) {
201 			Object[] nextArray = (Object[]) object;
202 			Long next = (Long) nextArray[0];
203 			if (next != null && (pidsSet == null || pidsSet.contains(next))) {
204 				retVal.add(next);
205 			}
206 		}
207 
208 		return retVal;
209 	}
210 
211 	@Override
212 	public List<Long> everything(String theResourceName, SearchParameterMap theParams) {
213 
214 		Long pid = null;
215 		if (theParams.get(BaseResource.SP_RES_ID) != null) {
216 			String idParamValue;
217 			IQueryParameterType idParam = theParams.get(BaseResource.SP_RES_ID).get(0).get(0);
218 			if (idParam instanceof TokenParam) {
219 				TokenParam idParm = (TokenParam) idParam;
220 				idParamValue = idParm.getValue();
221 			} else {
222 				StringParam idParm = (StringParam) idParam;
223 				idParamValue = idParm.getValue();
224 			}
225 			pid = BaseHapiFhirDao.translateForcedIdToPid(theResourceName, idParamValue, myForcedIdDao);
226 		}
227 
228 		Long referencingPid = pid;
229 		List<Long> retVal = doSearch(null, theParams, referencingPid);
230 		if (referencingPid != null) {
231 			retVal.add(referencingPid);
232 		}
233 		return retVal;
234 	}
235 
236 	@Override
237 	public boolean isDisabled() {
238 		Boolean retVal = ourDisabled;
239 
240 		if (retVal == null) {
241 			retVal = new TransactionTemplate(myTxManager).execute(t -> {
242 				try {
243 					FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
244 					em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
245 					return Boolean.FALSE;
246 				} catch (Exception e) {
247 					ourLog.trace("FullText test failed", e);
248 					ourLog.debug("Hibernate Search (Lucene) appears to be disabled on this server, fulltext will be disabled");
249 					return Boolean.TRUE;
250 				}
251 			});
252 			ourDisabled = retVal;
253 		}
254 
255 		assert retVal != null;
256 		return retVal;
257 	}
258 
259 	@Transactional()
260 	@Override
261 	public List<Long> search(String theResourceName, SearchParameterMap theParams) {
262 		return doSearch(theResourceName, theParams, null);
263 	}
264 
265 	@Transactional()
266 	@Override
267 	public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) {
268 		Validate.notBlank(theContext, "theContext must be provided");
269 		Validate.notBlank(theSearchParam, "theSearchParam must be provided");
270 		Validate.notBlank(theText, "theSearchParam must be provided");
271 
272 		long start = System.currentTimeMillis();
273 
274 		String[] contextParts = StringUtils.split(theContext, '/');
275 		if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) {
276 			throw new InvalidRequestException("Invalid context: " + theContext);
277 		}
278 		Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextParts[0], contextParts[1], myForcedIdDao);
279 
280 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
281 
282 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
283 
284 		Query textQuery = qb
285 			.phrase()
286 			.withSlop(2)
287 			.onField("myContentText").boostedTo(4.0f)
288 			.andField("myContentTextEdgeNGram").boostedTo(2.0f)
289 			.andField("myContentTextNGram").boostedTo(1.0f)
290 			.andField("myContentTextPhonetic").boostedTo(0.5f)
291 			.sentence(theText.toLowerCase()).createQuery();
292 
293 		Query query = qb.bool()
294 			.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery())
295 			.must(textQuery)
296 			.createQuery();
297 
298 		FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class);
299 		ftq.setProjection("myContentText");
300 		ftq.setMaxResults(20);
301 
302 		List<?> resultList = ftq.getResultList();
303 		List<Suggestion> suggestions = Lists.newArrayList();
304 		for (Object next : resultList) {
305 			Object[] nextAsArray = (Object[]) next;
306 			String nextValue = (String) nextAsArray[0];
307 
308 			try {
309 				MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions);
310 				Scorer scorer = new QueryScorer(textQuery);
311 				Highlighter highlighter = new Highlighter(formatter, scorer);
312 				Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class);
313 
314 				formatter.setAnalyzer("myContentTextPhonetic");
315 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10);
316 
317 				formatter.setAnalyzer("myContentTextNGram");
318 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10);
319 
320 				formatter.setFindPhrasesWith();
321 				formatter.setAnalyzer("myContentTextEdgeNGram");
322 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
323 
324 			} catch (Exception e) {
325 				throw new InternalErrorException(e);
326 			}
327 
328 		}
329 
330 		Collections.sort(suggestions);
331 
332 		Set<String> terms = Sets.newHashSet();
333 		for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext(); ) {
334 			String nextTerm = iter.next().getTerm().toLowerCase();
335 			if (!terms.add(nextTerm)) {
336 				iter.remove();
337 			}
338 		}
339 
340 		long delay = System.currentTimeMillis() - start;
341 		ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[]{terms.size(), theText, delay});
342 
343 		return suggestions;
344 	}
345 
346 	public class MySuggestionFormatter implements Formatter {
347 
348 		private List<Suggestion> mySuggestions;
349 		private String myAnalyzer;
350 		private ArrayList<String> myPartialMatchPhrases;
351 		private ArrayList<Float> myPartialMatchScores;
352 		private String myOriginalSearch;
353 
354 		public MySuggestionFormatter(String theOriginalSearch, List<Suggestion> theSuggestions) {
355 			myOriginalSearch = theOriginalSearch;
356 			mySuggestions = theSuggestions;
357 		}
358 
359 		@Override
360 		public String highlightTerm(String theOriginalText, TokenGroup theTokenGroup) {
361 			ourLog.debug("{} Found {} with score {}", new Object[]{myAnalyzer, theOriginalText, theTokenGroup.getTotalScore()});
362 			if (theTokenGroup.getTotalScore() > 0) {
363 				float score = theTokenGroup.getTotalScore();
364 				if (theOriginalText.equalsIgnoreCase(myOriginalSearch)) {
365 					score = score + 1.0f;
366 				}
367 				mySuggestions.add(new Suggestion(theOriginalText, score));
368 			} else if (myPartialMatchPhrases != null) {
369 				if (theOriginalText.length() < 100) {
370 					for (int i = 0; i < myPartialMatchPhrases.size(); i++) {
371 						if (theOriginalText.contains(myPartialMatchPhrases.get(i))) {
372 							mySuggestions.add(new Suggestion(theOriginalText, myPartialMatchScores.get(i) - 0.5f));
373 						}
374 					}
375 				}
376 			}
377 
378 			return null;
379 		}
380 
381 		public void setAnalyzer(String theString) {
382 			myAnalyzer = theString;
383 		}
384 
385 		public void setFindPhrasesWith() {
386 			myPartialMatchPhrases = new ArrayList<String>();
387 			myPartialMatchScores = new ArrayList<Float>();
388 
389 			for (Suggestion next : mySuggestions) {
390 				myPartialMatchPhrases.add(' ' + next.myTerm);
391 				myPartialMatchScores.add(next.myScore);
392 			}
393 
394 			myPartialMatchPhrases.add(myOriginalSearch);
395 			myPartialMatchScores.add(1.0f);
396 		}
397 
398 	}
399 
400 	public static class Suggestion implements Comparable<Suggestion> {
401 		private String myTerm;
402 		private float myScore;
403 
404 		public Suggestion(String theTerm, float theScore) {
405 			myTerm = theTerm;
406 			myScore = theScore;
407 		}
408 
409 		@Override
410 		public int compareTo(Suggestion theO) {
411 			return Float.compare(theO.myScore, myScore);
412 		}
413 
414 		public float getScore() {
415 			return myScore;
416 		}
417 
418 		public String getTerm() {
419 			return myTerm;
420 		}
421 
422 		@Override
423 		public String toString() {
424 			return "Suggestion[myTerm=" + myTerm + ", myScore=" + myScore + "]";
425 		}
426 	}
427 
428 }