View Javadoc
1   package ca.uhn.fhir.jpa.dao;
2   
3   /*
4    * #%L
5    * HAPI FHIR JPA Server
6    * %%
7    * Copyright (C) 2014 - 2018 University Health Network
8    * %%
9    * Licensed under the Apache License, Version 2.0 (the "License");
10   * you may not use this file except in compliance with the License.
11   * You may obtain a copy of the License at
12   * 
13   *      http://www.apache.org/licenses/LICENSE-2.0
14   * 
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   * #L%
21   */
22  
23  import ca.uhn.fhir.jpa.dao.data.IForcedIdDao;
24  import ca.uhn.fhir.jpa.entity.ResourceTable;
25  import ca.uhn.fhir.model.api.IQueryParameterType;
26  import ca.uhn.fhir.rest.api.Constants;
27  import ca.uhn.fhir.rest.param.StringParam;
28  import ca.uhn.fhir.rest.param.TokenParam;
29  import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
30  import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
31  import com.google.common.collect.Lists;
32  import com.google.common.collect.Sets;
33  import org.apache.commons.lang3.StringUtils;
34  import org.apache.commons.lang3.Validate;
35  import org.apache.lucene.analysis.Analyzer;
36  import org.apache.lucene.search.Query;
37  import org.apache.lucene.search.highlight.Formatter;
38  import org.apache.lucene.search.highlight.*;
39  import org.hibernate.search.jpa.FullTextEntityManager;
40  import org.hibernate.search.jpa.FullTextQuery;
41  import org.hibernate.search.query.dsl.BooleanJunction;
42  import org.hibernate.search.query.dsl.QueryBuilder;
43  import org.hl7.fhir.dstu3.model.BaseResource;
44  import org.hl7.fhir.instance.model.api.IBaseResource;
45  import org.springframework.beans.factory.annotation.Autowired;
46  import org.springframework.transaction.PlatformTransactionManager;
47  import org.springframework.transaction.annotation.Transactional;
48  import org.springframework.transaction.support.TransactionTemplate;
49  
50  import javax.persistence.EntityManager;
51  import javax.persistence.PersistenceContext;
52  import javax.persistence.PersistenceContextType;
53  import java.util.*;
54  
55  import static org.apache.commons.lang3.StringUtils.isNotBlank;
56  
57  public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
58  	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FulltextSearchSvcImpl.class);
59  
60  	@PersistenceContext(type = PersistenceContextType.TRANSACTION)
61  	private EntityManager myEntityManager;
62  	@Autowired
63  	private PlatformTransactionManager myTxManager;
64  
65  	@Autowired
66  	protected IForcedIdDao myForcedIdDao;
67  
68  	private Boolean ourDisabled;
69  
70  	@Autowired
71  	private DaoConfig myDaoConfig;
72  
73  	/**
74  	 * Constructor
75  	 */
76  	public FulltextSearchSvcImpl() {
77  		super();
78  	}
79  
80  	private void addTextSearch(QueryBuilder theQueryBuilder, BooleanJunction<?> theBoolean, List<List<? extends IQueryParameterType>> theTerms, String theFieldName, String theFieldNameEdgeNGram, String theFieldNameNGram) {
81  		if (theTerms == null) {
82  			return;
83  		}
84  		for (List<? extends IQueryParameterType> nextAnd : theTerms) {
85  			Set<String> terms = new HashSet<>();
86  			for (IQueryParameterType nextOr : nextAnd) {
87  				StringParam nextOrString = (StringParam) nextOr;
88  				String nextValueTrimmed = StringUtils.defaultString(nextOrString.getValue()).trim();
89  				if (isNotBlank(nextValueTrimmed)) {
90  					terms.add(nextValueTrimmed);
91  				}
92  			}
93  			if (terms.isEmpty() == false) {
94  				if (terms.size() == 1) {
95  					//@formatter:off
96  					Query textQuery = theQueryBuilder
97  						.phrase()
98  						.withSlop(2)
99  						.onField(theFieldName).boostedTo(4.0f)
100 //						.andField(theFieldNameEdgeNGram).boostedTo(2.0f)
101 //						.andField(theFieldNameNGram).boostedTo(1.0f)
102 						.sentence(terms.iterator().next().toLowerCase()).createQuery();
103 					//@formatter:on
104 
105 					theBoolean.must(textQuery);
106 				} else {
107 					String joinedTerms = StringUtils.join(terms, ' ');
108 					theBoolean.must(theQueryBuilder.keyword().onField(theFieldName).matching(joinedTerms).createQuery());
109 				}
110 			}
111 		}
112 	}
113 
114 	private List<Long> doSearch(String theResourceName, SearchParameterMap theParams, Long theReferencingPid) {
115 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
116 
117 		List<Long> pids = null;
118 		
119 		/*
120 		 * Handle textual params
121 		 */
122 		/*
123 		for (String nextParamName : theParams.keySet()) {
124 			for (List<? extends IQueryParameterType> nextAndList : theParams.get(nextParamName)) {
125 				for (Iterator<? extends IQueryParameterType> orIterator = nextAndList.iterator(); orIterator.hasNext();) {
126 					IQueryParameterType nextParam = orIterator.next();
127 					if (nextParam instanceof TokenParam) {
128 						TokenParam nextTokenParam = (TokenParam) nextParam;
129 						if (nextTokenParam.isText()) {
130 							orIterator.remove();
131 							QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceIndexedSearchParamString.class).get();
132 							BooleanJunction<?> bool = qb.bool();
133 
134 							bool.must(qb.keyword().onField("myParamName").matching(nextParamName).createQuery());
135 							if (isNotBlank(theResourceName)) {
136 								bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
137 							}
138 //							
139 							//@formatter:off
140 							String value = nextTokenParam.getValue().toLowerCase();
141 							bool.must(qb.keyword().onField("myValueTextEdgeNGram").matching(value).createQuery());
142 							
143 							//@formatter:on
144 							
145 							FullTextQuery ftq = em.createFullTextQuery(bool.createQuery(), ResourceIndexedSearchParamString.class);
146 
147 							List<?> resultList = ftq.getResultList();
148 							pids = new ArrayList<Long>();
149 							for (Object next : resultList) {
150 								ResourceIndexedSearchParamString nextAsArray = (ResourceIndexedSearchParamString) next;
151 								pids.add(nextAsArray.getResourcePid());
152 							}
153 						}
154 					}
155 				}
156 			}
157 		}
158 		
159 		if (pids != null && pids.isEmpty()) {
160 			return pids;
161 		}
162 		*/
163 
164 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
165 		BooleanJunction<?> bool = qb.bool();
166 
167 		/*
168 		 * Handle _content parameter (resource body content)
169 		 */
170 		List<List<? extends IQueryParameterType>> contentAndTerms = theParams.remove(Constants.PARAM_CONTENT);
171 		addTextSearch(qb, bool, contentAndTerms, "myContentText", "myContentTextEdgeNGram", "myContentTextNGram");
172 
173 		/*
174 		 * Handle _text parameter (resource narrative content)
175 		 */
176 		List<List<? extends IQueryParameterType>> textAndTerms = theParams.remove(Constants.PARAM_TEXT);
177 		addTextSearch(qb, bool, textAndTerms, "myNarrativeText", "myNarrativeTextEdgeNGram", "myNarrativeTextNGram");
178 
179 		if (theReferencingPid != null) {
180 			bool.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(theReferencingPid).createQuery());
181 		}
182 
183 		if (bool.isEmpty()) {
184 			return pids;
185 		}
186 
187 		if (isNotBlank(theResourceName)) {
188 			bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
189 		}
190 
191 		Query luceneQuery = bool.createQuery();
192 
193 		// wrap Lucene query in a javax.persistence.Query
194 		FullTextQuery jpaQuery = em.createFullTextQuery(luceneQuery, ResourceTable.class);
195 		jpaQuery.setProjection("myId");
196 
197 		// execute search
198 		List<?> result = jpaQuery.getResultList();
199 
200 		HashSet<Long> pidsSet = pids != null ? new HashSet<Long>(pids) : null;
201 
202 		ArrayList<Long> retVal = new ArrayList<Long>();
203 		for (Object object : result) {
204 			Object[] nextArray = (Object[]) object;
205 			Long next = (Long) nextArray[0];
206 			if (next != null && (pidsSet == null || pidsSet.contains(next))) {
207 				retVal.add(next);
208 			}
209 		}
210 
211 		return retVal;
212 	}
213 
214 	@Override
215 	public List<Long> everything(String theResourceName, SearchParameterMap theParams) {
216 
217 		Long pid = null;
218 		if (theParams.get(BaseResource.SP_RES_ID) != null) {
219 			String idParamValue;
220 			IQueryParameterType idParam = theParams.get(BaseResource.SP_RES_ID).get(0).get(0);
221 			if (idParam instanceof TokenParam) {
222 				TokenParam idParm = (TokenParam) idParam;
223 				idParamValue = idParm.getValue();
224 			} else {
225 				StringParam idParm = (StringParam) idParam;
226 				idParamValue = idParm.getValue();
227 			}
228 			pid = BaseHapiFhirDao.translateForcedIdToPid(myDaoConfig, theResourceName, idParamValue, myForcedIdDao);
229 		}
230 
231 		Long referencingPid = pid;
232 		List<Long> retVal = doSearch(null, theParams, referencingPid);
233 		if (referencingPid != null) {
234 			retVal.add(referencingPid);
235 		}
236 		return retVal;
237 	}
238 
239 	@Override
240 	public boolean isDisabled() {
241 		Boolean retVal = ourDisabled;
242 
243 		if (retVal == null) {
244 			retVal = new TransactionTemplate(myTxManager).execute(t -> {
245 				try {
246 					FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
247 					em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
248 					return Boolean.FALSE;
249 				} catch (Exception e) {
250 					ourLog.trace("FullText test failed", e);
251 					ourLog.debug("Hibernate Search (Lucene) appears to be disabled on this server, fulltext will be disabled");
252 					return Boolean.TRUE;
253 				}
254 			});
255 			ourDisabled = retVal;
256 		}
257 
258 		assert retVal != null;
259 		return retVal;
260 	}
261 
262 	@Transactional()
263 	@Override
264 	public List<Long> search(String theResourceName, SearchParameterMap theParams) {
265 		return doSearch(theResourceName, theParams, null);
266 	}
267 
268 	@Transactional()
269 	@Override
270 	public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) {
271 		Validate.notBlank(theContext, "theContext must be provided");
272 		Validate.notBlank(theSearchParam, "theSearchParam must be provided");
273 		Validate.notBlank(theText, "theSearchParam must be provided");
274 
275 		long start = System.currentTimeMillis();
276 
277 		String[] contextParts = StringUtils.split(theContext, '/');
278 		if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) {
279 			throw new InvalidRequestException("Invalid context: " + theContext);
280 		}
281 		Long pid = BaseHapiFhirDao.translateForcedIdToPid( myDaoConfig, contextParts[0], contextParts[1], myForcedIdDao);
282 
283 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
284 
285 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
286 
287 		Query textQuery = qb
288 			.phrase()
289 			.withSlop(2)
290 			.onField("myContentText").boostedTo(4.0f)
291 			.andField("myContentTextEdgeNGram").boostedTo(2.0f)
292 			.andField("myContentTextNGram").boostedTo(1.0f)
293 			.andField("myContentTextPhonetic").boostedTo(0.5f)
294 			.sentence(theText.toLowerCase()).createQuery();
295 
296 		Query query = qb.bool()
297 			.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery())
298 			.must(textQuery)
299 			.createQuery();
300 
301 		FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class);
302 		ftq.setProjection("myContentText");
303 		ftq.setMaxResults(20);
304 
305 		List<?> resultList = ftq.getResultList();
306 		List<Suggestion> suggestions = Lists.newArrayList();
307 		for (Object next : resultList) {
308 			Object[] nextAsArray = (Object[]) next;
309 			String nextValue = (String) nextAsArray[0];
310 
311 			try {
312 				MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions);
313 				Scorer scorer = new QueryScorer(textQuery);
314 				Highlighter highlighter = new Highlighter(formatter, scorer);
315 				Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class);
316 
317 				formatter.setAnalyzer("myContentTextPhonetic");
318 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10);
319 
320 				formatter.setAnalyzer("myContentTextNGram");
321 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10);
322 
323 				formatter.setFindPhrasesWith();
324 				formatter.setAnalyzer("myContentTextEdgeNGram");
325 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
326 
327 			} catch (Exception e) {
328 				throw new InternalErrorException(e);
329 			}
330 
331 		}
332 
333 		Collections.sort(suggestions);
334 
335 		Set<String> terms = Sets.newHashSet();
336 		for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext(); ) {
337 			String nextTerm = iter.next().getTerm().toLowerCase();
338 			if (!terms.add(nextTerm)) {
339 				iter.remove();
340 			}
341 		}
342 
343 		long delay = System.currentTimeMillis() - start;
344 		ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[]{terms.size(), theText, delay});
345 
346 		return suggestions;
347 	}
348 
349 	public class MySuggestionFormatter implements Formatter {
350 
351 		private List<Suggestion> mySuggestions;
352 		private String myAnalyzer;
353 		private ArrayList<String> myPartialMatchPhrases;
354 		private ArrayList<Float> myPartialMatchScores;
355 		private String myOriginalSearch;
356 
357 		public MySuggestionFormatter(String theOriginalSearch, List<Suggestion> theSuggestions) {
358 			myOriginalSearch = theOriginalSearch;
359 			mySuggestions = theSuggestions;
360 		}
361 
362 		@Override
363 		public String highlightTerm(String theOriginalText, TokenGroup theTokenGroup) {
364 			ourLog.debug("{} Found {} with score {}", new Object[]{myAnalyzer, theOriginalText, theTokenGroup.getTotalScore()});
365 			if (theTokenGroup.getTotalScore() > 0) {
366 				float score = theTokenGroup.getTotalScore();
367 				if (theOriginalText.equalsIgnoreCase(myOriginalSearch)) {
368 					score = score + 1.0f;
369 				}
370 				mySuggestions.add(new Suggestion(theOriginalText, score));
371 			} else if (myPartialMatchPhrases != null) {
372 				if (theOriginalText.length() < 100) {
373 					for (int i = 0; i < myPartialMatchPhrases.size(); i++) {
374 						if (theOriginalText.contains(myPartialMatchPhrases.get(i))) {
375 							mySuggestions.add(new Suggestion(theOriginalText, myPartialMatchScores.get(i) - 0.5f));
376 						}
377 					}
378 				}
379 			}
380 
381 			return null;
382 		}
383 
384 		public void setAnalyzer(String theString) {
385 			myAnalyzer = theString;
386 		}
387 
388 		public void setFindPhrasesWith() {
389 			myPartialMatchPhrases = new ArrayList<String>();
390 			myPartialMatchScores = new ArrayList<Float>();
391 
392 			for (Suggestion next : mySuggestions) {
393 				myPartialMatchPhrases.add(' ' + next.myTerm);
394 				myPartialMatchScores.add(next.myScore);
395 			}
396 
397 			myPartialMatchPhrases.add(myOriginalSearch);
398 			myPartialMatchScores.add(1.0f);
399 		}
400 
401 	}
402 
403 	public static class Suggestion implements Comparable<Suggestion> {
404 		private String myTerm;
405 		private float myScore;
406 
407 		public Suggestion(String theTerm, float theScore) {
408 			myTerm = theTerm;
409 			myScore = theScore;
410 		}
411 
412 		@Override
413 		public int compareTo(Suggestion theO) {
414 			return Float.compare(theO.myScore, myScore);
415 		}
416 
417 		public float getScore() {
418 			return myScore;
419 		}
420 
421 		public String getTerm() {
422 			return myTerm;
423 		}
424 
425 		@Override
426 		public String toString() {
427 			return "Suggestion[myTerm=" + myTerm + ", myScore=" + myScore + "]";
428 		}
429 	}
430 
431 }