View Javadoc
1   package ca.uhn.fhir.jpa.dao;
2   
3   /*
4    * #%L
5    * HAPI FHIR JPA Server
6    * %%
7    * Copyright (C) 2014 - 2018 University Health Network
8    * %%
9    * Licensed under the Apache License, Version 2.0 (the "License");
10   * you may not use this file except in compliance with the License.
11   * You may obtain a copy of the License at
12   * 
13   *      http://www.apache.org/licenses/LICENSE-2.0
14   * 
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   * #L%
21   */
22  
23  import ca.uhn.fhir.jpa.dao.data.IForcedIdDao;
24  import ca.uhn.fhir.jpa.entity.ResourceTable;
25  import ca.uhn.fhir.model.api.IQueryParameterType;
26  import ca.uhn.fhir.rest.api.Constants;
27  import ca.uhn.fhir.rest.param.StringParam;
28  import ca.uhn.fhir.rest.param.TokenParam;
29  import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
30  import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
31  import com.google.common.collect.Lists;
32  import com.google.common.collect.Sets;
33  import org.apache.commons.lang3.StringUtils;
34  import org.apache.commons.lang3.Validate;
35  import org.apache.lucene.analysis.Analyzer;
36  import org.apache.lucene.search.Query;
37  import org.apache.lucene.search.highlight.Formatter;
38  import org.apache.lucene.search.highlight.*;
39  import org.hibernate.search.jpa.FullTextEntityManager;
40  import org.hibernate.search.jpa.FullTextQuery;
41  import org.hibernate.search.query.dsl.BooleanJunction;
42  import org.hibernate.search.query.dsl.QueryBuilder;
43  import org.hl7.fhir.dstu3.model.BaseResource;
44  import org.hl7.fhir.instance.model.api.IBaseResource;
45  import org.springframework.beans.factory.annotation.Autowired;
46  import org.springframework.transaction.annotation.Transactional;
47  
48  import javax.persistence.EntityManager;
49  import javax.persistence.PersistenceContext;
50  import javax.persistence.PersistenceContextType;
51  import java.util.*;
52  
53  import static org.apache.commons.lang3.StringUtils.isNotBlank;
54  
55  public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
56  	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FulltextSearchSvcImpl.class);
57  
58  	@PersistenceContext(type = PersistenceContextType.TRANSACTION)
59  	private EntityManager myEntityManager;
60  
61  	@Autowired
62  	protected IForcedIdDao myForcedIdDao;
63  
64  	/**
65  	 * Constructor
66  	 */
67  	public FulltextSearchSvcImpl() {
68  		super();
69  	}
70  
71  	private void addTextSearch(QueryBuilder theQueryBuilder, BooleanJunction<?> theBoolean, List<List<? extends IQueryParameterType>> theTerms, String theFieldName, String theFieldNameEdgeNGram, String theFieldNameNGram) {
72  		if (theTerms == null) {
73  			return;
74  		}
75  		for (List<? extends IQueryParameterType> nextAnd : theTerms) {
76  			Set<String> terms = new HashSet<String>();
77  			for (IQueryParameterType nextOr : nextAnd) {
78  				StringParam nextOrString = (StringParam) nextOr;
79  				String nextValueTrimmed = StringUtils.defaultString(nextOrString.getValue()).trim();
80  				if (isNotBlank(nextValueTrimmed)) {
81  					terms.add(nextValueTrimmed);
82  				}
83  			}
84  			if (terms.isEmpty() == false) {
85  				if (terms.size() == 1) {
86  					//@formatter:off
87  					Query textQuery = theQueryBuilder
88  						.phrase()
89  						.withSlop(2)
90  						.onField(theFieldName).boostedTo(4.0f)
91  //						.andField(theFieldNameEdgeNGram).boostedTo(2.0f)
92  //						.andField(theFieldNameNGram).boostedTo(1.0f)
93  						.sentence(terms.iterator().next().toLowerCase()).createQuery();
94  					//@formatter:on
95  
96  					theBoolean.must(textQuery);
97  				} else {
98  					String joinedTerms = StringUtils.join(terms, ' ');
99  					theBoolean.must(theQueryBuilder.keyword().onField(theFieldName).matching(joinedTerms).createQuery());
100 				}
101 			}
102 		}
103 	}
104 
105 	private List<Long> doSearch(String theResourceName, SearchParameterMap theParams, Long theReferencingPid) {
106 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
107 
108 		List<Long> pids = null;
109 		
110 		/*
111 		 * Handle textual params
112 		 */
113 		/*
114 		for (String nextParamName : theParams.keySet()) {
115 			for (List<? extends IQueryParameterType> nextAndList : theParams.get(nextParamName)) {
116 				for (Iterator<? extends IQueryParameterType> orIterator = nextAndList.iterator(); orIterator.hasNext();) {
117 					IQueryParameterType nextParam = orIterator.next();
118 					if (nextParam instanceof TokenParam) {
119 						TokenParam nextTokenParam = (TokenParam) nextParam;
120 						if (nextTokenParam.isText()) {
121 							orIterator.remove();
122 							QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceIndexedSearchParamString.class).get();
123 							BooleanJunction<?> bool = qb.bool();
124 
125 							bool.must(qb.keyword().onField("myParamName").matching(nextParamName).createQuery());
126 							if (isNotBlank(theResourceName)) {
127 								bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
128 							}
129 //							
130 							//@formatter:off
131 							String value = nextTokenParam.getValue().toLowerCase();
132 							bool.must(qb.keyword().onField("myValueTextEdgeNGram").matching(value).createQuery());
133 							
134 							//@formatter:on
135 							
136 							FullTextQuery ftq = em.createFullTextQuery(bool.createQuery(), ResourceIndexedSearchParamString.class);
137 
138 							List<?> resultList = ftq.getResultList();
139 							pids = new ArrayList<Long>();
140 							for (Object next : resultList) {
141 								ResourceIndexedSearchParamString nextAsArray = (ResourceIndexedSearchParamString) next;
142 								pids.add(nextAsArray.getResourcePid());
143 							}
144 						}
145 					}
146 				}
147 			}
148 		}
149 		
150 		if (pids != null && pids.isEmpty()) {
151 			return pids;
152 		}
153 		*/
154 
155 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
156 		BooleanJunction<?> bool = qb.bool();
157 
158 		/*
159 		 * Handle _content parameter (resource body content)
160 		 */
161 		List<List<? extends IQueryParameterType>> contentAndTerms = theParams.remove(Constants.PARAM_CONTENT);
162 		addTextSearch(qb, bool, contentAndTerms, "myContentText", "myContentTextEdgeNGram", "myContentTextNGram");
163 
164 		/*
165 		 * Handle _text parameter (resource narrative content)
166 		 */
167 		List<List<? extends IQueryParameterType>> textAndTerms = theParams.remove(Constants.PARAM_TEXT);
168 		addTextSearch(qb, bool, textAndTerms, "myNarrativeText", "myNarrativeTextEdgeNGram", "myNarrativeTextNGram");
169 
170 		if (theReferencingPid != null) {
171 			bool.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(theReferencingPid).createQuery());
172 		}
173 
174 		if (bool.isEmpty()) {
175 			return pids;
176 		}
177 
178 		if (isNotBlank(theResourceName)) {
179 			bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery());
180 		}
181 
182 		Query luceneQuery = bool.createQuery();
183 
184 		// wrap Lucene query in a javax.persistence.Query
185 		FullTextQuery jpaQuery = em.createFullTextQuery(luceneQuery, ResourceTable.class);
186 		jpaQuery.setProjection("myId");
187 
188 		// execute search
189 		List<?> result = jpaQuery.getResultList();
190 
191 		HashSet<Long> pidsSet = pids != null ? new HashSet<Long>(pids) : null;
192 
193 		ArrayList<Long> retVal = new ArrayList<Long>();
194 		for (Object object : result) {
195 			Object[] nextArray = (Object[]) object;
196 			Long next = (Long) nextArray[0];
197 			if (next != null && (pidsSet == null || pidsSet.contains(next))) {
198 				retVal.add(next);
199 			}
200 		}
201 
202 		return retVal;
203 	}
204 
205 	@Override
206 	public List<Long> everything(String theResourceName, SearchParameterMap theParams) {
207 
208 		Long pid = null;
209 		if (theParams.get(BaseResource.SP_RES_ID) != null) {
210 			String idParamValue;
211 			IQueryParameterType idParam = theParams.get(BaseResource.SP_RES_ID).get(0).get(0);
212 			if (idParam instanceof TokenParam) {
213 				TokenParam idParm = (TokenParam) idParam;
214 				idParamValue = idParm.getValue();
215 			} else {
216 				StringParam idParm = (StringParam) idParam;
217 				idParamValue = idParm.getValue();
218 			}
219 			pid = BaseHapiFhirDao.translateForcedIdToPid(theResourceName, idParamValue, myForcedIdDao);
220 		}
221 
222 		Long referencingPid = pid;
223 		List<Long> retVal = doSearch(null, theParams, referencingPid);
224 		if (referencingPid != null) {
225 			retVal.add(referencingPid);
226 		}
227 		return retVal;
228 	}
229 
230 	@Override
231 	public boolean isDisabled() {
232 		try {
233 			FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
234 			em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
235 		} catch (Exception e) {
236 			ourLog.trace("FullText test failed", e);
237 			ourLog.debug("Hibernate Search (Lucene) appears to be disabled on this server, fulltext will be disabled");
238 			return true;
239 		}
240 		return false;
241 	}
242 
243 	@Transactional()
244 	@Override
245 	public List<Long> search(String theResourceName, SearchParameterMap theParams) {
246 		return doSearch(theResourceName, theParams, null);
247 	}
248 
249 	@Override
250 	public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) {
251 		Validate.notBlank(theContext, "theContext must be provided");
252 		Validate.notBlank(theSearchParam, "theSearchParam must be provided");
253 		Validate.notBlank(theText, "theSearchParam must be provided");
254 
255 		long start = System.currentTimeMillis();
256 
257 		String[] contextParts = StringUtils.split(theContext, '/');
258 		if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) {
259 			throw new InvalidRequestException("Invalid context: " + theContext);
260 		}
261 		Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextParts[0], contextParts[1], myForcedIdDao);
262 
263 		FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager);
264 
265 		QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get();
266 
267 		Query textQuery = qb
268 			.phrase()
269 			.withSlop(2)
270 			.onField("myContentText").boostedTo(4.0f)
271 			.andField("myContentTextEdgeNGram").boostedTo(2.0f)
272 			.andField("myContentTextNGram").boostedTo(1.0f)
273 			.andField("myContentTextPhonetic").boostedTo(0.5f)
274 			.sentence(theText.toLowerCase()).createQuery();
275 
276 		Query query = qb.bool()
277 			.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery())
278 			.must(textQuery)
279 			.createQuery();
280 
281 		FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class);
282 		ftq.setProjection("myContentText");
283 		ftq.setMaxResults(20);
284 
285 		List<?> resultList = ftq.getResultList();
286 		List<Suggestion> suggestions = Lists.newArrayList();
287 		for (Object next : resultList) {
288 			Object[] nextAsArray = (Object[]) next;
289 			String nextValue = (String) nextAsArray[0];
290 
291 			try {
292 				MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions);
293 				Scorer scorer = new QueryScorer(textQuery);
294 				Highlighter highlighter = new Highlighter(formatter, scorer);
295 				Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class);
296 
297 				formatter.setAnalyzer("myContentTextPhonetic");
298 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10);
299 
300 				formatter.setAnalyzer("myContentTextNGram");
301 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10);
302 
303 				formatter.setFindPhrasesWith();
304 				formatter.setAnalyzer("myContentTextEdgeNGram");
305 				highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
306 
307 			} catch (Exception e) {
308 				throw new InternalErrorException(e);
309 			}
310 
311 		}
312 
313 		Collections.sort(suggestions);
314 
315 		Set<String> terms = Sets.newHashSet();
316 		for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext(); ) {
317 			String nextTerm = iter.next().getTerm().toLowerCase();
318 			if (!terms.add(nextTerm)) {
319 				iter.remove();
320 			}
321 		}
322 
323 		long delay = System.currentTimeMillis() - start;
324 		ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[]{terms.size(), theText, delay});
325 
326 		return suggestions;
327 	}
328 
329 	public class MySuggestionFormatter implements Formatter {
330 
331 		private List<Suggestion> mySuggestions;
332 		private String myAnalyzer;
333 		private ArrayList<String> myPartialMatchPhrases;
334 		private ArrayList<Float> myPartialMatchScores;
335 		private String myOriginalSearch;
336 
337 		public MySuggestionFormatter(String theOriginalSearch, List<Suggestion> theSuggestions) {
338 			myOriginalSearch = theOriginalSearch;
339 			mySuggestions = theSuggestions;
340 		}
341 
342 		@Override
343 		public String highlightTerm(String theOriginalText, TokenGroup theTokenGroup) {
344 			ourLog.debug("{} Found {} with score {}", new Object[]{myAnalyzer, theOriginalText, theTokenGroup.getTotalScore()});
345 			if (theTokenGroup.getTotalScore() > 0) {
346 				float score = theTokenGroup.getTotalScore();
347 				if (theOriginalText.equalsIgnoreCase(myOriginalSearch)) {
348 					score = score + 1.0f;
349 				}
350 				mySuggestions.add(new Suggestion(theOriginalText, score));
351 			} else if (myPartialMatchPhrases != null) {
352 				if (theOriginalText.length() < 100) {
353 					for (int i = 0; i < myPartialMatchPhrases.size(); i++) {
354 						if (theOriginalText.contains(myPartialMatchPhrases.get(i))) {
355 							mySuggestions.add(new Suggestion(theOriginalText, myPartialMatchScores.get(i) - 0.5f));
356 						}
357 					}
358 				}
359 			}
360 
361 			return null;
362 		}
363 
364 		public void setAnalyzer(String theString) {
365 			myAnalyzer = theString;
366 		}
367 
368 		public void setFindPhrasesWith() {
369 			myPartialMatchPhrases = new ArrayList<String>();
370 			myPartialMatchScores = new ArrayList<Float>();
371 
372 			for (Suggestion next : mySuggestions) {
373 				myPartialMatchPhrases.add(' ' + next.myTerm);
374 				myPartialMatchScores.add(next.myScore);
375 			}
376 
377 			myPartialMatchPhrases.add(myOriginalSearch);
378 			myPartialMatchScores.add(1.0f);
379 		}
380 
381 	}
382 
383 	public static class Suggestion implements Comparable<Suggestion> {
384 		private String myTerm;
385 		private float myScore;
386 
387 		public Suggestion(String theTerm, float theScore) {
388 			myTerm = theTerm;
389 			myScore = theScore;
390 		}
391 
392 		@Override
393 		public int compareTo(Suggestion theO) {
394 			return Float.compare(theO.myScore, myScore);
395 		}
396 
397 		public float getScore() {
398 			return myScore;
399 		}
400 
401 		public String getTerm() {
402 			return myTerm;
403 		}
404 
405 		@Override
406 		public String toString() {
407 			return "Suggestion[myTerm=" + myTerm + ", myScore=" + myScore + "]";
408 		}
409 	}
410 
411 }