001/*- 002 * #%L 003 * HAPI FHIR - Core Library 004 * %% 005 * Copyright (C) 2014 - 2024 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.context.phonetic; 021 022import org.apache.commons.codec.EncoderException; 023import org.apache.commons.codec.StringEncoder; 024import org.apache.commons.lang3.StringUtils; 025import org.slf4j.Logger; 026import org.slf4j.LoggerFactory; 027 028import java.util.StringJoiner; 029 030public class ApacheEncoder implements IPhoneticEncoder { 031 private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class); 032 033 private final String myName; 034 private final StringEncoder myStringEncoder; 035 036 public ApacheEncoder(String theName, StringEncoder theStringEncoder) { 037 myName = theName; 038 myStringEncoder = theStringEncoder; 039 } 040 041 @Override 042 public String name() { 043 return myName; 044 } 045 046 @Override 047 public String encode(String theString) { 048 try { 049 // If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in 050 // address lines. 051 if (theString.contains(" ")) { 052 return encodeStringWithSpaces(theString); 053 } 054 return myStringEncoder.encode(theString); 055 } catch (EncoderException e) { 056 ourLog.error("Failed to encode string " + theString, e); 057 return theString; 058 } 059 } 060 061 private String encodeStringWithSpaces(String theString) throws EncoderException { 062 StringJoiner joiner = new StringJoiner(" "); 063 064 // This sub-stack holds the alpha parts 065 StringJoiner alphaJoiner = new StringJoiner(" "); 066 067 for (String part : theString.split("[\\s\\W]+")) { 068 if (StringUtils.isAlpha(part)) { 069 alphaJoiner.add(part); 070 } else { 071 // Once we hit a non-alpha part, encode all the alpha parts together as a single string 072 // This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter 073 alphaJoiner = encodeAlphaParts(joiner, alphaJoiner); 074 joiner.add(part); 075 } 076 } 077 encodeAlphaParts(joiner, alphaJoiner); 078 079 return joiner.toString(); 080 } 081 082 private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException { 083 // Encode the alpha parts as a single string and then flush the alpha encoder 084 if (theAlphaJoiner.length() > 0) { 085 theJoiner.add(myStringEncoder.encode(theAlphaJoiner.toString())); 086 theAlphaJoiner = new StringJoiner(" "); 087 } 088 return theAlphaJoiner; 089 } 090}