001/*- 002 * #%L 003 * HAPI FHIR - Core Library 004 * %% 005 * Copyright (C) 2014 - 2024 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.util; 021 022import ca.uhn.fhir.context.phonetic.ApacheEncoder; 023import ca.uhn.fhir.context.phonetic.IPhoneticEncoder; 024import ca.uhn.fhir.context.phonetic.NumericEncoder; 025import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum; 026import org.apache.commons.codec.language.Caverphone1; 027import org.apache.commons.codec.language.Caverphone2; 028import org.apache.commons.codec.language.ColognePhonetic; 029import org.apache.commons.codec.language.DoubleMetaphone; 030import org.apache.commons.codec.language.MatchRatingApproachEncoder; 031import org.apache.commons.codec.language.Metaphone; 032import org.apache.commons.codec.language.Nysiis; 033import org.apache.commons.codec.language.RefinedSoundex; 034import org.apache.commons.codec.language.Soundex; 035import org.apache.commons.lang3.EnumUtils; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038 039public final class PhoneticEncoderUtil { 040 041 // embedded class only for parameter returns 042 private static class ParsedValues { 043 private final Integer maxCodeLength; 044 private final String encoderString; 045 046 public ParsedValues(String theString, Integer theMaxCode) { 047 maxCodeLength = theMaxCode; 048 encoderString = theString; 049 } 050 051 public Integer getMaxCodeLength() { 052 return maxCodeLength; 053 } 054 055 public String getEncoderString() { 056 return encoderString; 057 } 058 } 059 060 private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderUtil.class); 061 062 private PhoneticEncoderUtil() {} 063 064 /** 065 * Creates the phonetic encoder wrapper from 066 * an input string. 067 * 068 * <p> 069 * String must be in the format of... 070 * </p> 071 * 072 * PhoneticEncoderEnum(MAX_LENGTH) 073 * 074 * @return The IPhoneticEncoder 075 */ 076 public static IPhoneticEncoder getEncoder(String theString) { 077 ParsedValues values = parseIntValue(theString); 078 String encoderType = values.getEncoderString(); 079 Integer encoderMaxString = values.getMaxCodeLength(); 080 081 IPhoneticEncoder encoder = getEncoderFromString(encoderType, encoderMaxString); 082 if (encoder != null) { 083 return encoder; 084 } else { 085 ourLog.warn("Invalid phonetic param string " + theString); 086 return null; 087 } 088 } 089 090 private static ParsedValues parseIntValue(String theString) { 091 String encoderType = null; 092 Integer encoderMaxString = null; 093 094 int braceIndex = theString.indexOf("("); 095 if (braceIndex != -1) { 096 int len = theString.length(); 097 if (theString.charAt(len - 1) == ')') { 098 encoderType = theString.substring(0, braceIndex); 099 String num = theString.substring(braceIndex + 1, len - 1); 100 try { 101 encoderMaxString = Integer.parseInt(num); 102 } catch (NumberFormatException ex) { 103 // invalid number parse error 104 } 105 106 if (encoderMaxString == null || encoderMaxString < 0) { 107 // parse error 108 ourLog.error("Invalid encoder max character length: " + num); 109 encoderType = null; 110 } 111 } 112 // else - parse error 113 } else { 114 encoderType = theString; 115 } 116 117 return new ParsedValues(encoderType, encoderMaxString); 118 } 119 120 private static IPhoneticEncoder getEncoderFromString(String theName, Integer theMax) { 121 IPhoneticEncoder encoder = null; 122 PhoneticEncoderEnum enumVal = EnumUtils.getEnum(PhoneticEncoderEnum.class, theName); 123 124 if (enumVal != null) { 125 switch (enumVal) { 126 case CAVERPHONE1: 127 Caverphone1 caverphone1 = new Caverphone1(); 128 encoder = new ApacheEncoder(theName, caverphone1); 129 break; 130 case CAVERPHONE2: 131 Caverphone2 caverphone2 = new Caverphone2(); 132 encoder = new ApacheEncoder(theName, caverphone2); 133 break; 134 case COLOGNE: 135 ColognePhonetic colognePhonetic = new ColognePhonetic(); 136 encoder = new ApacheEncoder(theName, colognePhonetic); 137 break; 138 case DOUBLE_METAPHONE: 139 DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); 140 if (theMax != null) { 141 doubleMetaphone.setMaxCodeLen(theMax); 142 } 143 encoder = new ApacheEncoder(theName, doubleMetaphone); 144 break; 145 case MATCH_RATING_APPROACH: 146 MatchRatingApproachEncoder matchRatingApproachEncoder = new MatchRatingApproachEncoder(); 147 encoder = new ApacheEncoder(theName, matchRatingApproachEncoder); 148 break; 149 case METAPHONE: 150 Metaphone metaphone = new Metaphone(); 151 if (theMax != null) { 152 metaphone.setMaxCodeLen(theMax); 153 } 154 encoder = new ApacheEncoder(theName, metaphone); 155 break; 156 case NYSIIS: 157 Nysiis nysiis = new Nysiis(); 158 encoder = new ApacheEncoder(theName, nysiis); 159 break; 160 case NYSIIS_LONG: 161 Nysiis nysiis1_long = new Nysiis(false); 162 encoder = new ApacheEncoder(theName, nysiis1_long); 163 break; 164 case REFINED_SOUNDEX: 165 RefinedSoundex refinedSoundex = new RefinedSoundex(); 166 encoder = new ApacheEncoder(theName, refinedSoundex); 167 break; 168 case SOUNDEX: 169 Soundex soundex = new Soundex(); 170 // soundex has deprecated setting the max size 171 encoder = new ApacheEncoder(theName, soundex); 172 break; 173 case NUMERIC: 174 encoder = new NumericEncoder(); 175 break; 176 default: 177 // we don't ever expect to be here 178 // this log message is purely for devs who update this 179 // enum, but not this method 180 ourLog.error("Unhandled PhoneticParamEnum value " + enumVal.name()); 181 break; 182 } 183 } 184 return encoder; 185 } 186}