001/*-
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.util;
021
022import ca.uhn.fhir.context.phonetic.ApacheEncoder;
023import ca.uhn.fhir.context.phonetic.IPhoneticEncoder;
024import ca.uhn.fhir.context.phonetic.NumericEncoder;
025import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
026import org.apache.commons.codec.language.Caverphone1;
027import org.apache.commons.codec.language.Caverphone2;
028import org.apache.commons.codec.language.ColognePhonetic;
029import org.apache.commons.codec.language.DoubleMetaphone;
030import org.apache.commons.codec.language.MatchRatingApproachEncoder;
031import org.apache.commons.codec.language.Metaphone;
032import org.apache.commons.codec.language.Nysiis;
033import org.apache.commons.codec.language.RefinedSoundex;
034import org.apache.commons.codec.language.Soundex;
035import org.apache.commons.lang3.EnumUtils;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039public final class PhoneticEncoderUtil {
040
041        // embedded class only for parameter returns
042        private static class ParsedValues {
043                private final Integer maxCodeLength;
044                private final String encoderString;
045
046                public ParsedValues(String theString, Integer theMaxCode) {
047                        maxCodeLength = theMaxCode;
048                        encoderString = theString;
049                }
050
051                public Integer getMaxCodeLength() {
052                        return maxCodeLength;
053                }
054
055                public String getEncoderString() {
056                        return encoderString;
057                }
058        }
059
060        private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderUtil.class);
061
062        private PhoneticEncoderUtil() {}
063
064        /**
065         * Creates the phonetic encoder wrapper from
066         * an input string.
067         *
068         * <p>
069         * String must be in the format of...
070         *      </p>
071         *
072         * PhoneticEncoderEnum(MAX_LENGTH)
073         *
074         * @return The IPhoneticEncoder
075         */
076        public static IPhoneticEncoder getEncoder(String theString) {
077                ParsedValues values = parseIntValue(theString);
078                String encoderType = values.getEncoderString();
079                Integer encoderMaxString = values.getMaxCodeLength();
080
081                IPhoneticEncoder encoder = getEncoderFromString(encoderType, encoderMaxString);
082                if (encoder != null) {
083                        return encoder;
084                } else {
085                        ourLog.warn("Invalid phonetic param string " + theString);
086                        return null;
087                }
088        }
089
090        private static ParsedValues parseIntValue(String theString) {
091                String encoderType = null;
092                Integer encoderMaxString = null;
093
094                int braceIndex = theString.indexOf("(");
095                if (braceIndex != -1) {
096                        int len = theString.length();
097                        if (theString.charAt(len - 1) == ')') {
098                                encoderType = theString.substring(0, braceIndex);
099                                String num = theString.substring(braceIndex + 1, len - 1);
100                                try {
101                                        encoderMaxString = Integer.parseInt(num);
102                                } catch (NumberFormatException ex) {
103                                        // invalid number parse error
104                                }
105
106                                if (encoderMaxString == null || encoderMaxString < 0) {
107                                        // parse error
108                                        ourLog.error("Invalid encoder max character length: " + num);
109                                        encoderType = null;
110                                }
111                        }
112                        // else - parse error
113                } else {
114                        encoderType = theString;
115                }
116
117                return new ParsedValues(encoderType, encoderMaxString);
118        }
119
120        private static IPhoneticEncoder getEncoderFromString(String theName, Integer theMax) {
121                IPhoneticEncoder encoder = null;
122                PhoneticEncoderEnum enumVal = EnumUtils.getEnum(PhoneticEncoderEnum.class, theName);
123
124                if (enumVal != null) {
125                        switch (enumVal) {
126                                case CAVERPHONE1:
127                                        Caverphone1 caverphone1 = new Caverphone1();
128                                        encoder = new ApacheEncoder(theName, caverphone1);
129                                        break;
130                                case CAVERPHONE2:
131                                        Caverphone2 caverphone2 = new Caverphone2();
132                                        encoder = new ApacheEncoder(theName, caverphone2);
133                                        break;
134                                case COLOGNE:
135                                        ColognePhonetic colognePhonetic = new ColognePhonetic();
136                                        encoder = new ApacheEncoder(theName, colognePhonetic);
137                                        break;
138                                case DOUBLE_METAPHONE:
139                                        DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
140                                        if (theMax != null) {
141                                                doubleMetaphone.setMaxCodeLen(theMax);
142                                        }
143                                        encoder = new ApacheEncoder(theName, doubleMetaphone);
144                                        break;
145                                case MATCH_RATING_APPROACH:
146                                        MatchRatingApproachEncoder matchRatingApproachEncoder = new MatchRatingApproachEncoder();
147                                        encoder = new ApacheEncoder(theName, matchRatingApproachEncoder);
148                                        break;
149                                case METAPHONE:
150                                        Metaphone metaphone = new Metaphone();
151                                        if (theMax != null) {
152                                                metaphone.setMaxCodeLen(theMax);
153                                        }
154                                        encoder = new ApacheEncoder(theName, metaphone);
155                                        break;
156                                case NYSIIS:
157                                        Nysiis nysiis = new Nysiis();
158                                        encoder = new ApacheEncoder(theName, nysiis);
159                                        break;
160                                case NYSIIS_LONG:
161                                        Nysiis nysiis1_long = new Nysiis(false);
162                                        encoder = new ApacheEncoder(theName, nysiis1_long);
163                                        break;
164                                case REFINED_SOUNDEX:
165                                        RefinedSoundex refinedSoundex = new RefinedSoundex();
166                                        encoder = new ApacheEncoder(theName, refinedSoundex);
167                                        break;
168                                case SOUNDEX:
169                                        Soundex soundex = new Soundex();
170                                        // soundex has deprecated setting the max size
171                                        encoder = new ApacheEncoder(theName, soundex);
172                                        break;
173                                case NUMERIC:
174                                        encoder = new NumericEncoder();
175                                        break;
176                                default:
177                                        // we don't ever expect to be here
178                                        // this log message is purely for devs who update this
179                                        // enum, but not this method
180                                        ourLog.error("Unhandled PhoneticParamEnum value " + enumVal.name());
181                                        break;
182                        }
183                }
184                return encoder;
185        }
186}