001/*-
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.context.phonetic;
021
022import org.apache.commons.codec.EncoderException;
023import org.apache.commons.codec.StringEncoder;
024import org.apache.commons.lang3.StringUtils;
025import org.slf4j.Logger;
026import org.slf4j.LoggerFactory;
027
028import java.util.StringJoiner;
029
030public class ApacheEncoder implements IPhoneticEncoder {
031        private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class);
032
033        private final String myName;
034        private final StringEncoder myStringEncoder;
035
036        public ApacheEncoder(String theName, StringEncoder theStringEncoder) {
037                myName = theName;
038                myStringEncoder = theStringEncoder;
039        }
040
041        @Override
042        public String name() {
043                return myName;
044        }
045
046        @Override
047        public String encode(String theString) {
048                try {
049                        // If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in
050                        // address lines.
051                        if (theString.contains(" ")) {
052                                return encodeStringWithSpaces(theString);
053                        }
054                        return myStringEncoder.encode(theString);
055                } catch (EncoderException e) {
056                        ourLog.error("Failed to encode string " + theString, e);
057                        return theString;
058                }
059        }
060
061        private String encodeStringWithSpaces(String theString) throws EncoderException {
062                StringJoiner joiner = new StringJoiner(" ");
063
064                // This sub-stack holds the alpha parts
065                StringJoiner alphaJoiner = new StringJoiner(" ");
066
067                for (String part : theString.split("[\\s\\W]+")) {
068                        if (StringUtils.isAlpha(part)) {
069                                alphaJoiner.add(part);
070                        } else {
071                                // Once we hit a non-alpha part, encode all the alpha parts together as a single string
072                                // This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter
073                                alphaJoiner = encodeAlphaParts(joiner, alphaJoiner);
074                                joiner.add(part);
075                        }
076                }
077                encodeAlphaParts(joiner, alphaJoiner);
078
079                return joiner.toString();
080        }
081
082        private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException {
083                // Encode the alpha parts as a single string and then flush the alpha encoder
084                if (theAlphaJoiner.length() > 0) {
085                        theJoiner.add(myStringEncoder.encode(theAlphaJoiner.toString()));
086                        theAlphaJoiner = new StringJoiner(" ");
087                }
088                return theAlphaJoiner;
089        }
090}