/*-
 * #%L
 * HAPI FHIR - Server Framework
 * %%
 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package ca.uhn.fhir.rest.server.interceptor.s13n.standardizers;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.CaseUtils;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Standardizes a first name by capitalizing the character that follows each separator
 * (e.g. {@code -}, {@code '}) and removing noise characters.
 */
public class FirstNameStandardizer extends TextStandardizer {

	/**
	 * Characters that have a special meaning in a regular expression when they appear outside
	 * a character class. Each of them is backslash-escaped by {@link #checkForRegexp(String)}.
	 */
	private static final String REGEX_METACHARACTERS = "\\^$.|?*+()[]{}";

	/** Delimiters that split a single whitespace-free token into separately cased parts. */
	private final Set<String> myDelimiters = new HashSet<>();

	/**
	 * Creates the standardizer and registers its delimiters through
	 * {@link #initializeDelimiters()}.
	 */
	public FirstNameStandardizer() {
		super();

		initializeDelimiters();
	}

	/**
	 * Registers the delimiters used by this standardizer: {@code -} and {@code '}.
	 * Invoked from the constructor.
	 */
	protected void initializeDelimiters() {
		addDelimiters("-", "'");
	}

	/**
	 * Adds delimiters to the set recognised inside name tokens.
	 *
	 * @param theDelimiters delimiters to register; {@code null} and empty entries are skipped
	 * @return this instance, to allow chaining
	 */
	protected FirstNameStandardizer addDelimiters(String... theDelimiters) {
		for (String delimiter : theDelimiters) {
			// A null entry would fail in String.contains, and an empty entry is contained in
			// every token, which makes token standardization recurse without end.
			if (delimiter == null || delimiter.isEmpty()) {
				continue;
			}
			myDelimiters.add(delimiter);
		}
		return this;
	}

	/**
	 * Standardizes a name: the inherited {@code replaceTranslates} is applied first, then the
	 * text is split on runs of whitespace, every token is passed through
	 * {@link #standardizeNameToken(String)}, empty results are dropped and the remaining tokens
	 * are joined with a single space.
	 *
	 * @param theString the name to standardize
	 * @return the standardized name
	 */
	public String standardize(String theString) {
		theString = replaceTranslates(theString);

		return Arrays.stream(theString.split("\\s+"))
				.map(this::standardizeNameToken)
				.filter(s -> !StringUtils.isEmpty(s))
				.collect(Collectors.joining(" "));
	}

	/**
	 * Upper-cases the first character of the given string and leaves the rest untouched.
	 *
	 * @param theString the string to capitalize
	 * @return the capitalized string; an empty string is returned as is
	 */
	protected String capitalize(String theString) {
		if (theString.length() == 0) {
			return theString;
		}
		if (theString.length() == 1) {
			return theString.toUpperCase();
		}

		StringBuilder buf = new StringBuilder(theString.length());
		buf.append(Character.toUpperCase(theString.charAt(0)));
		buf.append(theString.substring(1));
		return buf.toString();
	}

	/**
	 * Standardizes one whitespace-free token. A token containing any registered delimiter is
	 * handled by {@link #standardizeDelimitedToken(String, String)} for each such delimiter.
	 * Any other token goes through the inherited {@code removeNoise} and is then converted
	 * with {@code CaseUtils.toCamelCase(token, true)}.
	 *
	 * @param theToken the token to standardize
	 * @return the standardized token; an empty token is returned as is
	 */
	protected String standardizeNameToken(String theToken) {
		if (theToken.isEmpty()) {
			return theToken;
		}

		boolean isDelimitedToken = false;
		for (String d : myDelimiters) {
			if (theToken.contains(d)) {
				isDelimitedToken = true;
				// Reassigned on purpose: later delimiters see the already standardized token.
				theToken = standardizeDelimitedToken(theToken, d);
			}
		}

		if (isDelimitedToken) {
			return theToken;
		}

		theToken = removeNoise(theToken);
		theToken = CaseUtils.toCamelCase(theToken, true);
		return theToken;
	}

	/**
	 * Standardizes a token that contains the given delimiter: the token is split on the
	 * delimiter, each part is standardized with {@link #standardizeNameToken(String)} and the
	 * non-empty parts are joined back with the delimiter. A delimiter at the very start or end
	 * of the token is restored afterwards, because splitting and joining drop it.
	 *
	 * @param theToken the token to standardize
	 * @param d the delimiter found in the token
	 * @return the standardized token; a token equal to the delimiter is returned as is
	 */
	protected String standardizeDelimitedToken(String theToken, String d) {
		boolean isTokenTheDelimiter = theToken.equals(d);
		if (isTokenTheDelimiter) {
			return theToken;
		}

		String splitToken = checkForRegexp(d);
		String[] splits = theToken.split(splitToken);
		for (int i = 0; i < splits.length; i++) {
			splits[i] = standardizeNameToken(splits[i]);
		}

		String retVal = join(splits, d);
		if (theToken.startsWith(d)) {
			retVal = d.concat(retVal);
		}
		if (theToken.endsWith(d)) {
			retVal = retVal.concat(d);
		}
		return retVal;
	}

	/**
	 * Joins the non-empty parts with the given delimiter; {@code null} and empty parts are
	 * skipped, so consecutive delimiters collapse into one.
	 *
	 * @param theSplits the parts to join
	 * @param theDelimiter the delimiter placed between two parts
	 * @return the joined string, empty if there is no non-empty part
	 */
	protected String join(String[] theSplits, String theDelimiter) {
		StringBuilder buf = new StringBuilder();
		for (String s : theSplits) {
			if (s == null || s.isEmpty()) {
				continue;
			}
			if (buf.length() != 0) {
				buf.append(theDelimiter);
			}
			buf.append(s);
		}
		return buf.toString();
	}

	/**
	 * Turns a literal delimiter into a regular expression that matches exactly that text, so
	 * it can be passed to {@link String#split(String)}. Every regular-expression
	 * metacharacter is prefixed with a backslash; all other characters are copied unchanged.
	 *
	 * @param theExpression the literal delimiter
	 * @return the delimiter with all regular-expression metacharacters escaped
	 */
	protected String checkForRegexp(String theExpression) {
		StringBuilder buf = new StringBuilder(theExpression.length() + 2);
		for (int i = 0; i < theExpression.length(); i++) {
			char c = theExpression.charAt(i);
			if (REGEX_METACHARACTERS.indexOf(c) >= 0) {
				buf.append('\\');
			}
			buf.append(c);
		}
		return buf.toString();
	}

	/**
	 * Tells whether the given string is one of the registered delimiters.
	 *
	 * @param theString the string to check
	 * @return {@code true} if the string is a registered delimiter
	 */
	protected boolean isDelimiter(String theString) {
		return myDelimiters.contains(theString);
	}
}