001/*- 002 * #%L 003 * HAPI FHIR - Core Library 004 * %% 005 * Copyright (C) 2014 - 2024 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.util; 021 022import jakarta.annotation.Nonnull; 023 024import java.io.CharArrayWriter; 025import java.nio.charset.StandardCharsets; 026import java.text.Normalizer; 027import java.util.Arrays; 028 029public class StringUtil { 030 031 /** 032 * If a string ends with a given character, remove that character from the end of the string (as many times as it occurs at the end) 033 */ 034 public static String chompCharacter(String theInput, char theCharacter) { 035 String retVal = theInput; 036 while (retVal != null && retVal.length() > 0 && retVal.charAt(retVal.length() - 1) == theCharacter) { 037 retVal = retVal.substring(0, retVal.length() - 1); 038 } 039 return retVal; 040 } 041 042 public static String normalizeStringForSearchIndexing(String theString) { 043 if (theString == null) { 044 return null; 045 } 046 047 CharArrayWriter outBuffer = new CharArrayWriter(theString.length()); 048 049 /* 050 * The following block of code is used to strip out diacritical marks from latin script 051 * and also convert to upper case. E.g. "j?mes" becomes "JAMES". 052 * 053 * See http://www.unicode.org/charts/PDF/U0300.pdf for the logic 054 * behind stripping 0300-036F 055 * 056 * See #454 for an issue where we were completely stripping non latin characters 057 * See #832 for an issue where we normalize korean characters, which are decomposed 058 */ 059 String string = Normalizer.normalize(theString, Normalizer.Form.NFD); 060 for (int i = 0, n = string.length(); i < n; ++i) { 061 char c = string.charAt(i); 062 if (c >= '\u0300' && c <= '\u036F') { 063 continue; 064 } else { 065 outBuffer.append(c); 066 } 067 } 068 069 return new String(outBuffer.toCharArray()).toUpperCase(); 070 } 071 072 public static String toUtf8String(byte[] theBytes) { 073 byte[] bytes = theBytes; 074 if (theBytes.length >= 3) { 075 if (theBytes[0] == -17 && theBytes[1] == -69 && theBytes[2] == -65) { 076 bytes = Arrays.copyOfRange(theBytes, 3, theBytes.length); 077 } 078 } 079 return new String(bytes, StandardCharsets.UTF_8); 080 } 081 082 /** 083 * Gets the string prefix of the specified length. 084 * 085 * @param theString 086 * String to get the prefix from 087 * @param theCodePointCount 088 * Length of the prefix in code points 089 * @return 090 * Returns the string prefix of the specified number of codepoints. 091 */ 092 public static String left(String theString, int theCodePointCount) { 093 if (theString == null) { 094 return null; 095 } 096 097 if (theCodePointCount < 0) { 098 return ""; 099 } 100 101 // char count can only be bigger than the code point count 102 if (theString.length() <= theCodePointCount) { 103 return theString; 104 } 105 106 return theString.substring(0, theString.offsetByCodePoints(0, theCodePointCount)); 107 } 108 109 @Nonnull 110 public static String prependLineNumbers(@Nonnull String theInput) { 111 StringBuilder schemaOutput = new StringBuilder(); 112 int index = 0; 113 for (String next : theInput.split("\\n")) { 114 schemaOutput 115 .append(index++) 116 .append(": ") 117 .append(next.replace("\r", "")) 118 .append("\n"); 119 } 120 return schemaOutput.toString(); 121 } 122}