Source code

001/*-
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2026 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.util;
021
import jakarta.annotation.Nonnull;

import java.io.CharArrayWriter;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.Arrays;
import java.util.Locale;
028
029public class StringUtil {
030
031        /**
032         * If a string ends with a given character, remove that character from the end of the string (as many times as it occurs at the end)
033         */
034        public static String chompCharacter(String theInput, char theCharacter) {
035                String retVal = theInput;
036                while (retVal != null && retVal.length() > 0 && retVal.charAt(retVal.length() - 1) == theCharacter) {
037                        retVal = retVal.substring(0, retVal.length() - 1);
038                }
039                return retVal;
040        }
041
042        public static String normalizeStringForSearchIndexing(String theString) {
043                if (theString == null) {
044                        return null;
045                }
046
047                CharArrayWriter outBuffer = new CharArrayWriter(theString.length());
048
049                /*
050                 * The following block of code is used to strip out diacritical marks from latin script
051                 * and also convert to upper case. E.g. "j?mes" becomes "JAMES".
052                 *
053                 * See http://www.unicode.org/charts/PDF/U0300.pdf for the logic
054                 * behind stripping 0300-036F
055                 *
056                 * See #454 for an issue where we were completely stripping non latin characters
057                 * See #832 for an issue where we normalize korean characters, which are decomposed
058                 */
059                String string = Normalizer.normalize(theString, Normalizer.Form.NFD);
060                for (int i = 0, n = string.length(); i < n; ++i) {
061                        char c = string.charAt(i);
062                        if (c >= '\u0300' && c <= '\u036F') {
063                                continue;
064                        } else {
065                                outBuffer.append(c);
066                        }
067                }
068
069                return new String(outBuffer.toCharArray()).toUpperCase();
070        }
071
072        public static String toUtf8String(byte[] theBytes) {
073                byte[] bytes = theBytes;
074                if (theBytes.length >= 3) {
075                        if (theBytes[0] == -17 && theBytes[1] == -69 && theBytes[2] == -65) {
076                                bytes = Arrays.copyOfRange(theBytes, 3, theBytes.length);
077                        }
078                }
079                return new String(bytes, StandardCharsets.UTF_8);
080        }
081
082        /**
083         * Gets the string prefix of the specified length.
084         *
085         * @param theString
086         *      String to get the prefix from
087         * @param theCodePointCount
088         *      Length of the prefix in code points
089         * @return
090         *      Returns the string prefix of the specified number of codepoints.
091         */
092        public static String left(String theString, int theCodePointCount) {
093                if (theString == null) {
094                        return null;
095                }
096
097                if (theCodePointCount < 0) {
098                        return "";
099                }
100
101                // char count can only be bigger than the code point count
102                if (theString.length() <= theCodePointCount) {
103                        return theString;
104                }
105
106                return theString.substring(0, theString.offsetByCodePoints(0, theCodePointCount));
107        }
108
109        @Nonnull
110        public static String prependLineNumbers(@Nonnull String theInput) {
111                StringBuilder schemaOutput = new StringBuilder();
112                int index = 0;
113                for (String next : theInput.split("\\n")) {
114                        schemaOutput
115                                        .append(index++)
116                                        .append(": ")
117                                        .append(next.replace("\r", ""))
118                                        .append("\n");
119                }
120                return schemaOutput.toString();
121        }
122}