001/*-
002 * #%L
003 * HAPI FHIR - Server Framework
004 * %%
005 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.rest.server.interceptor.s13n.standardizers;
021
022import ca.uhn.fhir.i18n.Msg;
023import ca.uhn.fhir.rest.server.interceptor.ConfigLoader;
024
025import java.util.HashSet;
026import java.util.Scanner;
027import java.util.Set;
028
029public class NoiseCharacters {
030
031        private static final int RANGE_THRESHOLD = 150;
032
033        private Set<Integer> myNoiseCharacters = new HashSet<>();
034        private Set<Range> myNoiseCharacterRanges = new HashSet<>();
035
036        private int size;
037
038        public int getSize() {
039                return myNoiseCharacters.size();
040        }
041
042        public void initializeFromClasspath() {
043                String noiseChars = ConfigLoader.loadResourceContent("classpath:noise-chars.txt");
044                try (Scanner scanner = new Scanner(noiseChars)) {
045                        while (scanner.hasNext()) {
046                                parse(scanner.nextLine());
047                        }
048                }
049        }
050
051        public boolean isNoise(int theChar) {
052                if (myNoiseCharacters.contains(theChar)) {
053                        return true;
054                }
055
056                for (Range r : myNoiseCharacterRanges) {
057                        if (r.isInRange(theChar)) {
058                                return true;
059                        }
060                }
061
062                return false;
063        }
064
065        private void parse(String theString) {
066                if (theString.contains("-")) {
067                        addRange(theString);
068                } else {
069                        add(theString);
070                }
071        }
072
073        public NoiseCharacters add(String theLiteral) {
074                myNoiseCharacters.add(toInt(theLiteral));
075                return this;
076        }
077
078        public NoiseCharacters addRange(String theRange) {
079                if (!theRange.contains("-")) {
080                        throw new IllegalArgumentException(Msg.code(350) + String.format("Invalid range %s", theRange));
081                }
082
083                String[] range = theRange.split("-");
084                if (range.length < 2) {
085                        throw new IllegalArgumentException(Msg.code(351) + String.format("Invalid range %s", theRange));
086                }
087
088                addRange(range[0].trim(), range[1].trim());
089                return this;
090        }
091
092        public NoiseCharacters addRange(String theLowerBound, String theUpperBound) {
093                int lower = toInt(theLowerBound);
094                int upper = toInt(theUpperBound);
095
096                if (lower > upper) {
097                        throw new IllegalArgumentException(
098                                        Msg.code(352) + String.format("Invalid character range %s-%s", theLowerBound, theUpperBound));
099                }
100
101                if (upper - lower >= RANGE_THRESHOLD) {
102                        myNoiseCharacterRanges.add(new Range(lower, upper));
103                        return this;
104                }
105
106                for (int i = lower; i <= upper; i++) {
107                        myNoiseCharacters.add(i);
108                }
109                return this;
110        }
111
112        private int toInt(String theLiteral) {
113                if (!theLiteral.startsWith("#x")) {
114                        throw new IllegalArgumentException(Msg.code(353) + "Unable to parse " + theLiteral);
115                }
116
117                return Integer.parseInt(theLiteral.substring(2), 16);
118        }
119}