001/*
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.util;
021
022import ca.uhn.fhir.i18n.Msg;
023import org.apache.commons.lang3.ArrayUtils;
024
025import java.util.ArrayList;
026import java.util.List;
027import java.util.NoSuchElementException;
028
029import static org.apache.commons.lang3.StringUtils.isBlank;
030
031public class UrlPathTokenizer {
032
033        private String[] tokens;
034        private int curPos;
035
036        public UrlPathTokenizer(String theRequestPath) {
037                if (theRequestPath == null) {
038                        theRequestPath = "";
039                }
040                tokens = removeBlanksAndSanitize(theRequestPath.split("/"));
041                curPos = 0;
042        }
043
044        public boolean hasMoreTokens() {
045                return curPos < tokens.length;
046        }
047
048        public int countTokens() {
049                return tokens.length;
050        }
051
052        /**
053         * Returns the next token without updating the current position.
054         * Will throw NoSuchElementException if there are no more tokens.
055         */
056        public String peek() {
057                if (!hasMoreTokens()) {
058                        throw new NoSuchElementException(Msg.code(2420) + "Attempt to retrieve URL token out of bounds");
059                }
060                return tokens[curPos];
061        }
062
063        /**
064         * Returns the next portion. Any URL-encoding is undone, but we will
065         * HTML encode the &lt; and &quot; marks since they are both
066         * not useful un URL paths in FHIR and potentially represent injection
067         * attacks.
068         *
069         * @see UrlUtil#sanitizeUrlPart(String)
070         * @see UrlUtil#unescape(String)
071         */
072        public String nextTokenUnescapedAndSanitized() {
073                String token = peek();
074                curPos++;
075                return token;
076        }
077
078        /**
079         * Given an array of Strings, this method will return all the non-blank entries in that
080         * array, after running sanitizeUrlPart() and unescape() on them.
081         */
082        private static String[] removeBlanksAndSanitize(String[] theInput) {
083                List<String> output = new ArrayList<>();
084                for (String s : theInput) {
085                        if (!isBlank(s)) {
086                                output.add(UrlUtil.sanitizeUrlPart(UrlUtil.unescape(s)));
087                        }
088                }
089                return output.toArray(ArrayUtils.EMPTY_STRING_ARRAY);
090        }
091}