001package ca.uhn.fhir.util;
002
003import ca.uhn.fhir.context.FhirContext;
004import ca.uhn.fhir.context.RuntimeResourceDefinition;
005import ca.uhn.fhir.i18n.Msg;
006import ca.uhn.fhir.model.primitive.IdDt;
007import ca.uhn.fhir.parser.DataFormatException;
008import ca.uhn.fhir.rest.api.Constants;
009import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
010import com.google.common.escape.Escaper;
011import com.google.common.net.PercentEscaper;
012import org.apache.commons.lang3.StringUtils;
013import org.apache.http.NameValuePair;
014import org.apache.http.client.utils.URLEncodedUtils;
015import org.apache.http.message.BasicNameValuePair;
016import org.hl7.fhir.instance.model.api.IPrimitiveType;
017
018import javax.annotation.Nonnull;
019import java.io.UnsupportedEncodingException;
020import java.net.MalformedURLException;
021import java.net.URI;
022import java.net.URISyntaxException;
023import java.net.URL;
024import java.net.URLDecoder;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.Map.Entry;
031import java.util.StringTokenizer;
032import java.util.stream.Collectors;
033
034import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
035import static org.apache.commons.lang3.StringUtils.defaultString;
036import static org.apache.commons.lang3.StringUtils.endsWith;
037import static org.apache.commons.lang3.StringUtils.isBlank;
038
039/*
040 * #%L
041 * HAPI FHIR - Core Library
042 * %%
043 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
044 * %%
045 * Licensed under the Apache License, Version 2.0 (the "License");
046 * you may not use this file except in compliance with the License.
047 * You may obtain a copy of the License at
048 *
049 *      http://www.apache.org/licenses/LICENSE-2.0
050 *
051 * Unless required by applicable law or agreed to in writing, software
052 * distributed under the License is distributed on an "AS IS" BASIS,
053 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
054 * See the License for the specific language governing permissions and
055 * limitations under the License.
056 * #L%
057 */
058
059public class UrlUtil {
        // SLF4J logger shared by the static helpers in this class
        private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

        // Extra characters handed to the PercentEscaper below as its "safe" character set
        private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
        // Escaper used by escapeUrlParam() and escapeUrlParams().
        // NOTE(review): the second constructor argument is presumably Guava's "plusForSpace" flag
        // (false = a space is percent-encoded rather than turned into '+') - confirm against the Guava docs
        private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);
064
065        public static String sanitizeBaseUrl(String theBaseUrl) {
066                return theBaseUrl.replaceAll("[^a-zA-Z0-9:/._-]", "");
067        }
068
069        public static class UrlParts {
070                private String myParams;
071                private String myResourceId;
072                private String myResourceType;
073                private String myVersionId;
074
075                public String getParams() {
076                        return myParams;
077                }
078
079                public void setParams(String theParams) {
080                        myParams = theParams;
081                }
082
083                public String getResourceId() {
084                        return myResourceId;
085                }
086
087                public void setResourceId(String theResourceId) {
088                        myResourceId = theResourceId;
089                }
090
091                public String getResourceType() {
092                        return myResourceType;
093                }
094
095                public void setResourceType(String theResourceType) {
096                        myResourceType = theResourceType;
097                }
098
099                public String getVersionId() {
100                        return myVersionId;
101                }
102
103                public void setVersionId(String theVersionId) {
104                        myVersionId = theVersionId;
105                }
106        }
107
108        /**
109         * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
110         */
111        public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
112                if (theEndpoint == null) {
113                        return null;
114                }
115                if (isAbsolute(theEndpoint)) {
116                        return theEndpoint;
117                }
118                if (theBase == null) {
119                        return theEndpoint;
120                }
121
122                try {
123                        return new URL(new URL(theBase), theEndpoint).toString();
124                } catch (MalformedURLException e) {
125                        ourLog.warn("Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e);
126                        return theEndpoint;
127                }
128        }
129
130        public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
131                if (theParentExtensionUrl == null) {
132                        return theExtensionUrl;
133                }
134                if (theExtensionUrl == null) {
135                        return null;
136                }
137
138                int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
139                int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');
140
141                if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
142                        return theExtensionUrl;
143                }
144
145                if (parentLastSlashIdx != childLastSlashIdx) {
146                        return theExtensionUrl;
147                }
148
149                if (!theParentExtensionUrl.substring(0, parentLastSlashIdx).equals(theExtensionUrl.substring(0, parentLastSlashIdx))) {
150                        return theExtensionUrl;
151                }
152
153                if (theExtensionUrl.length() > parentLastSlashIdx) {
154                        return theExtensionUrl.substring(parentLastSlashIdx + 1);
155                }
156
157                return theExtensionUrl;
158        }
159
160        /**
161         * URL encode a value according to RFC 3986
162         * <p>
163         * This method is intended to be applied to an individual parameter
164         * name or value. For example, if you are creating the URL
165         * <code>http://example.com/fhir/Patient?key=f</code>
166         * it would be appropriate to pass the string "f" to this method,
167         * but not appropriate to pass the entire URL since characters
168         * such as "/" and "?" would also be escaped.
169         * </P>
170         */
171        public static String escapeUrlParam(String theUnescaped) {
172                if (theUnescaped == null) {
173                        return null;
174                }
175                return PARAMETER_ESCAPER.escape(theUnescaped);
176        }
177
178        /**
179         * Applies the same encodong as {@link #escapeUrlParam(String)} but against all
180         * values in a collection
181         */
182        public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
183                return theUnescaped
184                        .stream()
185                        .map(t -> PARAMETER_ESCAPER.escape(t))
186                        .collect(Collectors.toList());
187        }
188
189
190        public static boolean isAbsolute(String theValue) {
191                String value = theValue.toLowerCase();
192                return value.startsWith("http://") || value.startsWith("https://");
193        }
194
195        public static boolean isNeedsSanitization(CharSequence theString) {
196                if (theString != null) {
197                        for (int i = 0; i < theString.length(); i++) {
198                                char nextChar = theString.charAt(i);
199                                switch (nextChar) {
200                                        case '\'':
201                                        case '"':
202                                        case '<':
203                                        case '>':
204                                        case '\n':
205                                        case '\r':
206                                                return true;
207                                }
208                                if (nextChar < ' ') {
209                                        return true;
210                                }
211                        }
212                }
213                return false;
214        }
215
216        public static boolean isValid(String theUrl) {
217                if (theUrl == null || theUrl.length() < 8) {
218                        return false;
219                }
220
221                String url = theUrl.toLowerCase();
222                if (url.charAt(0) != 'h') {
223                        return false;
224                }
225                if (url.charAt(1) != 't') {
226                        return false;
227                }
228                if (url.charAt(2) != 't') {
229                        return false;
230                }
231                if (url.charAt(3) != 'p') {
232                        return false;
233                }
234                int slashOffset;
235                if (url.charAt(4) == ':') {
236                        slashOffset = 5;
237                } else if (url.charAt(4) == 's') {
238                        if (url.charAt(5) != ':') {
239                                return false;
240                        }
241                        slashOffset = 6;
242                } else {
243                        return false;
244                }
245
246                if (url.charAt(slashOffset) != '/') {
247                        return false;
248                }
249                if (url.charAt(slashOffset + 1) != '/') {
250                        return false;
251                }
252
253                return true;
254        }
255
256        public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl) throws DataFormatException {
257                String url = theUrl;
258                int paramIndex = url.indexOf('?');
259
260                // Change pattern of "Observation/?param=foo" into "Observation?param=foo"
261                if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
262                        url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
263                        paramIndex--;
264                }
265
266                String resourceName = url.substring(0, paramIndex);
267                if (resourceName.contains("/")) {
268                        resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1);
269                }
270                return theCtx.getResourceDefinition(resourceName);
271        }
272
273        public static Map<String, String[]> parseQueryString(String theQueryString) {
274                HashMap<String, List<String>> map = new HashMap<>();
275                parseQueryString(theQueryString, map);
276                return toQueryStringMap(map);
277        }
278
279        private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) {
280                String query = defaultString(theQueryString);
281                if (query.startsWith("?")) {
282                        query = query.substring(1);
283                }
284
285
286                StringTokenizer tok = new StringTokenizer(query, "&");
287                while (tok.hasMoreTokens()) {
288                        String nextToken = tok.nextToken();
289                        if (isBlank(nextToken)) {
290                                continue;
291                        }
292
293                        int equalsIndex = nextToken.indexOf('=');
294                        String nextValue;
295                        String nextKey;
296                        if (equalsIndex == -1) {
297                                nextKey = nextToken;
298                                nextValue = "";
299                        } else {
300                                nextKey = nextToken.substring(0, equalsIndex);
301                                nextValue = nextToken.substring(equalsIndex + 1);
302                        }
303
304                        nextKey = unescape(nextKey);
305                        nextValue = unescape(nextValue);
306
307                        List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>());
308                        list.add(nextValue);
309                }
310        }
311
312        public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
313                HashMap<String, List<String>> map = new HashMap<>();
314                for (String next : theQueryString) {
315                        parseQueryString(next, map);
316                }
317                return toQueryStringMap(map);
318        }
319
320        /**
321         * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
322         * and any version identifiers or fragment hash is removed
323         */
324        public static String normalizeCanonicalUrlForComparison(String theUrl) {
325                String retVal;
326                try {
327                        retVal = new URI(theUrl).normalize().toString();
328                } catch (URISyntaxException e) {
329                        retVal = theUrl;
330                }
331                while (endsWith(retVal, "/")) {
332                        retVal = retVal.substring(0, retVal.length() - 1);
333                }
334                int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
335                if (hashOrPipeIndex != -1) {
336                        retVal = retVal.substring(0, hashOrPipeIndex);
337                }
338                return retVal;
339        }
340
341        /**
342         * Parse a URL in one of the following forms:
343         * <ul>
344         * <li>[Resource Type]?[Search Params]
345         * <li>[Resource Type]/[Resource ID]
346         * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
347         * </ul>
348         */
349        public static UrlParts parseUrl(String theUrl) {
350                String url = theUrl;
351                UrlParts retVal = new UrlParts();
352                if (url.startsWith("http")) {
353                        int qmIdx = url.indexOf('?');
354                        if (qmIdx != -1) {
355                                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
356                                url = url.substring(0, qmIdx);
357                        }
358
359                        IdDt id = new IdDt(url);
360                        retVal.setResourceType(id.getResourceType());
361                        retVal.setResourceId(id.getIdPart());
362                        retVal.setVersionId(id.getVersionIdPart());
363                        return retVal;
364                }
365
366                int parsingStart = 0;
367                if (url.length() > 2) {
368                        if (url.charAt(0) == '/') {
369                                if (Character.isLetter(url.charAt(1))) {
370                                        parsingStart = 1;
371                                }
372                        }
373                }
374
375                int nextStart = parsingStart;
376                boolean nextIsHistory = false;
377
378                for (int idx = parsingStart; idx < url.length(); idx++) {
379                        char nextChar = url.charAt(idx);
380                        boolean atEnd = (idx + 1) == url.length();
381                        if (nextChar == '?' || nextChar == '/' || atEnd) {
382                                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
383                                String nextSubstring = url.substring(nextStart, endIdx);
384                                if (retVal.getResourceType() == null) {
385                                        retVal.setResourceType(nextSubstring);
386                                } else if (retVal.getResourceId() == null) {
387                                        retVal.setResourceId(nextSubstring);
388                                } else if (nextIsHistory) {
389                                        retVal.setVersionId(nextSubstring);
390                                } else {
391                                        if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
392                                                nextIsHistory = true;
393                                        } else {
394                                                throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
395                                        }
396                                }
397                                if (nextChar == '?') {
398                                        if (url.length() > idx + 1) {
399                                                retVal.setParams(url.substring(idx + 1));
400                                        }
401                                        break;
402                                }
403                                nextStart = idx + 1;
404                        }
405                }
406
407                return retVal;
408
409        }
410
411        /**
412         * This method specifically HTML-encodes the &quot; and
413         * &lt; characters in order to prevent injection attacks
414         */
415        public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
416                String retVal = null;
417                if (theString != null) {
418                        retVal = sanitizeUrlPart(theString.getValueAsString());
419                }
420                return retVal;
421        }
422
423        /**
424         * This method specifically HTML-encodes the &quot; and
425         * &lt; characters in order to prevent injection attacks.
426         * <p>
427         * The following characters are escaped:
428         * <ul>
429         *    <li>&apos;</li>
430         *    <li>&quot;</li>
431         *    <li>&lt;</li>
432         *    <li>&gt;</li>
433         *    <li>\n (newline)</li>
434         * </ul>
435         */
436        public static String sanitizeUrlPart(CharSequence theString) {
437                if (theString == null) {
438                        return null;
439                }
440
441                boolean needsSanitization = isNeedsSanitization(theString);
442
443                if (needsSanitization) {
444                        // Ok, we're sanitizing
445                        StringBuilder buffer = new StringBuilder(theString.length() + 10);
446                        for (int j = 0; j < theString.length(); j++) {
447
448                                char nextChar = theString.charAt(j);
449                                switch (nextChar) {
450                                        /*
451                                         * NB: If you add a constant here, you also need to add it
452                                         * to isNeedsSanitization()!!
453                                         */
454                                        case '\'':
455                                                buffer.append("&apos;");
456                                                break;
457                                        case '"':
458                                                buffer.append("&quot;");
459                                                break;
460                                        case '<':
461                                                buffer.append("&lt;");
462                                                break;
463                                        case '>':
464                                                buffer.append("&gt;");
465                                                break;
466                                        case '\n':
467                                                buffer.append("&#10;");
468                                                break;
469                                        case '\r':
470                                                buffer.append("&#13;");
471                                                break;
472                                        default:
473                                                if (nextChar >= ' ') {
474                                                        buffer.append(nextChar);
475                                                }
476                                                break;
477                                }
478
479                        } // for build escaped string
480
481                        return buffer.toString();
482                }
483
484                return theString.toString();
485        }
486
487        /**
488         * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
489         * same strings as the input but with sanitization applied
490         */
491        public static String[] sanitizeUrlPart(String[] theParameterValues) {
492                String[] retVal = null;
493                if (theParameterValues != null) {
494                        retVal = new String[theParameterValues.length];
495                        for (int i = 0; i < theParameterValues.length; i++) {
496                                retVal[i] = sanitizeUrlPart(theParameterValues[i]);
497                        }
498                }
499                return retVal;
500        }
501
502        private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) {
503                HashMap<String, String[]> retVal = new HashMap<>();
504                for (Entry<String, List<String>> nextEntry : map.entrySet()) {
505                        retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
506                }
507                return retVal;
508        }
509
510        public static String unescape(String theString) {
511                if (theString == null) {
512                        return null;
513                }
514                for (int i = 0; i < theString.length(); i++) {
515                        char nextChar = theString.charAt(i);
516                        if (nextChar == '%' || nextChar == '+') {
517                                try {
518                                        // Yes it would be nice to not use a string "UTF-8" but the equivalent
519                                        // method that takes Charset is JDK10+ only... sigh....
520                                        return URLDecoder.decode(theString, "UTF-8");
521                                } catch (UnsupportedEncodingException e) {
522                                        throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e);
523                                }
524                        }
525                }
526                return theString;
527        }
528
529        public static List<NameValuePair> translateMatchUrl(String theMatchUrl) {
530                List<NameValuePair> parameters;
531                String matchUrl = theMatchUrl;
532                int questionMarkIndex = matchUrl.indexOf('?');
533                if (questionMarkIndex != -1) {
534                        matchUrl = matchUrl.substring(questionMarkIndex + 1);
535                }
536
537                final String[] searchList = new String[]{
538                        "+",
539                        "|",
540                        "=>=",
541                        "=<=",
542                        "=>",
543                        "=<"
544                };
545                final String[] replacementList = new String[]{
546                        "%2B",
547                        "%7C",
548                        "=%3E%3D",
549                        "=%3C%3D",
550                        "=%3E",
551                        "=%3C"
552                };
553                matchUrl = StringUtils.replaceEach(matchUrl, searchList, replacementList);
554                if (matchUrl.contains(" ")) {
555                        throw new InvalidRequestException(Msg.code(1744) + "Failed to parse match URL[" + theMatchUrl + "] - URL is invalid (must not contain spaces)");
556                }
557
558                parameters = URLEncodedUtils.parse((matchUrl), Constants.CHARSET_UTF8, '&');
559
560                // One issue that has happened before is people putting a "+" sign into an email address in a match URL
561                // and having that turn into a " ". Since spaces are never appropriate for email addresses, let's just
562                // assume they really meant "+".
563                for (int i = 0; i < parameters.size(); i++) {
564                        NameValuePair next = parameters.get(i);
565                        if (next.getName().equals("email") && next.getValue().contains(" ")) {
566                                BasicNameValuePair newPair = new BasicNameValuePair(next.getName(), next.getValue().replace(' ', '+'));
567                                parameters.set(i, newPair);
568                        }
569                }
570
571                return parameters;
572        }
573}