001/*
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2025 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.util;
021
022import ca.uhn.fhir.context.FhirContext;
023import ca.uhn.fhir.context.RuntimeResourceDefinition;
024import ca.uhn.fhir.i18n.Msg;
025import ca.uhn.fhir.model.primitive.IdDt;
026import ca.uhn.fhir.parser.DataFormatException;
027import ca.uhn.fhir.rest.api.Constants;
028import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
029import com.google.common.escape.Escaper;
030import com.google.common.net.PercentEscaper;
031import jakarta.annotation.Nonnull;
032import jakarta.annotation.Nullable;
033import org.apache.commons.lang3.StringUtils;
034import org.hl7.fhir.instance.model.api.IPrimitiveType;
035
036import java.net.MalformedURLException;
037import java.net.URI;
038import java.net.URISyntaxException;
039import java.net.URL;
040import java.net.URLDecoder;
041import java.nio.charset.StandardCharsets;
042import java.nio.file.Path;
043import java.nio.file.Paths;
044import java.util.ArrayList;
045import java.util.Collection;
046import java.util.HashMap;
047import java.util.List;
048import java.util.Map;
049import java.util.Map.Entry;
050import java.util.StringTokenizer;
051import java.util.stream.Collectors;
052
053import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
054import static org.apache.commons.lang3.StringUtils.defaultString;
055import static org.apache.commons.lang3.StringUtils.endsWith;
056import static org.apache.commons.lang3.StringUtils.isBlank;
057import static org.apache.commons.lang3.StringUtils.isNotBlank;
058
059@SuppressWarnings("JavadocLinkAsPlainText")
060public class UrlUtil {
        // Class-wide SLF4J logger (used by constructAbsoluteUrl to report unresolvable URLs).
        private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

        // Extra characters handed to PercentEscaper as "safe", i.e. left unescaped by both escapers below.
        private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
        // "NO_SLASH" means "does not escape the slash": '/' is added to the safe characters here.
        // This is the escaper chosen by escapeUrlParam(String, boolean) when theEscapeSlash is false.
        private static final Escaper PARAMETER_ESCAPER_NO_SLASH =
                        new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS + "/", false);
        // Default escaper: '/' is not in the safe set, so slashes are percent-escaped.
        private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

        /**
         * Non instantiable: this class only exposes static helpers.
         */
        private UrlUtil() {}
072
073        /**
074         * Cleans up a value that will be serialized as an HTTP header. This method:
075         * <p>
076         * - Strips any newline (\r or \n) characters
077         *
078         * @since 6.2.0
079         */
080        public static String sanitizeHeaderValue(String theHeader) {
081                return theHeader.replace("\n", "").replace("\r", "");
082        }
083
084        public static String sanitizeBaseUrl(String theBaseUrl) {
085                return theBaseUrl.replaceAll("[^a-zA-Z0-9:/._-]", "");
086        }
087
088        /**
089         * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
090         */
091        public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
092                if (theEndpoint == null) {
093                        return null;
094                }
095                if (isAbsolute(theEndpoint)) {
096                        return theEndpoint;
097                }
098                if (theBase == null) {
099                        return theEndpoint;
100                }
101
102                try {
103                        return new URL(new URL(theBase), theEndpoint).toString();
104                } catch (MalformedURLException e) {
105                        ourLog.warn(
106                                        "Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e);
107                        return theEndpoint;
108                }
109        }
110
111        public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
112                if (theParentExtensionUrl == null) {
113                        return theExtensionUrl;
114                }
115                if (theExtensionUrl == null) {
116                        return null;
117                }
118
119                int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
120                int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');
121
122                if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
123                        return theExtensionUrl;
124                }
125
126                if (parentLastSlashIdx != childLastSlashIdx) {
127                        return theExtensionUrl;
128                }
129
130                if (!theParentExtensionUrl
131                                .substring(0, parentLastSlashIdx)
132                                .equals(theExtensionUrl.substring(0, parentLastSlashIdx))) {
133                        return theExtensionUrl;
134                }
135
136                if (theExtensionUrl.length() > parentLastSlashIdx) {
137                        return theExtensionUrl.substring(parentLastSlashIdx + 1);
138                }
139
140                return theExtensionUrl;
141        }
142
143        /**
144         * Given a FHIR resource URL, extracts the associated resource type. Supported formats
145         * include the following inputs, all of which will return {@literal Patient}. If no
146         * resource type can be determined, {@literal null} will be returned.
147         * <ul>
148         * <li>Patient
149         * <li>Patient?
150         * <li>Patient?identifier=foo
151         * <li>/Patient
152         * <li>/Patient?
153         * <li>/Patient?identifier=foo
154         * <li>http://foo/base/Patient?identifier=foo
155         * <li>http://foo/base/Patient/1
156         * <li>http://foo/base/Patient/1/_history/2
157         * <li>Patient/1
158         * <li>Patient/1/_history/2
159         * <li>/Patient/1
160         * <li>/Patient/1/_history/2
161         * </ul>
162         */
163        @Nullable
164        public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) {
165                if (theUrl == null) {
166                        return null;
167                }
168                if (theUrl.startsWith("urn:")) {
169                        return null;
170                }
171
172                String resourceType = null;
173                int qmIndex = theUrl.indexOf("?");
174                if (qmIndex > 0) {
175                        String urlResourceType = theUrl.substring(0, qmIndex);
176                        int slashIdx = urlResourceType.lastIndexOf('/');
177                        if (slashIdx != -1) {
178                                urlResourceType = urlResourceType.substring(slashIdx + 1);
179                        }
180                        if (isNotBlank(urlResourceType)) {
181                                resourceType = urlResourceType;
182                        }
183                } else {
184                        resourceType = theUrl;
185                        int slashIdx = resourceType.indexOf('/');
186                        if (slashIdx == 0) {
187                                resourceType = resourceType.substring(1);
188                        }
189
190                        slashIdx = resourceType.indexOf('/');
191                        if (slashIdx != -1) {
192                                resourceType = new IdDt(resourceType).getResourceType();
193                        }
194                }
195
196                try {
197                        if (isNotBlank(resourceType)) {
198                                theFhirContext.getResourceDefinition(resourceType);
199                        }
200                } catch (DataFormatException e) {
201                        return null;
202                }
203
204                return resourceType;
205        }
206
207        /**
208         * URL encode a value according to RFC 3986, except for the following
209         * characters: <code>-_.*</code>.
210         * <p>
211         * This method is intended to be applied to an individual parameter
212         * name or value. For example, if you are creating the URL
213         * <code>http://example.com/fhir/Patient?key=føø</code>
214         * it would be appropriate to pass the string "føø" to this method,
215         * but not appropriate to pass the entire URL since characters
216         * such as "/" and "?" would also be escaped.
217         * </p>
218         *
219         * @see #escapeUrlParam(String, boolean)
220         */
221        public static String escapeUrlParam(String theUnescaped) {
222                return escapeUrlParam(theUnescaped, true);
223        }
224
225        /**
226         * URL encode a value according to RFC 3986, except for the following
227         * characters: <code>-_.*</code>, and optionally <code>/</code>.
228         * <p>
229         * This method is intended to be applied to an individual parameter
230         * name or value. For example, if you are creating the URL
231         * <code>http://example.com/fhir/Patient?key=føø</code>
232         * it would be appropriate to pass the string "føø" to this method,
233         * but not appropriate to pass the entire URL since characters
234         * such as "?" and possibly "/" would also be escaped.
235         * </p>
236         *
237         * @param theEscapeSlash If <code>true</code>, the slash character will be percent-escaped.
238         *                       Set this to false if you are escaping a query parameter value, since slashes
239         *                       will be more readable in the URL than the percent-encoded version. If you
240         *                       aren't sure where the escaped version will appear, always set this to
241         *                       <code>false</code>, or just call {@link #escapeUrlParam(String)} instead.
242         * @since 8.6.0
243         */
244        public static String escapeUrlParam(String theUnescaped, boolean theEscapeSlash) {
245                if (theUnescaped == null) {
246                        return null;
247                }
248                if (theEscapeSlash) {
249                        return PARAMETER_ESCAPER.escape(theUnescaped);
250                } else {
251                        return PARAMETER_ESCAPER_NO_SLASH.escape(theUnescaped);
252                }
253        }
254
255        /**
256         * Applies the same encodong as {@link #escapeUrlParam(String)} but against all
257         * values in a collection
258         */
259        public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
260                return theUnescaped.stream().map(t -> PARAMETER_ESCAPER.escape(t)).collect(Collectors.toList());
261        }
262
263        public static boolean isAbsolute(String theValue) {
264                String value = theValue.toLowerCase();
265                return value.startsWith("http://") || value.startsWith("https://");
266        }
267
268        public static boolean isNeedsSanitization(CharSequence theString) {
269                if (theString != null) {
270                        for (int i = 0; i < theString.length(); i++) {
271                                char nextChar = theString.charAt(i);
272                                switch (nextChar) {
273                                        case '\'':
274                                        case '"':
275                                        case '<':
276                                        case '>':
277                                        case '\n':
278                                        case '\r':
279                                                return true;
280                                }
281                                if (nextChar < ' ') {
282                                        return true;
283                                }
284                        }
285                }
286                return false;
287        }
288
289        public static boolean isValid(String theUrl) {
290                if (theUrl == null || theUrl.length() < 8) {
291                        return false;
292                }
293
294                String url = theUrl.toLowerCase();
295                if (url.charAt(0) != 'h') {
296                        return false;
297                }
298                if (url.charAt(1) != 't') {
299                        return false;
300                }
301                if (url.charAt(2) != 't') {
302                        return false;
303                }
304                if (url.charAt(3) != 'p') {
305                        return false;
306                }
307                int slashOffset;
308                if (url.charAt(4) == ':') {
309                        slashOffset = 5;
310                } else if (url.charAt(4) == 's') {
311                        if (url.charAt(5) != ':') {
312                                return false;
313                        }
314                        slashOffset = 6;
315                } else {
316                        return false;
317                }
318
319                if (url.charAt(slashOffset) != '/') {
320                        return false;
321                }
322                if (url.charAt(slashOffset + 1) != '/') {
323                        return false;
324                }
325
326                return true;
327        }
328
329        public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl)
330                        throws DataFormatException {
331                String url = theUrl;
332                int paramIndex = url.indexOf('?');
333
334                // Change pattern of "Observation/?param=foo" into "Observation?param=foo"
335                if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
336                        url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
337                        paramIndex--;
338                }
339
340                String resourceName = url.substring(0, paramIndex);
341                if (resourceName.contains("/")) {
342                        resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1);
343                }
344                return theCtx.getResourceDefinition(resourceName);
345        }
346
347        @Nonnull
348        public static Map<String, String[]> parseQueryString(String theQueryString) {
349                HashMap<String, List<String>> map = new HashMap<>();
350                parseQueryString(theQueryString, map);
351                return toQueryStringMap(map);
352        }
353
354        private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) {
355                String query = defaultString(theQueryString);
356
357                int questionMarkIdx = query.indexOf('?');
358                if (questionMarkIdx != -1) {
359                        query = query.substring(questionMarkIdx + 1);
360                }
361
362                StringTokenizer tok = new StringTokenizer(query, "&");
363                while (tok.hasMoreTokens()) {
364                        String nextToken = tok.nextToken();
365                        if (isBlank(nextToken)) {
366                                continue;
367                        }
368
369                        int equalsIndex = nextToken.indexOf('=');
370                        String nextValue;
371                        String nextKey;
372                        if (equalsIndex == -1) {
373                                nextKey = nextToken;
374                                nextValue = "";
375                        } else {
376                                nextKey = nextToken.substring(0, equalsIndex);
377                                nextValue = nextToken.substring(equalsIndex + 1);
378                        }
379
380                        nextKey = unescape(nextKey);
381                        nextValue = unescape(nextValue);
382
383                        List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>());
384                        list.add(nextValue);
385                }
386        }
387
388        public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
389                HashMap<String, List<String>> map = new HashMap<>();
390                for (String next : theQueryString) {
391                        parseQueryString(next, map);
392                }
393                return toQueryStringMap(map);
394        }
395
396        /**
397         * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
398         * and any version identifiers or fragment hash is removed
399         */
400        public static String normalizeCanonicalUrlForComparison(String theUrl) {
401                String retVal;
402                try {
403                        retVal = new URI(theUrl).normalize().toString();
404                } catch (URISyntaxException e) {
405                        retVal = theUrl;
406                }
407                while (endsWith(retVal, "/")) {
408                        retVal = retVal.substring(0, retVal.length() - 1);
409                }
410                int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
411                if (hashOrPipeIndex != -1) {
412                        retVal = retVal.substring(0, hashOrPipeIndex);
413                }
414                return retVal;
415        }
416
        /**
         * Parse a URL in one of the following forms:
         * <ul>
         * <li>[Resource Type]?[Search Params]
         * <li>[Resource Type]/[Resource ID]
         * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
         * </ul>
         * <p>
         * URLs starting with <code>http</code> are split at the first <code>?</code> and the
         * part before it is handed to {@link IdDt} for parsing. All other URLs are scanned
         * segment by segment (segments are delimited by <code>/</code>, <code>?</code> or the
         * end of the string); a single leading slash followed by a letter is skipped.
         * </p>
         *
         * @param theUrl The URL to parse, must not be <code>null</code>
         * @return The parts found in the URL; parts that are absent are left <code>null</code>
         * @throws InvalidRequestException If, in a non-<code>http</code> URL, the segment following
         *                                 the resource ID is not the history token
         */
        public static UrlParts parseUrl(String theUrl) {
                String url = theUrl;
                UrlParts retVal = new UrlParts();
                if (url.startsWith("http")) {
                        int qmIdx = url.indexOf('?');
                        if (qmIdx != -1) {
                                // A blank query string is recorded as null rather than ""
                                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
                                url = url.substring(0, qmIdx);
                        }

                        // Delegate splitting of type / id / version to IdDt
                        IdDt id = new IdDt(url);
                        retVal.setResourceType(id.getResourceType());
                        retVal.setResourceId(id.getIdPart());
                        retVal.setVersionId(id.getVersionIdPart());
                        return retVal;
                }

                // Skip one leading slash, but only when it is followed by a letter
                int parsingStart = 0;
                if (url.length() > 2) {
                        if (url.charAt(0) == '/') {
                                if (Character.isLetter(url.charAt(1))) {
                                        parsingStart = 1;
                                }
                        }
                }

                // nextStart marks the beginning of the segment currently being scanned
                int nextStart = parsingStart;
                // Set once the history token has been seen; the following segment is then the version
                boolean nextIsHistory = false;

                for (int idx = parsingStart; idx < url.length(); idx++) {
                        char nextChar = url.charAt(idx);
                        boolean atEnd = (idx + 1) == url.length();
                        if (nextChar == '?' || nextChar == '/' || atEnd) {
                                // At the end of the string the current character belongs to the segment, unless it is '?'.
                                // NOTE(review): this also keeps a trailing '/' as part of the last segment
                                // (e.g. "Patient/" yields "Patient/") - confirm whether that is intended.
                                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
                                String nextSubstring = url.substring(nextStart, endIdx);
                                // Segments are assigned in order: type, id, history token, version
                                if (retVal.getResourceType() == null) {
                                        retVal.setResourceType(nextSubstring);
                                } else if (retVal.getResourceId() == null) {
                                        retVal.setResourceId(nextSubstring);
                                } else if (nextIsHistory) {
                                        retVal.setVersionId(nextSubstring);
                                } else {
                                        if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
                                                nextIsHistory = true;
                                        } else {
                                                throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
                                        }
                                }
                                if (nextChar == '?') {
                                        // Everything after the '?' is the raw parameter string; an empty one leaves params null
                                        if (url.length() > idx + 1) {
                                                retVal.setParams(url.substring(idx + 1));
                                        }
                                        break;
                                }
                                nextStart = idx + 1;
                        }
                }

                return retVal;
        }
485
486        /**
487         * This method specifically HTML-encodes the &quot; and
488         * &lt; characters in order to prevent injection attacks
489         */
490        public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
491                String retVal = null;
492                if (theString != null) {
493                        retVal = sanitizeUrlPart(theString.getValueAsString());
494                }
495                return retVal;
496        }
497
498        /**
499         * This method specifically HTML-encodes the &quot; and
500         * &lt; characters in order to prevent injection attacks.
501         * <p>
502         * The following characters are escaped:
503         * <ul>
504         *    <li>&apos;</li>
505         *    <li>&quot;</li>
506         *    <li>&lt;</li>
507         *    <li>&gt;</li>
508         *    <li>\n (newline)</li>
509         * </ul>
510         */
511        public static String sanitizeUrlPart(CharSequence theString) {
512                if (theString == null) {
513                        return null;
514                }
515
516                boolean needsSanitization = isNeedsSanitization(theString);
517
518                if (needsSanitization) {
519                        // Ok, we're sanitizing
520                        StringBuilder buffer = new StringBuilder(theString.length() + 10);
521                        for (int j = 0; j < theString.length(); j++) {
522
523                                char nextChar = theString.charAt(j);
524                                switch (nextChar) {
525                                                /*
526                                                 * NB: If you add a constant here, you also need to add it
527                                                 * to isNeedsSanitization()!!
528                                                 */
529                                        case '\'':
530                                                buffer.append("&apos;");
531                                                break;
532                                        case '"':
533                                                buffer.append("&quot;");
534                                                break;
535                                        case '<':
536                                                buffer.append("&lt;");
537                                                break;
538                                        case '>':
539                                                buffer.append("&gt;");
540                                                break;
541                                        case '\n':
542                                                buffer.append("&#10;");
543                                                break;
544                                        case '\r':
545                                                buffer.append("&#13;");
546                                                break;
547                                        default:
548                                                if (nextChar >= ' ') {
549                                                        buffer.append(nextChar);
550                                                }
551                                                break;
552                                }
553                        } // for build escaped string
554
555                        return buffer.toString();
556                }
557
558                return theString.toString();
559        }
560
561        /**
562         * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
563         * same strings as the input but with sanitization applied
564         */
565        public static String[] sanitizeUrlPart(String[] theParameterValues) {
566                String[] retVal = null;
567                if (theParameterValues != null) {
568                        retVal = new String[theParameterValues.length];
569                        for (int i = 0; i < theParameterValues.length; i++) {
570                                retVal[i] = sanitizeUrlPart(theParameterValues[i]);
571                        }
572                }
573                return retVal;
574        }
575
576        private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) {
577                HashMap<String, String[]> retVal = new HashMap<>();
578                for (Entry<String, List<String>> nextEntry : map.entrySet()) {
579                        retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
580                }
581                return retVal;
582        }
583
584        public static String unescape(String theString) {
585                if (theString == null) {
586                        return null;
587                }
588                // If the user passes "_outputFormat" as a GET request parameter directly in the URL:
589                final boolean shouldEscapePlus = !theString.startsWith("application/");
590
591                for (int i = 0; i < theString.length(); i++) {
592                        char nextChar = theString.charAt(i);
593                        if (nextChar == '%' || (nextChar == '+' && shouldEscapePlus)) {
594                                return URLDecoder.decode(theString, StandardCharsets.UTF_8);
595                        }
596                }
597                return theString;
598        }
599
600        /**
601         * Creates list of sub URIs candidates for search with :above modifier
602         * Example input: http://[host]/[pathPart1]/[pathPart2]
603         * Example output: http://[host], http://[host]/[pathPart1], http://[host]/[pathPart1]/[pathPart2]
604         *
605         * @param theUri String URI parameter
606         * @return List of URI candidates
607         */
608        public static List<String> getAboveUriCandidates(String theUri) {
609                try {
610                        URI uri = new URI(theUri);
611                        if (uri.getScheme() == null || uri.getHost() == null) {
612                                throwInvalidRequestExceptionForNotValidUri(theUri, null);
613                        }
614                } catch (URISyntaxException theCause) {
615                        throwInvalidRequestExceptionForNotValidUri(theUri, theCause);
616                }
617
618                List<String> candidates = new ArrayList<>();
619                Path path = Paths.get(theUri);
620                candidates.add(path.toString().replace(":/", "://"));
621                while (path.getParent() != null && path.getParent().toString().contains("/")) {
622                        candidates.add(path.getParent().toString().replace(":/", "://"));
623                        path = path.getParent();
624                }
625                return candidates;
626        }
627
628        private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) {
629                throw new InvalidRequestException(
630                                Msg.code(2419) + String.format("Provided URI is not valid: %s", theUri), theCause);
631        }
632
633        public static class UrlParts {
634                private String myParams;
635                private String myResourceId;
636                private String myResourceType;
637                private String myVersionId;
638
639                public String getParams() {
640                        return myParams;
641                }
642
643                public void setParams(String theParams) {
644                        myParams = theParams;
645                }
646
647                public String getResourceId() {
648                        return myResourceId;
649                }
650
651                public void setResourceId(String theResourceId) {
652                        myResourceId = theResourceId;
653                }
654
655                public String getResourceType() {
656                        return myResourceType;
657                }
658
659                public void setResourceType(String theResourceType) {
660                        myResourceType = theResourceType;
661                }
662
663                public String getVersionId() {
664                        return myVersionId;
665                }
666
667                public void setVersionId(String theVersionId) {
668                        myVersionId = theVersionId;
669                }
670        }
671}