001/*
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2025 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.util;
021
022import ca.uhn.fhir.context.FhirContext;
023import ca.uhn.fhir.context.RuntimeResourceDefinition;
024import ca.uhn.fhir.i18n.Msg;
025import ca.uhn.fhir.model.primitive.IdDt;
026import ca.uhn.fhir.parser.DataFormatException;
027import ca.uhn.fhir.rest.api.Constants;
028import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
029import com.google.common.escape.Escaper;
030import com.google.common.net.PercentEscaper;
031import jakarta.annotation.Nonnull;
032import jakarta.annotation.Nullable;
033import org.apache.commons.lang3.StringUtils;
034import org.hl7.fhir.instance.model.api.IPrimitiveType;
035
036import java.io.UnsupportedEncodingException;
037import java.net.MalformedURLException;
038import java.net.URI;
039import java.net.URISyntaxException;
040import java.net.URL;
041import java.net.URLDecoder;
042import java.nio.file.Path;
043import java.nio.file.Paths;
044import java.util.ArrayList;
045import java.util.Collection;
046import java.util.HashMap;
047import java.util.List;
048import java.util.Map;
049import java.util.Map.Entry;
050import java.util.StringTokenizer;
051import java.util.stream.Collectors;
052
053import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
054import static org.apache.commons.lang3.StringUtils.defaultString;
055import static org.apache.commons.lang3.StringUtils.endsWith;
056import static org.apache.commons.lang3.StringUtils.isBlank;
057import static org.apache.commons.lang3.StringUtils.isNotBlank;
058
059@SuppressWarnings("JavadocLinkAsPlainText")
060public class UrlUtil {
/** Logger used by the static helpers of this class (see {@link #constructAbsoluteUrl(String, String)}). */
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

/** Extra characters passed to {@link PercentEscaper} when {@link #PARAMETER_ESCAPER} is built. */
private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
/**
 * Escaper applied to individual URL parameter names/values by
 * {@link #escapeUrlParam(String)} and {@link #escapeUrlParams(Collection)}.
 */
// NOTE(review): the second constructor argument is presumably Guava's "plusForSpace" flag - confirm against the PercentEscaper docs
private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

/**
 * Non instantiable: this class only exposes static helpers, so the constructor is private.
 */
private UrlUtil() {}
070
071        /**
072         * Cleans up a value that will be serialized as an HTTP header. This method:
073         * <p>
074         * - Strips any newline (\r or \n) characters
075         *
076         * @since 6.2.0
077         */
078        public static String sanitizeHeaderValue(String theHeader) {
079                return theHeader.replace("\n", "").replace("\r", "");
080        }
081
082        public static String sanitizeBaseUrl(String theBaseUrl) {
083                return theBaseUrl.replaceAll("[^a-zA-Z0-9:/._-]", "");
084        }
085
086        /**
087         * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
088         */
089        public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
090                if (theEndpoint == null) {
091                        return null;
092                }
093                if (isAbsolute(theEndpoint)) {
094                        return theEndpoint;
095                }
096                if (theBase == null) {
097                        return theEndpoint;
098                }
099
100                try {
101                        return new URL(new URL(theBase), theEndpoint).toString();
102                } catch (MalformedURLException e) {
103                        ourLog.warn(
104                                        "Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e);
105                        return theEndpoint;
106                }
107        }
108
109        public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
110                if (theParentExtensionUrl == null) {
111                        return theExtensionUrl;
112                }
113                if (theExtensionUrl == null) {
114                        return null;
115                }
116
117                int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
118                int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');
119
120                if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
121                        return theExtensionUrl;
122                }
123
124                if (parentLastSlashIdx != childLastSlashIdx) {
125                        return theExtensionUrl;
126                }
127
128                if (!theParentExtensionUrl
129                                .substring(0, parentLastSlashIdx)
130                                .equals(theExtensionUrl.substring(0, parentLastSlashIdx))) {
131                        return theExtensionUrl;
132                }
133
134                if (theExtensionUrl.length() > parentLastSlashIdx) {
135                        return theExtensionUrl.substring(parentLastSlashIdx + 1);
136                }
137
138                return theExtensionUrl;
139        }
140
141        /**
142         * Given a FHIR resource URL, extracts the associated resource type. Supported formats
143         * include the following inputs, all of which will return {@literal Patient}. If no
144         * resource type can be determined, {@literal null} will be returned.
145         * <ul>
146         * <li>Patient
147         * <li>Patient?
148         * <li>Patient?identifier=foo
149         * <li>/Patient
150         * <li>/Patient?
151         * <li>/Patient?identifier=foo
152         * <li>http://foo/base/Patient?identifier=foo
153         * <li>http://foo/base/Patient/1
154         * <li>http://foo/base/Patient/1/_history/2
155         * <li>Patient/1
156         * <li>Patient/1/_history/2
157         * <li>/Patient/1
158         * <li>/Patient/1/_history/2
159         * </ul>
160         */
161        @Nullable
162        public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) {
163                if (theUrl == null) {
164                        return null;
165                }
166                if (theUrl.startsWith("urn:")) {
167                        return null;
168                }
169
170                String resourceType = null;
171                int qmIndex = theUrl.indexOf("?");
172                if (qmIndex > 0) {
173                        String urlResourceType = theUrl.substring(0, qmIndex);
174                        int slashIdx = urlResourceType.lastIndexOf('/');
175                        if (slashIdx != -1) {
176                                urlResourceType = urlResourceType.substring(slashIdx + 1);
177                        }
178                        if (isNotBlank(urlResourceType)) {
179                                resourceType = urlResourceType;
180                        }
181                } else {
182                        resourceType = theUrl;
183                        int slashIdx = resourceType.indexOf('/');
184                        if (slashIdx == 0) {
185                                resourceType = resourceType.substring(1);
186                        }
187
188                        slashIdx = resourceType.indexOf('/');
189                        if (slashIdx != -1) {
190                                resourceType = new IdDt(resourceType).getResourceType();
191                        }
192                }
193
194                try {
195                        if (isNotBlank(resourceType)) {
196                                theFhirContext.getResourceDefinition(resourceType);
197                        }
198                } catch (DataFormatException e) {
199                        return null;
200                }
201
202                return resourceType;
203        }
204
205        /**
206         * URL encode a value according to RFC 3986
207         * <p>
208         * This method is intended to be applied to an individual parameter
209         * name or value. For example, if you are creating the URL
210         * <code>http://example.com/fhir/Patient?key=føø</code>
211         * it would be appropriate to pass the string "føø" to this method,
212         * but not appropriate to pass the entire URL since characters
213         * such as "/" and "?" would also be escaped.
214         * </P>
215         */
216        public static String escapeUrlParam(String theUnescaped) {
217                if (theUnescaped == null) {
218                        return null;
219                }
220                return PARAMETER_ESCAPER.escape(theUnescaped);
221        }
222
223        /**
224         * Applies the same encodong as {@link #escapeUrlParam(String)} but against all
225         * values in a collection
226         */
227        public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
228                return theUnescaped.stream().map(t -> PARAMETER_ESCAPER.escape(t)).collect(Collectors.toList());
229        }
230
231        public static boolean isAbsolute(String theValue) {
232                String value = theValue.toLowerCase();
233                return value.startsWith("http://") || value.startsWith("https://");
234        }
235
236        public static boolean isNeedsSanitization(CharSequence theString) {
237                if (theString != null) {
238                        for (int i = 0; i < theString.length(); i++) {
239                                char nextChar = theString.charAt(i);
240                                switch (nextChar) {
241                                        case '\'':
242                                        case '"':
243                                        case '<':
244                                        case '>':
245                                        case '\n':
246                                        case '\r':
247                                                return true;
248                                }
249                                if (nextChar < ' ') {
250                                        return true;
251                                }
252                        }
253                }
254                return false;
255        }
256
257        public static boolean isValid(String theUrl) {
258                if (theUrl == null || theUrl.length() < 8) {
259                        return false;
260                }
261
262                String url = theUrl.toLowerCase();
263                if (url.charAt(0) != 'h') {
264                        return false;
265                }
266                if (url.charAt(1) != 't') {
267                        return false;
268                }
269                if (url.charAt(2) != 't') {
270                        return false;
271                }
272                if (url.charAt(3) != 'p') {
273                        return false;
274                }
275                int slashOffset;
276                if (url.charAt(4) == ':') {
277                        slashOffset = 5;
278                } else if (url.charAt(4) == 's') {
279                        if (url.charAt(5) != ':') {
280                                return false;
281                        }
282                        slashOffset = 6;
283                } else {
284                        return false;
285                }
286
287                if (url.charAt(slashOffset) != '/') {
288                        return false;
289                }
290                if (url.charAt(slashOffset + 1) != '/') {
291                        return false;
292                }
293
294                return true;
295        }
296
297        public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl)
298                        throws DataFormatException {
299                String url = theUrl;
300                int paramIndex = url.indexOf('?');
301
302                // Change pattern of "Observation/?param=foo" into "Observation?param=foo"
303                if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
304                        url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
305                        paramIndex--;
306                }
307
308                String resourceName = url.substring(0, paramIndex);
309                if (resourceName.contains("/")) {
310                        resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1);
311                }
312                return theCtx.getResourceDefinition(resourceName);
313        }
314
315        @Nonnull
316        public static Map<String, String[]> parseQueryString(String theQueryString) {
317                HashMap<String, List<String>> map = new HashMap<>();
318                parseQueryString(theQueryString, map);
319                return toQueryStringMap(map);
320        }
321
322        private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) {
323                String query = defaultString(theQueryString);
324                if (query.startsWith("?")) {
325                        query = query.substring(1);
326                }
327
328                StringTokenizer tok = new StringTokenizer(query, "&");
329                while (tok.hasMoreTokens()) {
330                        String nextToken = tok.nextToken();
331                        if (isBlank(nextToken)) {
332                                continue;
333                        }
334
335                        int equalsIndex = nextToken.indexOf('=');
336                        String nextValue;
337                        String nextKey;
338                        if (equalsIndex == -1) {
339                                nextKey = nextToken;
340                                nextValue = "";
341                        } else {
342                                nextKey = nextToken.substring(0, equalsIndex);
343                                nextValue = nextToken.substring(equalsIndex + 1);
344                        }
345
346                        nextKey = unescape(nextKey);
347                        nextValue = unescape(nextValue);
348
349                        List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>());
350                        list.add(nextValue);
351                }
352        }
353
354        public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
355                HashMap<String, List<String>> map = new HashMap<>();
356                for (String next : theQueryString) {
357                        parseQueryString(next, map);
358                }
359                return toQueryStringMap(map);
360        }
361
362        /**
363         * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
364         * and any version identifiers or fragment hash is removed
365         */
366        public static String normalizeCanonicalUrlForComparison(String theUrl) {
367                String retVal;
368                try {
369                        retVal = new URI(theUrl).normalize().toString();
370                } catch (URISyntaxException e) {
371                        retVal = theUrl;
372                }
373                while (endsWith(retVal, "/")) {
374                        retVal = retVal.substring(0, retVal.length() - 1);
375                }
376                int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
377                if (hashOrPipeIndex != -1) {
378                        retVal = retVal.substring(0, hashOrPipeIndex);
379                }
380                return retVal;
381        }
382
383        /**
384         * Parse a URL in one of the following forms:
385         * <ul>
386         * <li>[Resource Type]?[Search Params]
387         * <li>[Resource Type]/[Resource ID]
388         * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
389         * </ul>
390         */
391        public static UrlParts parseUrl(String theUrl) {
392                String url = theUrl;
393                UrlParts retVal = new UrlParts();
394                if (url.startsWith("http")) {
395                        int qmIdx = url.indexOf('?');
396                        if (qmIdx != -1) {
397                                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
398                                url = url.substring(0, qmIdx);
399                        }
400
401                        IdDt id = new IdDt(url);
402                        retVal.setResourceType(id.getResourceType());
403                        retVal.setResourceId(id.getIdPart());
404                        retVal.setVersionId(id.getVersionIdPart());
405                        return retVal;
406                }
407
408                int parsingStart = 0;
409                if (url.length() > 2) {
410                        if (url.charAt(0) == '/') {
411                                if (Character.isLetter(url.charAt(1))) {
412                                        parsingStart = 1;
413                                }
414                        }
415                }
416
417                int nextStart = parsingStart;
418                boolean nextIsHistory = false;
419
420                for (int idx = parsingStart; idx < url.length(); idx++) {
421                        char nextChar = url.charAt(idx);
422                        boolean atEnd = (idx + 1) == url.length();
423                        if (nextChar == '?' || nextChar == '/' || atEnd) {
424                                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
425                                String nextSubstring = url.substring(nextStart, endIdx);
426                                if (retVal.getResourceType() == null) {
427                                        retVal.setResourceType(nextSubstring);
428                                } else if (retVal.getResourceId() == null) {
429                                        retVal.setResourceId(nextSubstring);
430                                } else if (nextIsHistory) {
431                                        retVal.setVersionId(nextSubstring);
432                                } else {
433                                        if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
434                                                nextIsHistory = true;
435                                        } else {
436                                                throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
437                                        }
438                                }
439                                if (nextChar == '?') {
440                                        if (url.length() > idx + 1) {
441                                                retVal.setParams(url.substring(idx + 1));
442                                        }
443                                        break;
444                                }
445                                nextStart = idx + 1;
446                        }
447                }
448
449                return retVal;
450        }
451
452        /**
453         * This method specifically HTML-encodes the &quot; and
454         * &lt; characters in order to prevent injection attacks
455         */
456        public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
457                String retVal = null;
458                if (theString != null) {
459                        retVal = sanitizeUrlPart(theString.getValueAsString());
460                }
461                return retVal;
462        }
463
464        /**
465         * This method specifically HTML-encodes the &quot; and
466         * &lt; characters in order to prevent injection attacks.
467         * <p>
468         * The following characters are escaped:
469         * <ul>
470         *    <li>&apos;</li>
471         *    <li>&quot;</li>
472         *    <li>&lt;</li>
473         *    <li>&gt;</li>
474         *    <li>\n (newline)</li>
475         * </ul>
476         */
477        public static String sanitizeUrlPart(CharSequence theString) {
478                if (theString == null) {
479                        return null;
480                }
481
482                boolean needsSanitization = isNeedsSanitization(theString);
483
484                if (needsSanitization) {
485                        // Ok, we're sanitizing
486                        StringBuilder buffer = new StringBuilder(theString.length() + 10);
487                        for (int j = 0; j < theString.length(); j++) {
488
489                                char nextChar = theString.charAt(j);
490                                switch (nextChar) {
491                                                /*
492                                                 * NB: If you add a constant here, you also need to add it
493                                                 * to isNeedsSanitization()!!
494                                                 */
495                                        case '\'':
496                                                buffer.append("&apos;");
497                                                break;
498                                        case '"':
499                                                buffer.append("&quot;");
500                                                break;
501                                        case '<':
502                                                buffer.append("&lt;");
503                                                break;
504                                        case '>':
505                                                buffer.append("&gt;");
506                                                break;
507                                        case '\n':
508                                                buffer.append("&#10;");
509                                                break;
510                                        case '\r':
511                                                buffer.append("&#13;");
512                                                break;
513                                        default:
514                                                if (nextChar >= ' ') {
515                                                        buffer.append(nextChar);
516                                                }
517                                                break;
518                                }
519                        } // for build escaped string
520
521                        return buffer.toString();
522                }
523
524                return theString.toString();
525        }
526
527        /**
528         * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
529         * same strings as the input but with sanitization applied
530         */
531        public static String[] sanitizeUrlPart(String[] theParameterValues) {
532                String[] retVal = null;
533                if (theParameterValues != null) {
534                        retVal = new String[theParameterValues.length];
535                        for (int i = 0; i < theParameterValues.length; i++) {
536                                retVal[i] = sanitizeUrlPart(theParameterValues[i]);
537                        }
538                }
539                return retVal;
540        }
541
542        private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) {
543                HashMap<String, String[]> retVal = new HashMap<>();
544                for (Entry<String, List<String>> nextEntry : map.entrySet()) {
545                        retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
546                }
547                return retVal;
548        }
549
550        public static String unescape(String theString) {
551                if (theString == null) {
552                        return null;
553                }
554                // If the user passes "_outputFormat" as a GET request parameter directly in the URL:
555                final boolean shouldEscapePlus = !theString.startsWith("application/");
556
557                for (int i = 0; i < theString.length(); i++) {
558                        char nextChar = theString.charAt(i);
559                        if (nextChar == '%' || (nextChar == '+' && shouldEscapePlus)) {
560                                try {
561                                        // Yes it would be nice to not use a string "UTF-8" but the equivalent
562                                        // method that takes Charset is JDK10+ only... sigh....
563                                        return URLDecoder.decode(theString, "UTF-8");
564                                } catch (UnsupportedEncodingException e) {
565                                        throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e);
566                                }
567                        }
568                }
569                return theString;
570        }
571
572        /**
573         * Creates list of sub URIs candidates for search with :above modifier
574         * Example input: http://[host]/[pathPart1]/[pathPart2]
575         * Example output: http://[host], http://[host]/[pathPart1], http://[host]/[pathPart1]/[pathPart2]
576         *
577         * @param theUri String URI parameter
578         * @return List of URI candidates
579         */
580        public static List<String> getAboveUriCandidates(String theUri) {
581                try {
582                        URI uri = new URI(theUri);
583                        if (uri.getScheme() == null || uri.getHost() == null) {
584                                throwInvalidRequestExceptionForNotValidUri(theUri, null);
585                        }
586                } catch (URISyntaxException theCause) {
587                        throwInvalidRequestExceptionForNotValidUri(theUri, theCause);
588                }
589
590                List<String> candidates = new ArrayList<>();
591                Path path = Paths.get(theUri);
592                candidates.add(path.toString().replace(":/", "://"));
593                while (path.getParent() != null && path.getParent().toString().contains("/")) {
594                        candidates.add(path.getParent().toString().replace(":/", "://"));
595                        path = path.getParent();
596                }
597                return candidates;
598        }
599
600        private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) {
601                throw new InvalidRequestException(
602                                Msg.code(2419) + String.format("Provided URI is not valid: %s", theUri), theCause);
603        }
604
605        public static class UrlParts {
606                private String myParams;
607                private String myResourceId;
608                private String myResourceType;
609                private String myVersionId;
610
611                public String getParams() {
612                        return myParams;
613                }
614
615                public void setParams(String theParams) {
616                        myParams = theParams;
617                }
618
619                public String getResourceId() {
620                        return myResourceId;
621                }
622
623                public void setResourceId(String theResourceId) {
624                        myResourceId = theResourceId;
625                }
626
627                public String getResourceType() {
628                        return myResourceType;
629                }
630
631                public void setResourceType(String theResourceType) {
632                        myResourceType = theResourceType;
633                }
634
635                public String getVersionId() {
636                        return myVersionId;
637                }
638
639                public void setVersionId(String theVersionId) {
640                        myVersionId = theVersionId;
641                }
642        }
643}