001/*
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.util;
021
022import ca.uhn.fhir.context.FhirContext;
023import ca.uhn.fhir.context.RuntimeResourceDefinition;
024import ca.uhn.fhir.i18n.Msg;
025import ca.uhn.fhir.model.primitive.IdDt;
026import ca.uhn.fhir.parser.DataFormatException;
027import ca.uhn.fhir.rest.api.Constants;
028import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
029import com.google.common.escape.Escaper;
030import com.google.common.net.PercentEscaper;
031import jakarta.annotation.Nonnull;
032import jakarta.annotation.Nullable;
033import org.apache.commons.lang3.StringUtils;
034import org.apache.http.NameValuePair;
035import org.apache.http.client.utils.URLEncodedUtils;
036import org.apache.http.message.BasicNameValuePair;
037import org.hl7.fhir.instance.model.api.IPrimitiveType;
038
039import java.io.UnsupportedEncodingException;
040import java.net.MalformedURLException;
041import java.net.URI;
042import java.net.URISyntaxException;
043import java.net.URL;
044import java.net.URLDecoder;
045import java.nio.file.Path;
046import java.nio.file.Paths;
047import java.util.ArrayList;
048import java.util.Collection;
049import java.util.HashMap;
050import java.util.List;
051import java.util.Map;
052import java.util.Map.Entry;
053import java.util.StringTokenizer;
054import java.util.stream.Collectors;
055
056import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
057import static org.apache.commons.lang3.StringUtils.defaultString;
058import static org.apache.commons.lang3.StringUtils.endsWith;
059import static org.apache.commons.lang3.StringUtils.isBlank;
060import static org.apache.commons.lang3.StringUtils.isNotBlank;
061
062@SuppressWarnings("JavadocLinkAsPlainText")
063public class UrlUtil {
        private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

        /**
         * Characters that are passed to {@link PercentEscaper} as additional "safe" characters, i.e.
         * characters that are left unescaped when a parameter name or value is encoded.
         */
        private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
        /**
         * Escaper used for individual URL parameter names and values. The second constructor argument
         * (plusForSpace) is {@code false}, so a space is percent-encoded rather than written as "+".
         */
        private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

        /**
         * Non instantiable: this class only exposes static helpers.
         */
        private UrlUtil() {}
073
074        /**
075         * Cleans up a value that will be serialized as an HTTP header. This method:
076         * <p>
077         * - Strips any newline (\r or \n) characters
078         *
079         * @since 6.2.0
080         */
081        public static String sanitizeHeaderValue(String theHeader) {
082                return theHeader.replace("\n", "").replace("\r", "");
083        }
084
085        public static String sanitizeBaseUrl(String theBaseUrl) {
086                return theBaseUrl.replaceAll("[^a-zA-Z0-9:/._-]", "");
087        }
088
089        /**
090         * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
091         */
092        public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
093                if (theEndpoint == null) {
094                        return null;
095                }
096                if (isAbsolute(theEndpoint)) {
097                        return theEndpoint;
098                }
099                if (theBase == null) {
100                        return theEndpoint;
101                }
102
103                try {
104                        return new URL(new URL(theBase), theEndpoint).toString();
105                } catch (MalformedURLException e) {
106                        ourLog.warn(
107                                        "Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e);
108                        return theEndpoint;
109                }
110        }
111
112        public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
113                if (theParentExtensionUrl == null) {
114                        return theExtensionUrl;
115                }
116                if (theExtensionUrl == null) {
117                        return null;
118                }
119
120                int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
121                int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');
122
123                if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
124                        return theExtensionUrl;
125                }
126
127                if (parentLastSlashIdx != childLastSlashIdx) {
128                        return theExtensionUrl;
129                }
130
131                if (!theParentExtensionUrl
132                                .substring(0, parentLastSlashIdx)
133                                .equals(theExtensionUrl.substring(0, parentLastSlashIdx))) {
134                        return theExtensionUrl;
135                }
136
137                if (theExtensionUrl.length() > parentLastSlashIdx) {
138                        return theExtensionUrl.substring(parentLastSlashIdx + 1);
139                }
140
141                return theExtensionUrl;
142        }
143
144        /**
145         * Given a FHIR resource URL, extracts the associated resource type. Supported formats
146         * include the following inputs, all of which will return {@literal Patient}. If no
147         * resource type can be determined, {@literal null} will be returned.
148         * <ul>
149         * <li>Patient
150         * <li>Patient?
151         * <li>Patient?identifier=foo
152         * <li>/Patient
153         * <li>/Patient?
154         * <li>/Patient?identifier=foo
155         * <li>http://foo/base/Patient?identifier=foo
156         * <li>http://foo/base/Patient/1
157         * <li>http://foo/base/Patient/1/_history/2
158         * <li>Patient/1
159         * <li>Patient/1/_history/2
160         * <li>/Patient/1
161         * <li>/Patient/1/_history/2
162         * </ul>
163         */
164        @Nullable
165        public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) {
166                if (theUrl == null) {
167                        return null;
168                }
169                if (theUrl.startsWith("urn:")) {
170                        return null;
171                }
172
173                String resourceType = null;
174                int qmIndex = theUrl.indexOf("?");
175                if (qmIndex > 0) {
176                        String urlResourceType = theUrl.substring(0, qmIndex);
177                        int slashIdx = urlResourceType.lastIndexOf('/');
178                        if (slashIdx != -1) {
179                                urlResourceType = urlResourceType.substring(slashIdx + 1);
180                        }
181                        if (isNotBlank(urlResourceType)) {
182                                resourceType = urlResourceType;
183                        }
184                } else {
185                        resourceType = theUrl;
186                        int slashIdx = resourceType.indexOf('/');
187                        if (slashIdx == 0) {
188                                resourceType = resourceType.substring(1);
189                        }
190
191                        slashIdx = resourceType.indexOf('/');
192                        if (slashIdx != -1) {
193                                resourceType = new IdDt(resourceType).getResourceType();
194                        }
195                }
196
197                try {
198                        if (isNotBlank(resourceType)) {
199                                theFhirContext.getResourceDefinition(resourceType);
200                        }
201                } catch (DataFormatException e) {
202                        return null;
203                }
204
205                return resourceType;
206        }
207
208        /**
209         * URL encode a value according to RFC 3986
210         * <p>
211         * This method is intended to be applied to an individual parameter
212         * name or value. For example, if you are creating the URL
213         * <code>http://example.com/fhir/Patient?key=føø</code>
214         * it would be appropriate to pass the string "føø" to this method,
215         * but not appropriate to pass the entire URL since characters
216         * such as "/" and "?" would also be escaped.
217         * </P>
218         */
219        public static String escapeUrlParam(String theUnescaped) {
220                if (theUnescaped == null) {
221                        return null;
222                }
223                return PARAMETER_ESCAPER.escape(theUnescaped);
224        }
225
226        /**
227         * Applies the same encodong as {@link #escapeUrlParam(String)} but against all
228         * values in a collection
229         */
230        public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
231                return theUnescaped.stream().map(t -> PARAMETER_ESCAPER.escape(t)).collect(Collectors.toList());
232        }
233
234        public static boolean isAbsolute(String theValue) {
235                String value = theValue.toLowerCase();
236                return value.startsWith("http://") || value.startsWith("https://");
237        }
238
239        public static boolean isNeedsSanitization(CharSequence theString) {
240                if (theString != null) {
241                        for (int i = 0; i < theString.length(); i++) {
242                                char nextChar = theString.charAt(i);
243                                switch (nextChar) {
244                                        case '\'':
245                                        case '"':
246                                        case '<':
247                                        case '>':
248                                        case '\n':
249                                        case '\r':
250                                                return true;
251                                }
252                                if (nextChar < ' ') {
253                                        return true;
254                                }
255                        }
256                }
257                return false;
258        }
259
260        public static boolean isValid(String theUrl) {
261                if (theUrl == null || theUrl.length() < 8) {
262                        return false;
263                }
264
265                String url = theUrl.toLowerCase();
266                if (url.charAt(0) != 'h') {
267                        return false;
268                }
269                if (url.charAt(1) != 't') {
270                        return false;
271                }
272                if (url.charAt(2) != 't') {
273                        return false;
274                }
275                if (url.charAt(3) != 'p') {
276                        return false;
277                }
278                int slashOffset;
279                if (url.charAt(4) == ':') {
280                        slashOffset = 5;
281                } else if (url.charAt(4) == 's') {
282                        if (url.charAt(5) != ':') {
283                                return false;
284                        }
285                        slashOffset = 6;
286                } else {
287                        return false;
288                }
289
290                if (url.charAt(slashOffset) != '/') {
291                        return false;
292                }
293                if (url.charAt(slashOffset + 1) != '/') {
294                        return false;
295                }
296
297                return true;
298        }
299
300        public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl)
301                        throws DataFormatException {
302                String url = theUrl;
303                int paramIndex = url.indexOf('?');
304
305                // Change pattern of "Observation/?param=foo" into "Observation?param=foo"
306                if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
307                        url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
308                        paramIndex--;
309                }
310
311                String resourceName = url.substring(0, paramIndex);
312                if (resourceName.contains("/")) {
313                        resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1);
314                }
315                return theCtx.getResourceDefinition(resourceName);
316        }
317
318        @Nonnull
319        public static Map<String, String[]> parseQueryString(String theQueryString) {
320                HashMap<String, List<String>> map = new HashMap<>();
321                parseQueryString(theQueryString, map);
322                return toQueryStringMap(map);
323        }
324
325        private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) {
326                String query = defaultString(theQueryString);
327                if (query.startsWith("?")) {
328                        query = query.substring(1);
329                }
330
331                StringTokenizer tok = new StringTokenizer(query, "&");
332                while (tok.hasMoreTokens()) {
333                        String nextToken = tok.nextToken();
334                        if (isBlank(nextToken)) {
335                                continue;
336                        }
337
338                        int equalsIndex = nextToken.indexOf('=');
339                        String nextValue;
340                        String nextKey;
341                        if (equalsIndex == -1) {
342                                nextKey = nextToken;
343                                nextValue = "";
344                        } else {
345                                nextKey = nextToken.substring(0, equalsIndex);
346                                nextValue = nextToken.substring(equalsIndex + 1);
347                        }
348
349                        nextKey = unescape(nextKey);
350                        nextValue = unescape(nextValue);
351
352                        List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>());
353                        list.add(nextValue);
354                }
355        }
356
357        public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
358                HashMap<String, List<String>> map = new HashMap<>();
359                for (String next : theQueryString) {
360                        parseQueryString(next, map);
361                }
362                return toQueryStringMap(map);
363        }
364
365        /**
366         * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
367         * and any version identifiers or fragment hash is removed
368         */
369        public static String normalizeCanonicalUrlForComparison(String theUrl) {
370                String retVal;
371                try {
372                        retVal = new URI(theUrl).normalize().toString();
373                } catch (URISyntaxException e) {
374                        retVal = theUrl;
375                }
376                while (endsWith(retVal, "/")) {
377                        retVal = retVal.substring(0, retVal.length() - 1);
378                }
379                int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
380                if (hashOrPipeIndex != -1) {
381                        retVal = retVal.substring(0, hashOrPipeIndex);
382                }
383                return retVal;
384        }
385
        /**
         * Parse a URL in one of the following forms:
         * <ul>
         * <li>[Resource Type]?[Search Params]
         * <li>[Resource Type]/[Resource ID]
         * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
         * </ul>
         * <p>
         * Input starting with "http" is split at its first "?" and the part before it is handed
         * to {@link IdDt} to extract the type, ID and version. Any other input is scanned
         * character by character by this method.
         *
         * @param theUrl the URL to parse, must not be {@code null}
         * @return the parts that were found; parts that were not present stay {@code null}
         * @throws InvalidRequestException if, in the scanned form, a path segment following the resource ID
         *                                 is not {@link Constants#URL_TOKEN_HISTORY}
         */
        public static UrlParts parseUrl(String theUrl) {
                String url = theUrl;
                UrlParts retVal = new UrlParts();
                if (url.startsWith("http")) {
                        int qmIdx = url.indexOf('?');
                        if (qmIdx != -1) {
                                // A blank query string (e.g. a bare trailing "?") is stored as null
                                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
                                url = url.substring(0, qmIdx);
                        }

                        IdDt id = new IdDt(url);
                        retVal.setResourceType(id.getResourceType());
                        retVal.setResourceId(id.getIdPart());
                        retVal.setVersionId(id.getVersionIdPart());
                        return retVal;
                }

                // Skip a single leading "/" when it is directly followed by a letter (only checked
                // for inputs longer than two characters)
                int parsingStart = 0;
                if (url.length() > 2) {
                        if (url.charAt(0) == '/') {
                                if (Character.isLetter(url.charAt(1))) {
                                        parsingStart = 1;
                                }
                        }
                }

                // nextStart marks the beginning of the segment currently being read
                int nextStart = parsingStart;
                boolean nextIsHistory = false;

                for (int idx = parsingStart; idx < url.length(); idx++) {
                        char nextChar = url.charAt(idx);
                        boolean atEnd = (idx + 1) == url.length();
                        // A segment ends at "?", at "/", or at the final character of the input
                        if (nextChar == '?' || nextChar == '/' || atEnd) {
                                // At the end of the input the final character belongs to the segment, unless it is "?".
                                // NOTE(review): this also means a trailing "/" is kept as part of the last segment
                                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
                                String nextSubstring = url.substring(nextStart, endIdx);
                                // Segments are assigned in order: resource type, resource ID, then (after the
                                // history token) version ID
                                if (retVal.getResourceType() == null) {
                                        retVal.setResourceType(nextSubstring);
                                } else if (retVal.getResourceId() == null) {
                                        retVal.setResourceId(nextSubstring);
                                } else if (nextIsHistory) {
                                        retVal.setVersionId(nextSubstring);
                                } else {
                                        if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
                                                nextIsHistory = true;
                                        } else {
                                                throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
                                        }
                                }
                                if (nextChar == '?') {
                                        // Everything after the "?" is the raw parameter string; an empty one is left unset
                                        if (url.length() > idx + 1) {
                                                retVal.setParams(url.substring(idx + 1));
                                        }
                                        break;
                                }
                                nextStart = idx + 1;
                        }
                }

                return retVal;
        }
454
455        /**
456         * This method specifically HTML-encodes the &quot; and
457         * &lt; characters in order to prevent injection attacks
458         */
459        public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
460                String retVal = null;
461                if (theString != null) {
462                        retVal = sanitizeUrlPart(theString.getValueAsString());
463                }
464                return retVal;
465        }
466
467        /**
468         * This method specifically HTML-encodes the &quot; and
469         * &lt; characters in order to prevent injection attacks.
470         * <p>
471         * The following characters are escaped:
472         * <ul>
473         *    <li>&apos;</li>
474         *    <li>&quot;</li>
475         *    <li>&lt;</li>
476         *    <li>&gt;</li>
477         *    <li>\n (newline)</li>
478         * </ul>
479         */
480        public static String sanitizeUrlPart(CharSequence theString) {
481                if (theString == null) {
482                        return null;
483                }
484
485                boolean needsSanitization = isNeedsSanitization(theString);
486
487                if (needsSanitization) {
488                        // Ok, we're sanitizing
489                        StringBuilder buffer = new StringBuilder(theString.length() + 10);
490                        for (int j = 0; j < theString.length(); j++) {
491
492                                char nextChar = theString.charAt(j);
493                                switch (nextChar) {
494                                                /*
495                                                 * NB: If you add a constant here, you also need to add it
496                                                 * to isNeedsSanitization()!!
497                                                 */
498                                        case '\'':
499                                                buffer.append("&apos;");
500                                                break;
501                                        case '"':
502                                                buffer.append("&quot;");
503                                                break;
504                                        case '<':
505                                                buffer.append("&lt;");
506                                                break;
507                                        case '>':
508                                                buffer.append("&gt;");
509                                                break;
510                                        case '\n':
511                                                buffer.append("&#10;");
512                                                break;
513                                        case '\r':
514                                                buffer.append("&#13;");
515                                                break;
516                                        default:
517                                                if (nextChar >= ' ') {
518                                                        buffer.append(nextChar);
519                                                }
520                                                break;
521                                }
522                        } // for build escaped string
523
524                        return buffer.toString();
525                }
526
527                return theString.toString();
528        }
529
530        /**
531         * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
532         * same strings as the input but with sanitization applied
533         */
534        public static String[] sanitizeUrlPart(String[] theParameterValues) {
535                String[] retVal = null;
536                if (theParameterValues != null) {
537                        retVal = new String[theParameterValues.length];
538                        for (int i = 0; i < theParameterValues.length; i++) {
539                                retVal[i] = sanitizeUrlPart(theParameterValues[i]);
540                        }
541                }
542                return retVal;
543        }
544
545        private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) {
546                HashMap<String, String[]> retVal = new HashMap<>();
547                for (Entry<String, List<String>> nextEntry : map.entrySet()) {
548                        retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
549                }
550                return retVal;
551        }
552
553        public static String unescape(String theString) {
554                if (theString == null) {
555                        return null;
556                }
557                // If the user passes "_outputFormat" as a GET request parameter directly in the URL:
558                final boolean shouldEscapePlus = !theString.startsWith("application/");
559
560                for (int i = 0; i < theString.length(); i++) {
561                        char nextChar = theString.charAt(i);
562                        if (nextChar == '%' || (nextChar == '+' && shouldEscapePlus)) {
563                                try {
564                                        // Yes it would be nice to not use a string "UTF-8" but the equivalent
565                                        // method that takes Charset is JDK10+ only... sigh....
566                                        return URLDecoder.decode(theString, "UTF-8");
567                                } catch (UnsupportedEncodingException e) {
568                                        throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e);
569                                }
570                        }
571                }
572                return theString;
573        }
574
575        public static List<NameValuePair> translateMatchUrl(String theMatchUrl) {
576                List<NameValuePair> parameters;
577                String matchUrl = theMatchUrl;
578                int questionMarkIndex = matchUrl.indexOf('?');
579                if (questionMarkIndex != -1) {
580                        matchUrl = matchUrl.substring(questionMarkIndex + 1);
581                }
582
583                final String[] searchList = new String[] {"|", "=>=", "=<=", "=>", "=<"};
584                final String[] replacementList = new String[] {"%7C", "=%3E%3D", "=%3C%3D", "=%3E", "=%3C"};
585                matchUrl = StringUtils.replaceEach(matchUrl, searchList, replacementList);
586                if (matchUrl.contains(" ")) {
587                        throw new InvalidRequestException(Msg.code(1744) + "Failed to parse match URL[" + theMatchUrl
588                                        + "] - URL is invalid (must not contain spaces)");
589                }
590
591                parameters = URLEncodedUtils.parse((matchUrl), Constants.CHARSET_UTF8, '&');
592
593                // One issue that has happened before is people putting a "+" sign into an email address in a match URL
594                // and having that turn into a " ". Since spaces are never appropriate for email addresses, let's just
595                // assume they really meant "+".
596                for (int i = 0; i < parameters.size(); i++) {
597                        NameValuePair next = parameters.get(i);
598                        if (next.getName().equals("email") && next.getValue().contains(" ")) {
599                                BasicNameValuePair newPair =
600                                                new BasicNameValuePair(next.getName(), next.getValue().replace(' ', '+'));
601                                parameters.set(i, newPair);
602                        }
603                }
604
605                return parameters;
606        }
607
608        /**
609         * Creates list of sub URIs candidates for search with :above modifier
610         * Example input: http://[host]/[pathPart1]/[pathPart2]
611         * Example output: http://[host], http://[host]/[pathPart1], http://[host]/[pathPart1]/[pathPart2]
612         *
613         * @param theUri String URI parameter
614         * @return List of URI candidates
615         */
616        public static List<String> getAboveUriCandidates(String theUri) {
617                try {
618                        URI uri = new URI(theUri);
619                        if (uri.getScheme() == null || uri.getHost() == null) {
620                                throwInvalidRequestExceptionForNotValidUri(theUri, null);
621                        }
622                } catch (URISyntaxException theCause) {
623                        throwInvalidRequestExceptionForNotValidUri(theUri, theCause);
624                }
625
626                List<String> candidates = new ArrayList<>();
627                Path path = Paths.get(theUri);
628                candidates.add(path.toString().replace(":/", "://"));
629                while (path.getParent() != null && path.getParent().toString().contains("/")) {
630                        candidates.add(path.getParent().toString().replace(":/", "://"));
631                        path = path.getParent();
632                }
633                return candidates;
634        }
635
636        private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) {
637                throw new InvalidRequestException(
638                                Msg.code(2419) + String.format("Provided URI is not valid: %s", theUri), theCause);
639        }
640
641        public static class UrlParts {
642                private String myParams;
643                private String myResourceId;
644                private String myResourceType;
645                private String myVersionId;
646
647                public String getParams() {
648                        return myParams;
649                }
650
651                public void setParams(String theParams) {
652                        myParams = theParams;
653                }
654
655                public String getResourceId() {
656                        return myResourceId;
657                }
658
659                public void setResourceId(String theResourceId) {
660                        myResourceId = theResourceId;
661                }
662
663                public String getResourceType() {
664                        return myResourceType;
665                }
666
667                public void setResourceType(String theResourceType) {
668                        myResourceType = theResourceType;
669                }
670
671                public String getVersionId() {
672                        return myVersionId;
673                }
674
675                public void setVersionId(String theVersionId) {
676                        myVersionId = theVersionId;
677                }
678        }
679}