
/*
 * #%L
 * HAPI FHIR - Core Library
 * %%
 * Copyright (C) 2014 - 2025 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package ca.uhn.fhir.util;

import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeResourceDefinition;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.model.primitive.IdDt;
import ca.uhn.fhir.parser.DataFormatException;
import ca.uhn.fhir.rest.api.Constants;
import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
import com.google.common.escape.Escaper;
import com.google.common.net.PercentEscaper;
import jakarta.annotation.Nonnull;
import jakarta.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.hl7.fhir.instance.model.api.IPrimitiveType;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
import static org.apache.commons.lang3.StringUtils.defaultString;
import static org.apache.commons.lang3.StringUtils.endsWith;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;

/**
 * Static helpers for building, parsing, escaping and sanitizing URLs and
 * URL fragments. This class cannot be instantiated.
 */
@SuppressWarnings("JavadocLinkAsPlainText")
public class UrlUtil {
    private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

    /** Characters (besides alphanumerics) that are never percent-escaped in a parameter. */
    private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";

    /**
     * Escaper that leaves the slash character alone, i.e. "/" is added to the safe characters.
     * Used when the caller asks for slashes NOT to be escaped.
     */
    private static final Escaper PARAMETER_ESCAPER_NO_SLASH =
            new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS + "/", false);

    /** Escaper that percent-escapes everything outside the safe set, including "/". */
    private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

    /** Matches every character that {@link #sanitizeBaseUrl(String)} removes. Compiled once. */
    private static final Pattern BASE_URL_DISALLOWED_CHARS = Pattern.compile("[^a-zA-Z0-9:/._-]");

    /**
     * Non instantiable
     */
    private UrlUtil() {}

    /**
     * Cleans up a value that will be serialized as an HTTP header. This method:
     * <p>
     * - Strips any newline (\r or \n) characters
     *
     * @param theHeader the raw header value, must not be <code>null</code>
     * @return the header value with every \r and \n removed
     * @since 6.2.0
     */
    public static String sanitizeHeaderValue(String theHeader) {
        return theHeader.replace("\n", "").replace("\r", "");
    }

    /**
     * Removes every character from the given base URL that is not an ASCII letter,
     * a digit, or one of <code>: / . _ -</code>.
     *
     * @param theBaseUrl the base URL to clean, must not be <code>null</code>
     * @return the base URL with all other characters removed
     */
    public static String sanitizeBaseUrl(String theBaseUrl) {
        return BASE_URL_DISALLOWED_CHARS.matcher(theBaseUrl).replaceAll("");
    }

    /**
     * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
     *
     * @param theBase     the absolute base URL to resolve against; if <code>null</code>, theEndpoint is returned as-is
     * @param theEndpoint the URL to resolve; if <code>null</code>, <code>null</code> is returned; if already
     *                    absolute (see {@link #isAbsolute(String)}) it is returned unchanged
     * @return the resolved URL, or theEndpoint if resolution was not possible
     */
    public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
        if (theEndpoint == null) {
            return null;
        }
        if (isAbsolute(theEndpoint)) {
            return theEndpoint;
        }
        if (theBase == null) {
            return theEndpoint;
        }

        try {
            return new URL(new URL(theBase), theEndpoint).toString();
        } catch (MalformedURLException e) {
            // Deliberately best-effort: report the problem and hand back the input untouched
            ourLog.warn("Failed to resolve relative URL[{}] against absolute base[{}]", theEndpoint, theBase, e);
            return theEndpoint;
        }
    }

    /**
     * Shortens an extension URL to its last path segment when it shares everything up to
     * (and including) the last slash with the parent extension URL. In every other case
     * the extension URL is returned unchanged.
     *
     * @param theParentExtensionUrl the URL of the parent extension, may be <code>null</code>
     * @param theExtensionUrl       the URL of the child extension, may be <code>null</code>
     * @return the part of theExtensionUrl after the last slash if both URLs have the same
     *         prefix before their last slash, otherwise theExtensionUrl (possibly <code>null</code>)
     */
    public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
        if (theParentExtensionUrl == null) {
            return theExtensionUrl;
        }
        if (theExtensionUrl == null) {
            return null;
        }

        int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
        int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');

        if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
            return theExtensionUrl;
        }

        // The last slash must be at the same position in both, otherwise the prefixes cannot match
        if (parentLastSlashIdx != childLastSlashIdx) {
            return theExtensionUrl;
        }

        String parentPrefix = theParentExtensionUrl.substring(0, parentLastSlashIdx);
        String childPrefix = theExtensionUrl.substring(0, parentLastSlashIdx);
        if (!parentPrefix.equals(childPrefix)) {
            return theExtensionUrl;
        }

        // The child has a slash at parentLastSlashIdx, so it is always longer than that index
        // and this substring call is always in range.
        return theExtensionUrl.substring(parentLastSlashIdx + 1);
    }

    /**
     * Given a FHIR resource URL, extracts the associated resource type. Supported formats
     * include the following inputs, all of which will return {@literal Patient}. If no
     * resource type can be determined, {@literal null} will be returned.
     * <ul>
     * <li>Patient
     * <li>Patient?
     * <li>Patient?identifier=foo
     * <li>/Patient
     * <li>/Patient?
     * <li>/Patient?identifier=foo
     * <li>http://foo/base/Patient?identifier=foo
     * <li>http://foo/base/Patient/1
     * <li>http://foo/base/Patient/1/_history/2
     * <li>Patient/1
     * <li>Patient/1/_history/2
     * <li>/Patient/1
     * <li>/Patient/1/_history/2
     * </ul>
     *
     * @param theFhirContext the context used to check that the extracted name is a known resource type
     * @param theUrl         the URL to inspect; <code>null</code> and values starting with
     *                       <code>urn:</code> yield <code>null</code>
     * @return the resource type, or <code>null</code> if none can be determined or the context
     *         rejects the extracted name with a {@link DataFormatException}
     */
    @Nullable
    public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) {
        if (theUrl == null) {
            return null;
        }
        if (theUrl.startsWith("urn:")) {
            return null;
        }

        String resourceType = null;
        int qmIndex = theUrl.indexOf("?");
        if (qmIndex > 0) {
            // Search-style URL: the type is the last path segment before the '?'
            String urlResourceType = theUrl.substring(0, qmIndex);
            int slashIdx = urlResourceType.lastIndexOf('/');
            if (slashIdx != -1) {
                urlResourceType = urlResourceType.substring(slashIdx + 1);
            }
            if (isNotBlank(urlResourceType)) {
                resourceType = urlResourceType;
            }
        } else {
            // Bare type name or ID-style URL: drop one leading slash, then let IdDt pick the
            // type out if there is any further path structure
            resourceType = theUrl;
            int slashIdx = resourceType.indexOf('/');
            if (slashIdx == 0) {
                resourceType = resourceType.substring(1);
            }

            slashIdx = resourceType.indexOf('/');
            if (slashIdx != -1) {
                resourceType = new IdDt(resourceType).getResourceType();
            }
        }

        try {
            if (isNotBlank(resourceType)) {
                // Called only for validation; the returned definition is not needed
                theFhirContext.getResourceDefinition(resourceType);
            }
        } catch (DataFormatException e) {
            return null;
        }

        return resourceType;
    }

    /**
     * URL encode a value according to RFC 3986, except for the following
     * characters: <code>-_.*</code>.
     * <p>
     * This method is intended to be applied to an individual parameter
     * name or value. For example, if you are creating the URL
     * <code>http://example.com/fhir/Patient?key=føø</code>
     * it would be appropriate to pass the string "føø" to this method,
     * but not appropriate to pass the entire URL since characters
     * such as "/" and "?" would also be escaped.
     * </p>
     *
     * @param theUnescaped the value to escape, may be <code>null</code>
     * @return the escaped value, or <code>null</code> if the input was <code>null</code>
     * @see #escapeUrlParam(String, boolean)
     */
    public static String escapeUrlParam(String theUnescaped) {
        return escapeUrlParam(theUnescaped, true);
    }

    /**
     * URL encode a value according to RFC 3986, except for the following
     * characters: <code>-_.*</code>, and optionally <code>/</code>.
     * <p>
     * This method is intended to be applied to an individual parameter
     * name or value. For example, if you are creating the URL
     * <code>http://example.com/fhir/Patient?key=føø</code>
     * it would be appropriate to pass the string "føø" to this method,
     * but not appropriate to pass the entire URL since characters
     * such as "?" and possibly "/" would also be escaped.
     * </p>
     *
     * @param theUnescaped   the value to escape, may be <code>null</code>
     * @param theEscapeSlash If <code>true</code>, the slash character will be percent-escaped.
     *                       Set this to false if you are escaping a query parameter value, since slashes
     *                       will be more readable in the URL than the percent-encoded version. If you
     *                       aren't sure where the escaped version will appear, always set this to
     *                       <code>true</code>, or just call {@link #escapeUrlParam(String)} instead.
     * @return the escaped value, or <code>null</code> if the input was <code>null</code>
     * @since 8.6.0
     */
    public static String escapeUrlParam(String theUnescaped, boolean theEscapeSlash) {
        if (theUnescaped == null) {
            return null;
        }
        Escaper escaper = theEscapeSlash ? PARAMETER_ESCAPER : PARAMETER_ESCAPER_NO_SLASH;
        return escaper.escape(theUnescaped);
    }

    /**
     * Applies the same encoding as {@link #escapeUrlParam(String)} but against all
     * values in a collection
     *
     * @param theUnescaped the values to escape; <code>null</code> elements are passed through as <code>null</code>
     * @return a new list holding the escaped values in iteration order
     */
    public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
        return theUnescaped.stream().map(value -> escapeUrlParam(value)).collect(Collectors.toList());
    }

    /**
     * Checks whether the value starts with <code>http://</code> or <code>https://</code>,
     * ignoring case.
     *
     * @param theValue the value to test, may be <code>null</code>
     * @return <code>true</code> if the value has one of the two prefixes, <code>false</code>
     *         otherwise (including for <code>null</code>)
     */
    public static boolean isAbsolute(String theValue) {
        if (theValue == null) {
            return false;
        }
        // Locale.ROOT keeps the comparison independent of the JVM's default locale
        String value = theValue.toLowerCase(Locale.ROOT);
        return value.startsWith("http://") || value.startsWith("https://");
    }

    /**
     * Checks whether {@link #sanitizeUrlPart(CharSequence)} would change the given text,
     * i.e. whether it contains a single quote, a double quote, an angle bracket, or any
     * character below the space character (which includes \n and \r).
     *
     * @param theString the text to inspect, may be <code>null</code>
     * @return <code>true</code> if at least one such character is present
     */
    public static boolean isNeedsSanitization(CharSequence theString) {
        if (theString != null) {
            for (int i = 0; i < theString.length(); i++) {
                char nextChar = theString.charAt(i);
                switch (nextChar) {
                    case '\'':
                    case '"':
                    case '<':
                    case '>':
                    case '\n':
                    case '\r':
                        return true;
                }
                if (nextChar < ' ') {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Performs a shallow validity check: the value must be at least 8 characters long and
     * start with <code>http://</code> or <code>https://</code> (case-insensitive). Nothing
     * after the prefix is inspected.
     *
     * @param theUrl the URL to test, may be <code>null</code>
     * @return <code>true</code> if the value passes the check
     */
    public static boolean isValid(String theUrl) {
        if (theUrl == null || theUrl.length() < 8) {
            return false;
        }
        return isAbsolute(theUrl);
    }

    /**
     * Looks up the resource definition for the resource type named in a URL. The type is the
     * last path segment before the query string; a slash directly before the <code>?</code>
     * is ignored. If the URL has no query string, the last path segment of the whole URL is used.
     *
     * @param theCtx the context used to resolve the resource name
     * @param theUrl the URL to parse, must not be <code>null</code>
     * @return the definition returned by the context for the extracted name
     * @throws DataFormatException propagated from the context lookup
     */
    public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl)
            throws DataFormatException {
        String url = theUrl;
        int paramIndex = url.indexOf('?');

        // Change pattern of "Observation/?param=foo" into "Observation?param=foo"
        if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
            url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
            paramIndex--;
        }

        // Without a '?' indexOf yields -1, which must not be fed to substring()
        String resourceName = (paramIndex == -1) ? url : url.substring(0, paramIndex);
        if (resourceName.contains("/")) {
            resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1);
        }
        return theCtx.getResourceDefinition(resourceName);
    }

    /**
     * Parses a query string into a map of parameter name to values. A leading
     * <code>?</code> is ignored, names and values are decoded with {@link #unescape(String)},
     * and a parameter without <code>=</code> gets the empty string as its value.
     *
     * @param theQueryString the query string, may be <code>null</code>
     * @return a map from each parameter name to its values in order of appearance
     */
    @Nonnull
    public static Map<String, String[]> parseQueryString(String theQueryString) {
        HashMap<String, List<String>> map = new HashMap<>();
        parseQueryString(theQueryString, map);
        return toQueryStringMap(map);
    }

    /**
     * Parses one query string and appends its parameters to the given accumulator map.
     */
    private static void parseQueryString(String theQueryString, Map<String, List<String>> map) {
        String query = defaultString(theQueryString);
        if (query.startsWith("?")) {
            query = query.substring(1);
        }

        StringTokenizer tok = new StringTokenizer(query, "&");
        while (tok.hasMoreTokens()) {
            String nextToken = tok.nextToken();
            if (isBlank(nextToken)) {
                continue;
            }

            int equalsIndex = nextToken.indexOf('=');
            String nextValue;
            String nextKey;
            if (equalsIndex == -1) {
                nextKey = nextToken;
                nextValue = "";
            } else {
                nextKey = nextToken.substring(0, equalsIndex);
                nextValue = nextToken.substring(equalsIndex + 1);
            }

            nextKey = unescape(nextKey);
            nextValue = unescape(nextValue);

            List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>());
            list.add(nextValue);
        }
    }

    /**
     * Parses several query strings into one combined map, using the same rules as
     * {@link #parseQueryString(String)}. Values for a name that occurs in more than
     * one query string are collected under that single name.
     *
     * @param theQueryString the query strings to parse
     * @return a map from each parameter name to its values in order of appearance
     */
    public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
        HashMap<String, List<String>> map = new HashMap<>();
        for (String next : theQueryString) {
            parseQueryString(next, map);
        }
        return toQueryStringMap(map);
    }

    /**
     * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
     * and any version identifiers or fragment hash is removed
     * <p>
     * Trailing slashes are stripped before the <code>#</code> / <code>|</code> suffix is cut
     * off, so a slash that directly precedes the suffix is kept.
     * </p>
     *
     * @param theUrl the canonical URL to normalize
     * @return the normalized URL; if the input is not a syntactically valid URI, the same
     *         stripping is applied to the input as given
     */
    public static String normalizeCanonicalUrlForComparison(String theUrl) {
        String retVal;
        try {
            retVal = new URI(theUrl).normalize().toString();
        } catch (URISyntaxException e) {
            // Not parseable as a URI: fall back to plain string handling of the raw input
            retVal = theUrl;
        }
        while (endsWith(retVal, "/")) {
            retVal = retVal.substring(0, retVal.length() - 1);
        }
        int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
        if (hashOrPipeIndex != -1) {
            retVal = retVal.substring(0, hashOrPipeIndex);
        }
        return retVal;
    }

    /**
     * Parse a URL in one of the following forms:
     * <ul>
     * <li>[Resource Type]?[Search Params]
     * <li>[Resource Type]/[Resource ID]
     * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
     * </ul>
     * Inputs starting with <code>http</code> are split at the first <code>?</code> and the
     * path is interpreted through {@link IdDt}.
     *
     * @param theUrl the URL to parse, must not be <code>null</code>
     * @return the parts that were found; parts that are absent are left <code>null</code>
     * @throws InvalidRequestException if a non-http URL has a third path segment that is
     *                                 not the history token
     */
    public static UrlParts parseUrl(String theUrl) {
        String url = theUrl;
        UrlParts retVal = new UrlParts();
        if (url.startsWith("http")) {
            int qmIdx = url.indexOf('?');
            if (qmIdx != -1) {
                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
                url = url.substring(0, qmIdx);
            }

            IdDt id = new IdDt(url);
            retVal.setResourceType(id.getResourceType());
            retVal.setResourceId(id.getIdPart());
            retVal.setVersionId(id.getVersionIdPart());
            return retVal;
        }

        // Skip a single leading slash, but only when a letter follows it
        int parsingStart = 0;
        if (url.length() > 2 && url.charAt(0) == '/' && Character.isLetter(url.charAt(1))) {
            parsingStart = 1;
        }

        int nextStart = parsingStart;
        boolean nextIsHistory = false;

        for (int idx = parsingStart; idx < url.length(); idx++) {
            char nextChar = url.charAt(idx);
            boolean atEnd = (idx + 1) == url.length();
            if (nextChar == '?' || nextChar == '/' || atEnd) {
                // At the end of the input the current character belongs to the segment,
                // unless it is the '?' that starts an (empty) query string
                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
                String nextSubstring = url.substring(nextStart, endIdx);
                if (retVal.getResourceType() == null) {
                    retVal.setResourceType(nextSubstring);
                } else if (retVal.getResourceId() == null) {
                    retVal.setResourceId(nextSubstring);
                } else if (nextIsHistory) {
                    retVal.setVersionId(nextSubstring);
                } else {
                    if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
                        nextIsHistory = true;
                    } else {
                        throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
                    }
                }
                if (nextChar == '?') {
                    if (url.length() > idx + 1) {
                        retVal.setParams(url.substring(idx + 1));
                    }
                    break;
                }
                nextStart = idx + 1;
            }
        }

        return retVal;
    }

    /**
     * This method specifically HTML-encodes the &quot; and
     * &lt; characters in order to prevent injection attacks
     *
     * @param theString the primitive whose string value should be sanitized, may be <code>null</code>
     * @return the sanitized string value, or <code>null</code> if theString is <code>null</code>
     * @see #sanitizeUrlPart(CharSequence)
     */
    public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
        String retVal = null;
        if (theString != null) {
            retVal = sanitizeUrlPart(theString.getValueAsString());
        }
        return retVal;
    }

    /**
     * This method specifically HTML-encodes the &quot; and
     * &lt; characters in order to prevent injection attacks.
     * <p>
     * The following characters are escaped:
     * <ul>
     * <li>'</li>
     * <li>&quot;</li>
     * <li>&lt;</li>
     * <li>&gt;</li>
     * <li>\n (newline)</li>
     * <li>\r (carriage return)</li>
     * </ul>
     * Any other character below the space character is dropped.
     *
     * @param theString the text to sanitize, may be <code>null</code>
     * @return the sanitized text, the unchanged text if nothing needed escaping, or
     *         <code>null</code> if the input was <code>null</code>
     */
    public static String sanitizeUrlPart(CharSequence theString) {
        if (theString == null) {
            return null;
        }

        if (!isNeedsSanitization(theString)) {
            return theString.toString();
        }

        // Ok, we're sanitizing
        StringBuilder buffer = new StringBuilder(theString.length() + 10);
        for (int j = 0; j < theString.length(); j++) {

            char nextChar = theString.charAt(j);
            switch (nextChar) {
                /*
                 * NB: If you add a constant here, you also need to add it
                 * to isNeedsSanitization()!!
                 */
                case '\'':
                    buffer.append("&apos;");
                    break;
                case '"':
                    buffer.append("&quot;");
                    break;
                case '<':
                    buffer.append("&lt;");
                    break;
                case '>':
                    buffer.append("&gt;");
                    break;
                case '\n':
                    buffer.append("&#10;");
                    break;
                case '\r':
                    buffer.append("&#13;");
                    break;
                default:
                    // Remaining control characters are silently removed
                    if (nextChar >= ' ') {
                        buffer.append(nextChar);
                    }
                    break;
            }
        } // for build escaped string

        return buffer.toString();
    }

    /**
     * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
     * same strings as the input but with sanitization applied
     *
     * @param theParameterValues the values to sanitize, may be <code>null</code>
     * @return a new array of the same length, or <code>null</code> if the input was <code>null</code>
     */
    public static String[] sanitizeUrlPart(String[] theParameterValues) {
        String[] retVal = null;
        if (theParameterValues != null) {
            retVal = new String[theParameterValues.length];
            for (int i = 0; i < theParameterValues.length; i++) {
                retVal[i] = sanitizeUrlPart(theParameterValues[i]);
            }
        }
        return retVal;
    }

    /**
     * Converts the list-valued accumulator map into the array-valued map handed to callers.
     */
    private static Map<String, String[]> toQueryStringMap(Map<String, List<String>> map) {
        HashMap<String, String[]> retVal = new HashMap<>();
        for (Entry<String, List<String>> nextEntry : map.entrySet()) {
            retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
        }
        return retVal;
    }

    /**
     * URL-decodes the given text as UTF-8 if it appears to need decoding. Decoding is
     * triggered by a <code>%</code> anywhere in the text, or by a <code>+</code> unless the
     * text starts with <code>application/</code>. Text that triggers neither rule is
     * returned unchanged.
     *
     * @param theString the text to decode, may be <code>null</code>
     * @return the decoded text, the unchanged text, or <code>null</code> if the input was <code>null</code>
     */
    public static String unescape(String theString) {
        if (theString == null) {
            return null;
        }
        // If the user passes "_outputFormat" as a GET request parameter directly in the URL,
        // a value such as "application/fhir+json" carries a literal '+'. A '+' alone therefore
        // does not trigger decoding for values that start with "application/".
        final boolean shouldEscapePlus = !theString.startsWith("application/");

        boolean needsDecoding =
                theString.indexOf('%') != -1 || (shouldEscapePlus && theString.indexOf('+') != -1);
        if (needsDecoding) {
            return URLDecoder.decode(theString, StandardCharsets.UTF_8);
        }
        return theString;
    }

    /**
     * Creates list of sub URIs candidates for search with :above modifier
     * Example input: http://[host]/[pathPart1]/[pathPart2]
     * Example candidates: http://[host]/[pathPart1]/[pathPart2], http://[host]/[pathPart1], http://[host]
     * <p>
     * The candidates are returned longest first.
     * </p>
     *
     * @param theUri String URI parameter
     * @return List of URI candidates
     * @throws InvalidRequestException if the value is not a syntactically valid URI or has no scheme or host
     */
    public static List<String> getAboveUriCandidates(String theUri) {
        try {
            URI uri = new URI(theUri);
            if (uri.getScheme() == null || uri.getHost() == null) {
                throwInvalidRequestExceptionForNotValidUri(theUri, null);
            }
        } catch (URISyntaxException theCause) {
            throwInvalidRequestExceptionForNotValidUri(theUri, theCause);
        }

        // NOTE(review): the URI is walked as a java.nio.file.Path, so the result depends on
        // the default file system's path syntax. The ":/" -> "://" replacement below assumes
        // that '/' is the separator and that "//" was collapsed to "/" - confirm for
        // non-POSIX platforms.
        List<String> candidates = new ArrayList<>();
        Path path = Paths.get(theUri);
        candidates.add(path.toString().replace(":/", "://"));
        while (path.getParent() != null && path.getParent().toString().contains("/")) {
            candidates.add(path.getParent().toString().replace(":/", "://"));
            path = path.getParent();
        }
        return candidates;
    }

    /**
     * Always throws an {@link InvalidRequestException} reporting the given URI as invalid.
     *
     * @param theUri   the offending URI, included in the message
     * @param theCause the underlying failure, may be <code>null</code>
     */
    private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) {
        throw new InvalidRequestException(
                Msg.code(2419) + String.format("Provided URI is not valid: %s", theUri), theCause);
    }

    /**
     * Mutable holder for the pieces produced by {@link UrlUtil#parseUrl(String)}:
     * resource type, resource ID, version ID and the raw query string.
     */
    public static class UrlParts {
        private String myParams;
        private String myResourceId;
        private String myResourceType;
        private String myVersionId;

        /** @return the raw text after the <code>?</code>, or <code>null</code> if none was set */
        public String getParams() {
            return myParams;
        }

        public void setParams(String theParams) {
            myParams = theParams;
        }

        /** @return the resource ID part, or <code>null</code> if none was set */
        public String getResourceId() {
            return myResourceId;
        }

        public void setResourceId(String theResourceId) {
            myResourceId = theResourceId;
        }

        /** @return the resource type part, or <code>null</code> if none was set */
        public String getResourceType() {
            return myResourceType;
        }

        public void setResourceType(String theResourceType) {
            myResourceType = theResourceType;
        }

        /** @return the version ID part, or <code>null</code> if none was set */
        public String getVersionId() {
            return myVersionId;
        }

        public void setVersionId(String theVersionId) {
            myVersionId = theVersionId;
        }
    }
}