
001/* 002 * #%L 003 * HAPI FHIR - Core Library 004 * %% 005 * Copyright (C) 2014 - 2025 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.util; 021 022import ca.uhn.fhir.context.FhirContext; 023import ca.uhn.fhir.context.RuntimeResourceDefinition; 024import ca.uhn.fhir.i18n.Msg; 025import ca.uhn.fhir.model.primitive.IdDt; 026import ca.uhn.fhir.parser.DataFormatException; 027import ca.uhn.fhir.rest.api.Constants; 028import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException; 029import com.google.common.escape.Escaper; 030import com.google.common.net.PercentEscaper; 031import jakarta.annotation.Nonnull; 032import jakarta.annotation.Nullable; 033import org.apache.commons.lang3.StringUtils; 034import org.hl7.fhir.instance.model.api.IPrimitiveType; 035 036import java.net.MalformedURLException; 037import java.net.URI; 038import java.net.URISyntaxException; 039import java.net.URL; 040import java.net.URLDecoder; 041import java.nio.charset.StandardCharsets; 042import java.nio.file.Path; 043import java.nio.file.Paths; 044import java.util.ArrayList; 045import java.util.Collection; 046import java.util.HashMap; 047import java.util.List; 048import java.util.Map; 049import java.util.Map.Entry; 050import java.util.StringTokenizer; 051import java.util.stream.Collectors; 052 053import static org.apache.commons.lang3.StringUtils.defaultIfBlank; 054import static org.apache.commons.lang3.StringUtils.defaultString; 055import static org.apache.commons.lang3.StringUtils.endsWith; 056import static org.apache.commons.lang3.StringUtils.isBlank; 057import static org.apache.commons.lang3.StringUtils.isNotBlank; 058 059@SuppressWarnings("JavadocLinkAsPlainText") 060public class UrlUtil { 061 private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class); 062 063 private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*"; 064 private static final Escaper PARAMETER_ESCAPER_NO_SLASH = 065 new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS + "/", false); 066 private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false); 067 068 /** 069 * Non instantiable 070 */ 071 private UrlUtil() {} 072 073 /** 074 * Cleans up a value that will be serialized as an HTTP header. This method: 075 * <p> 076 * - Strips any newline (\r or \n) characters 077 * 078 * @since 6.2.0 079 */ 080 public static String sanitizeHeaderValue(String theHeader) { 081 return theHeader.replace("\n", "").replace("\r", ""); 082 } 083 084 public static String sanitizeBaseUrl(String theBaseUrl) { 085 return theBaseUrl.replaceAll("[^a-zA-Z0-9:/._-]", ""); 086 } 087 088 /** 089 * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid. 090 */ 091 public static String constructAbsoluteUrl(String theBase, String theEndpoint) { 092 if (theEndpoint == null) { 093 return null; 094 } 095 if (isAbsolute(theEndpoint)) { 096 return theEndpoint; 097 } 098 if (theBase == null) { 099 return theEndpoint; 100 } 101 102 try { 103 return new URL(new URL(theBase), theEndpoint).toString(); 104 } catch (MalformedURLException e) { 105 ourLog.warn( 106 "Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e); 107 return theEndpoint; 108 } 109 } 110 111 public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) { 112 if (theParentExtensionUrl == null) { 113 return theExtensionUrl; 114 } 115 if (theExtensionUrl == null) { 116 return null; 117 } 118 119 int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/'); 120 int childLastSlashIdx = theExtensionUrl.lastIndexOf('/'); 121 122 if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) { 123 return theExtensionUrl; 124 } 125 126 if (parentLastSlashIdx != childLastSlashIdx) { 127 return theExtensionUrl; 128 } 129 130 if (!theParentExtensionUrl 131 .substring(0, parentLastSlashIdx) 132 .equals(theExtensionUrl.substring(0, parentLastSlashIdx))) { 133 return theExtensionUrl; 134 } 135 136 if (theExtensionUrl.length() > parentLastSlashIdx) { 137 return theExtensionUrl.substring(parentLastSlashIdx + 1); 138 } 139 140 return theExtensionUrl; 141 } 142 143 /** 144 * Given a FHIR resource URL, extracts the associated resource type. Supported formats 145 * include the following inputs, all of which will return {@literal Patient}. If no 146 * resource type can be determined, {@literal null} will be returned. 147 * <ul> 148 * <li>Patient 149 * <li>Patient? 150 * <li>Patient?identifier=foo 151 * <li>/Patient 152 * <li>/Patient? 153 * <li>/Patient?identifier=foo 154 * <li>http://foo/base/Patient?identifier=foo 155 * <li>http://foo/base/Patient/1 156 * <li>http://foo/base/Patient/1/_history/2 157 * <li>Patient/1 158 * <li>Patient/1/_history/2 159 * <li>/Patient/1 160 * <li>/Patient/1/_history/2 161 * </ul> 162 */ 163 @Nullable 164 public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) { 165 if (theUrl == null) { 166 return null; 167 } 168 if (theUrl.startsWith("urn:")) { 169 return null; 170 } 171 172 String resourceType = null; 173 int qmIndex = theUrl.indexOf("?"); 174 if (qmIndex > 0) { 175 String urlResourceType = theUrl.substring(0, qmIndex); 176 int slashIdx = urlResourceType.lastIndexOf('/'); 177 if (slashIdx != -1) { 178 urlResourceType = urlResourceType.substring(slashIdx + 1); 179 } 180 if (isNotBlank(urlResourceType)) { 181 resourceType = urlResourceType; 182 } 183 } else { 184 resourceType = theUrl; 185 int slashIdx = resourceType.indexOf('/'); 186 if (slashIdx == 0) { 187 resourceType = resourceType.substring(1); 188 } 189 190 slashIdx = resourceType.indexOf('/'); 191 if (slashIdx != -1) { 192 resourceType = new IdDt(resourceType).getResourceType(); 193 } 194 } 195 196 try { 197 if (isNotBlank(resourceType)) { 198 theFhirContext.getResourceDefinition(resourceType); 199 } 200 } catch (DataFormatException e) { 201 return null; 202 } 203 204 return resourceType; 205 } 206 207 /** 208 * URL encode a value according to RFC 3986, except for the following 209 * characters: <code>-_.*</code>. 210 * <p> 211 * This method is intended to be applied to an individual parameter 212 * name or value. For example, if you are creating the URL 213 * <code>http://example.com/fhir/Patient?key=føø</code> 214 * it would be appropriate to pass the string "føø" to this method, 215 * but not appropriate to pass the entire URL since characters 216 * such as "/" and "?" would also be escaped. 217 * </p> 218 * 219 * @see #escapeUrlParam(String, boolean) 220 */ 221 public static String escapeUrlParam(String theUnescaped) { 222 return escapeUrlParam(theUnescaped, true); 223 } 224 225 /** 226 * URL encode a value according to RFC 3986, except for the following 227 * characters: <code>-_.*</code>, and optionally <code>/</code>. 228 * <p> 229 * This method is intended to be applied to an individual parameter 230 * name or value. For example, if you are creating the URL 231 * <code>http://example.com/fhir/Patient?key=føø</code> 232 * it would be appropriate to pass the string "føø" to this method, 233 * but not appropriate to pass the entire URL since characters 234 * such as "?" and possibly "/" would also be escaped. 235 * </p> 236 * 237 * @param theEscapeSlash If <code>true</code>, the slash character will be percent-escaped. 238 * Set this to false if you are escaping a query parameter value, since slashes 239 * will be more readable in the URL than the percent-encoded version. If you 240 * aren't sure where the escaped version will appear, always set this to 241 * <code>false</code>, or just call {@link #escapeUrlParam(String)} instead. 242 * @since 8.6.0 243 */ 244 public static String escapeUrlParam(String theUnescaped, boolean theEscapeSlash) { 245 if (theUnescaped == null) { 246 return null; 247 } 248 if (theEscapeSlash) { 249 return PARAMETER_ESCAPER.escape(theUnescaped); 250 } else { 251 return PARAMETER_ESCAPER_NO_SLASH.escape(theUnescaped); 252 } 253 } 254 255 /** 256 * Applies the same encodong as {@link #escapeUrlParam(String)} but against all 257 * values in a collection 258 */ 259 public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) { 260 return theUnescaped.stream().map(t -> PARAMETER_ESCAPER.escape(t)).collect(Collectors.toList()); 261 } 262 263 public static boolean isAbsolute(String theValue) { 264 String value = theValue.toLowerCase(); 265 return value.startsWith("http://") || value.startsWith("https://"); 266 } 267 268 public static boolean isNeedsSanitization(CharSequence theString) { 269 if (theString != null) { 270 for (int i = 0; i < theString.length(); i++) { 271 char nextChar = theString.charAt(i); 272 switch (nextChar) { 273 case '\'': 274 case '"': 275 case '<': 276 case '>': 277 case '\n': 278 case '\r': 279 return true; 280 } 281 if (nextChar < ' ') { 282 return true; 283 } 284 } 285 } 286 return false; 287 } 288 289 public static boolean isValid(String theUrl) { 290 if (theUrl == null || theUrl.length() < 8) { 291 return false; 292 } 293 294 String url = theUrl.toLowerCase(); 295 if (url.charAt(0) != 'h') { 296 return false; 297 } 298 if (url.charAt(1) != 't') { 299 return false; 300 } 301 if (url.charAt(2) != 't') { 302 return false; 303 } 304 if (url.charAt(3) != 'p') { 305 return false; 306 } 307 int slashOffset; 308 if (url.charAt(4) == ':') { 309 slashOffset = 5; 310 } else if (url.charAt(4) == 's') { 311 if (url.charAt(5) != ':') { 312 return false; 313 } 314 slashOffset = 6; 315 } else { 316 return false; 317 } 318 319 if (url.charAt(slashOffset) != '/') { 320 return false; 321 } 322 if (url.charAt(slashOffset + 1) != '/') { 323 return false; 324 } 325 326 return true; 327 } 328 329 public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl) 330 throws DataFormatException { 331 String url = theUrl; 332 int paramIndex = url.indexOf('?'); 333 334 // Change pattern of "Observation/?param=foo" into "Observation?param=foo" 335 if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') { 336 url = url.substring(0, paramIndex - 1) + url.substring(paramIndex); 337 paramIndex--; 338 } 339 340 String resourceName = url.substring(0, paramIndex); 341 if (resourceName.contains("/")) { 342 resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1); 343 } 344 return theCtx.getResourceDefinition(resourceName); 345 } 346 347 @Nonnull 348 public static Map<String, String[]> parseQueryString(String theQueryString) { 349 HashMap<String, List<String>> map = new HashMap<>(); 350 parseQueryString(theQueryString, map); 351 return toQueryStringMap(map); 352 } 353 354 private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) { 355 String query = defaultString(theQueryString); 356 357 int questionMarkIdx = query.indexOf('?'); 358 if (questionMarkIdx != -1) { 359 query = query.substring(questionMarkIdx + 1); 360 } 361 362 StringTokenizer tok = new StringTokenizer(query, "&"); 363 while (tok.hasMoreTokens()) { 364 String nextToken = tok.nextToken(); 365 if (isBlank(nextToken)) { 366 continue; 367 } 368 369 int equalsIndex = nextToken.indexOf('='); 370 String nextValue; 371 String nextKey; 372 if (equalsIndex == -1) { 373 nextKey = nextToken; 374 nextValue = ""; 375 } else { 376 nextKey = nextToken.substring(0, equalsIndex); 377 nextValue = nextToken.substring(equalsIndex + 1); 378 } 379 380 nextKey = unescape(nextKey); 381 nextValue = unescape(nextValue); 382 383 List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>()); 384 list.add(nextValue); 385 } 386 } 387 388 public static Map<String, String[]> parseQueryStrings(String... theQueryString) { 389 HashMap<String, List<String>> map = new HashMap<>(); 390 for (String next : theQueryString) { 391 parseQueryString(next, map); 392 } 393 return toQueryStringMap(map); 394 } 395 396 /** 397 * Normalizes canonical URLs for comparison. Trailing "/" is stripped, 398 * and any version identifiers or fragment hash is removed 399 */ 400 public static String normalizeCanonicalUrlForComparison(String theUrl) { 401 String retVal; 402 try { 403 retVal = new URI(theUrl).normalize().toString(); 404 } catch (URISyntaxException e) { 405 retVal = theUrl; 406 } 407 while (endsWith(retVal, "/")) { 408 retVal = retVal.substring(0, retVal.length() - 1); 409 } 410 int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|'); 411 if (hashOrPipeIndex != -1) { 412 retVal = retVal.substring(0, hashOrPipeIndex); 413 } 414 return retVal; 415 } 416 417 /** 418 * Parse a URL in one of the following forms: 419 * <ul> 420 * <li>[Resource Type]?[Search Params] 421 * <li>[Resource Type]/[Resource ID] 422 * <li>[Resource Type]/[Resource ID]/_history/[Version ID] 423 * </ul> 424 */ 425 public static UrlParts parseUrl(String theUrl) { 426 String url = theUrl; 427 UrlParts retVal = new UrlParts(); 428 if (url.startsWith("http")) { 429 int qmIdx = url.indexOf('?'); 430 if (qmIdx != -1) { 431 retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null)); 432 url = url.substring(0, qmIdx); 433 } 434 435 IdDt id = new IdDt(url); 436 retVal.setResourceType(id.getResourceType()); 437 retVal.setResourceId(id.getIdPart()); 438 retVal.setVersionId(id.getVersionIdPart()); 439 return retVal; 440 } 441 442 int parsingStart = 0; 443 if (url.length() > 2) { 444 if (url.charAt(0) == '/') { 445 if (Character.isLetter(url.charAt(1))) { 446 parsingStart = 1; 447 } 448 } 449 } 450 451 int nextStart = parsingStart; 452 boolean nextIsHistory = false; 453 454 for (int idx = parsingStart; idx < url.length(); idx++) { 455 char nextChar = url.charAt(idx); 456 boolean atEnd = (idx + 1) == url.length(); 457 if (nextChar == '?' || nextChar == '/' || atEnd) { 458 int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx; 459 String nextSubstring = url.substring(nextStart, endIdx); 460 if (retVal.getResourceType() == null) { 461 retVal.setResourceType(nextSubstring); 462 } else if (retVal.getResourceId() == null) { 463 retVal.setResourceId(nextSubstring); 464 } else if (nextIsHistory) { 465 retVal.setVersionId(nextSubstring); 466 } else { 467 if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) { 468 nextIsHistory = true; 469 } else { 470 throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url); 471 } 472 } 473 if (nextChar == '?') { 474 if (url.length() > idx + 1) { 475 retVal.setParams(url.substring(idx + 1)); 476 } 477 break; 478 } 479 nextStart = idx + 1; 480 } 481 } 482 483 return retVal; 484 } 485 486 /** 487 * This method specifically HTML-encodes the " and 488 * < characters in order to prevent injection attacks 489 */ 490 public static String sanitizeUrlPart(IPrimitiveType<?> theString) { 491 String retVal = null; 492 if (theString != null) { 493 retVal = sanitizeUrlPart(theString.getValueAsString()); 494 } 495 return retVal; 496 } 497 498 /** 499 * This method specifically HTML-encodes the " and 500 * < characters in order to prevent injection attacks. 501 * <p> 502 * The following characters are escaped: 503 * <ul> 504 * <li>'</li> 505 * <li>"</li> 506 * <li><</li> 507 * <li>></li> 508 * <li>\n (newline)</li> 509 * </ul> 510 */ 511 public static String sanitizeUrlPart(CharSequence theString) { 512 if (theString == null) { 513 return null; 514 } 515 516 boolean needsSanitization = isNeedsSanitization(theString); 517 518 if (needsSanitization) { 519 // Ok, we're sanitizing 520 StringBuilder buffer = new StringBuilder(theString.length() + 10); 521 for (int j = 0; j < theString.length(); j++) { 522 523 char nextChar = theString.charAt(j); 524 switch (nextChar) { 525 /* 526 * NB: If you add a constant here, you also need to add it 527 * to isNeedsSanitization()!! 528 */ 529 case '\'': 530 buffer.append("'"); 531 break; 532 case '"': 533 buffer.append("""); 534 break; 535 case '<': 536 buffer.append("<"); 537 break; 538 case '>': 539 buffer.append(">"); 540 break; 541 case '\n': 542 buffer.append(" "); 543 break; 544 case '\r': 545 buffer.append(" "); 546 break; 547 default: 548 if (nextChar >= ' ') { 549 buffer.append(nextChar); 550 } 551 break; 552 } 553 } // for build escaped string 554 555 return buffer.toString(); 556 } 557 558 return theString.toString(); 559 } 560 561 /** 562 * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the 563 * same strings as the input but with sanitization applied 564 */ 565 public static String[] sanitizeUrlPart(String[] theParameterValues) { 566 String[] retVal = null; 567 if (theParameterValues != null) { 568 retVal = new String[theParameterValues.length]; 569 for (int i = 0; i < theParameterValues.length; i++) { 570 retVal[i] = sanitizeUrlPart(theParameterValues[i]); 571 } 572 } 573 return retVal; 574 } 575 576 private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) { 577 HashMap<String, String[]> retVal = new HashMap<>(); 578 for (Entry<String, List<String>> nextEntry : map.entrySet()) { 579 retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0])); 580 } 581 return retVal; 582 } 583 584 public static String unescape(String theString) { 585 if (theString == null) { 586 return null; 587 } 588 // If the user passes "_outputFormat" as a GET request parameter directly in the URL: 589 final boolean shouldEscapePlus = !theString.startsWith("application/"); 590 591 for (int i = 0; i < theString.length(); i++) { 592 char nextChar = theString.charAt(i); 593 if (nextChar == '%' || (nextChar == '+' && shouldEscapePlus)) { 594 return URLDecoder.decode(theString, StandardCharsets.UTF_8); 595 } 596 } 597 return theString; 598 } 599 600 /** 601 * Creates list of sub URIs candidates for search with :above modifier 602 * Example input: http://[host]/[pathPart1]/[pathPart2] 603 * Example output: http://[host], http://[host]/[pathPart1], http://[host]/[pathPart1]/[pathPart2] 604 * 605 * @param theUri String URI parameter 606 * @return List of URI candidates 607 */ 608 public static List<String> getAboveUriCandidates(String theUri) { 609 try { 610 URI uri = new URI(theUri); 611 if (uri.getScheme() == null || uri.getHost() == null) { 612 throwInvalidRequestExceptionForNotValidUri(theUri, null); 613 } 614 } catch (URISyntaxException theCause) { 615 throwInvalidRequestExceptionForNotValidUri(theUri, theCause); 616 } 617 618 List<String> candidates = new ArrayList<>(); 619 Path path = Paths.get(theUri); 620 candidates.add(path.toString().replace(":/", "://")); 621 while (path.getParent() != null && path.getParent().toString().contains("/")) { 622 candidates.add(path.getParent().toString().replace(":/", "://")); 623 path = path.getParent(); 624 } 625 return candidates; 626 } 627 628 private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) { 629 throw new InvalidRequestException( 630 Msg.code(2419) + String.format("Provided URI is not valid: %s", theUri), theCause); 631 } 632 633 public static class UrlParts { 634 private String myParams; 635 private String myResourceId; 636 private String myResourceType; 637 private String myVersionId; 638 639 public String getParams() { 640 return myParams; 641 } 642 643 public void setParams(String theParams) { 644 myParams = theParams; 645 } 646 647 public String getResourceId() { 648 return myResourceId; 649 } 650 651 public void setResourceId(String theResourceId) { 652 myResourceId = theResourceId; 653 } 654 655 public String getResourceType() { 656 return myResourceType; 657 } 658 659 public void setResourceType(String theResourceType) { 660 myResourceType = theResourceType; 661 } 662 663 public String getVersionId() { 664 return myVersionId; 665 } 666 667 public void setVersionId(String theVersionId) { 668 myVersionId = theVersionId; 669 } 670 } 671}