
/*
 * #%L
 * HAPI FHIR - Core Library
 * %%
 * Copyright (C) 2014 - 2025 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package ca.uhn.fhir.util;

import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeResourceDefinition;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.model.primitive.IdDt;
import ca.uhn.fhir.parser.DataFormatException;
import ca.uhn.fhir.rest.api.Constants;
import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
import com.google.common.escape.Escaper;
import com.google.common.net.PercentEscaper;
import jakarta.annotation.Nonnull;
import jakarta.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.hl7.fhir.instance.model.api.IPrimitiveType;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
import static org.apache.commons.lang3.StringUtils.defaultString;
import static org.apache.commons.lang3.StringUtils.endsWith;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;

/**
 * Static helpers for building, parsing, escaping and sanitizing URLs and URL fragments
 * as they appear in FHIR REST requests.
 */
@SuppressWarnings("JavadocLinkAsPlainText")
public class UrlUtil {
    private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

    private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
    private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

    /**
     * Matches every character that {@link #sanitizeBaseUrl(String)} removes. Compiled once
     * so the expression is not re-parsed on every call.
     */
    private static final Pattern BASE_URL_UNSAFE_CHARS = Pattern.compile("[^a-zA-Z0-9:/._-]");

    /**
     * Non instantiable
     */
    private UrlUtil() {}

    /**
     * Cleans up a value that will be serialized as an HTTP header. This method:
     * <p>
     * - Strips any newline (\r or \n) characters
     *
     * @param theHeader the raw header value, may be {@literal null}
     * @return the value with all CR/LF characters removed, or {@literal null} if the input was {@literal null}
     * @since 6.2.0
     */
    public static String sanitizeHeaderValue(String theHeader) {
        if (theHeader == null) {
            return null;
        }
        return theHeader.replace("\n", "").replace("\r", "");
    }

    /**
     * Removes every character from a base URL other than ASCII letters, digits
     * and the characters <code>: / . _ -</code>
     *
     * @param theBaseUrl the base URL, may be {@literal null}
     * @return the stripped URL, or {@literal null} if the input was {@literal null}
     */
    public static String sanitizeBaseUrl(String theBaseUrl) {
        if (theBaseUrl == null) {
            return null;
        }
        return BASE_URL_UNSAFE_CHARS.matcher(theBaseUrl).replaceAll("");
    }

    /**
     * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
     *
     * @param theBase     the absolute base URL, may be {@literal null}
     * @param theEndpoint the URL to resolve, may be {@literal null}
     * @return {@literal null} if theEndpoint is {@literal null}; theEndpoint unchanged if it is already
     * absolute, if theBase is {@literal null}, or if resolution fails; otherwise the resolved URL
     */
    public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
        if (theEndpoint == null) {
            return null;
        }
        if (isAbsolute(theEndpoint)) {
            return theEndpoint;
        }
        if (theBase == null) {
            return theEndpoint;
        }

        try {
            return new URL(new URL(theBase), theEndpoint).toString();
        } catch (MalformedURLException e) {
            // Trailing throwable argument is logged as the exception by SLF4J
            ourLog.warn("Failed to resolve relative URL[{}] against absolute base[{}]", theEndpoint, theBase, e);
            return theEndpoint;
        }
    }

    /**
     * Shortens an extension URL to the part after its last slash when it lives directly
     * under the same path as its parent extension URL (i.e. everything up to and including the last
     * slash is identical in both URLs).
     *
     * @param theParentExtensionUrl the parent URL, may be {@literal null}
     * @param theExtensionUrl       the child URL, may be {@literal null}
     * @return the relative name, or theExtensionUrl unchanged if it cannot be made relative
     */
    public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
        if (theParentExtensionUrl == null) {
            return theExtensionUrl;
        }
        if (theExtensionUrl == null) {
            return null;
        }

        int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
        int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');

        // Both must contain a slash, and the last slash must be at the same offset
        if (parentLastSlashIdx == -1 || childLastSlashIdx == -1 || parentLastSlashIdx != childLastSlashIdx) {
            return theExtensionUrl;
        }

        // Everything before the last slash must be identical
        if (!theParentExtensionUrl.regionMatches(0, theExtensionUrl, 0, parentLastSlashIdx)) {
            return theExtensionUrl;
        }

        return theExtensionUrl.substring(parentLastSlashIdx + 1);
    }

    /**
     * Given a FHIR resource URL, extracts the associated resource type. Supported formats
     * include the following inputs, all of which will return {@literal Patient}. If no
     * resource type can be determined, {@literal null} will be returned.
     * <ul>
     * <li>Patient
     * <li>Patient?
     * <li>Patient?identifier=foo
     * <li>/Patient
     * <li>/Patient?
     * <li>/Patient?identifier=foo
     * <li>http://foo/base/Patient?identifier=foo
     * <li>http://foo/base/Patient/1
     * <li>http://foo/base/Patient/1/_history/2
     * <li>Patient/1
     * <li>Patient/1/_history/2
     * <li>/Patient/1
     * <li>/Patient/1/_history/2
     * </ul>
     */
    @Nullable
    public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) {
        if (theUrl == null) {
            return null;
        }
        if (theUrl.startsWith("urn:")) {
            return null;
        }

        String resourceType = null;
        int qmIndex = theUrl.indexOf("?");
        if (qmIndex > 0) {
            // Search URL: the type is the last path segment before the '?'
            String urlResourceType = theUrl.substring(0, qmIndex);
            int slashIdx = urlResourceType.lastIndexOf('/');
            if (slashIdx != -1) {
                urlResourceType = urlResourceType.substring(slashIdx + 1);
            }
            if (isNotBlank(urlResourceType)) {
                resourceType = urlResourceType;
            }
        } else {
            resourceType = theUrl;
            int slashIdx = resourceType.indexOf('/');
            if (slashIdx == 0) {
                resourceType = resourceType.substring(1);
            }

            // Anything still containing a slash is treated as a resource reference
            slashIdx = resourceType.indexOf('/');
            if (slashIdx != -1) {
                resourceType = new IdDt(resourceType).getResourceType();
            }
        }

        // Honour the documented contract: nothing usable means null, never an empty string
        if (isBlank(resourceType)) {
            return null;
        }

        try {
            // Only used for validation: throws if the type is not known to the context
            theFhirContext.getResourceDefinition(resourceType);
        } catch (DataFormatException e) {
            return null;
        }

        return resourceType;
    }

    /**
     * URL encode a value according to RFC 3986
     * <p>
     * This method is intended to be applied to an individual parameter
     * name or value. For example, if you are creating the URL
     * <code>http://example.com/fhir/Patient?key=føø</code>
     * it would be appropriate to pass the string "føø" to this method,
     * but not appropriate to pass the entire URL since characters
     * such as "/" and "?" would also be escaped.
     * </p>
     *
     * @param theUnescaped the raw value, may be {@literal null}
     * @return the escaped value, or {@literal null} if the input was {@literal null}
     */
    public static String escapeUrlParam(String theUnescaped) {
        if (theUnescaped == null) {
            return null;
        }
        return PARAMETER_ESCAPER.escape(theUnescaped);
    }

    /**
     * Applies the same encoding as {@link #escapeUrlParam(String)} but against all
     * values in a collection. {@literal null} elements are passed through as {@literal null}.
     */
    public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
        return theUnescaped.stream().map(UrlUtil::escapeUrlParam).collect(Collectors.toList());
    }

    /**
     * @return {@literal true} if the value starts with <code>http://</code> or <code>https://</code>
     * (case insensitive); {@literal false} otherwise, including for {@literal null}
     */
    public static boolean isAbsolute(String theValue) {
        if (theValue == null) {
            return false;
        }
        // Locale.ROOT keeps the comparison independent of the JVM default locale
        String value = theValue.toLowerCase(Locale.ROOT);
        return value.startsWith("http://") || value.startsWith("https://");
    }

    /**
     * @return {@literal true} if the string contains any character that
     * {@link #sanitizeUrlPart(CharSequence)} would escape or drop: <code>'</code>, <code>"</code>,
     * <code>&lt;</code>, <code>&gt;</code> or any character below the space character
     * (which includes \n and \r)
     */
    public static boolean isNeedsSanitization(CharSequence theString) {
        if (theString == null) {
            return false;
        }
        for (int i = 0; i < theString.length(); i++) {
            char nextChar = theString.charAt(i);
            if (nextChar < ' ' || nextChar == '\'' || nextChar == '"' || nextChar == '<' || nextChar == '>') {
                return true;
            }
        }
        return false;
    }

    /**
     * Performs a cheap syntactic check only: the value must be at least 8 characters long and start
     * with <code>http://</code> or <code>https://</code> (case insensitive).
     */
    public static boolean isValid(String theUrl) {
        if (theUrl == null || theUrl.length() < 8) {
            return false;
        }

        String url = theUrl.toLowerCase(Locale.ROOT);
        return url.startsWith("http://") || url.startsWith("https://");
    }

    /**
     * Looks up the resource definition for the resource type named in a search URL such as
     * <code>Observation?param=foo</code>, <code>Observation/?param=foo</code> or
     * <code>http://base/Observation?param=foo</code>. A URL without any <code>?</code> is treated
     * as having no parameters.
     *
     * @param theCtx the context used to resolve the resource name
     * @param theUrl the URL to inspect, must not be {@literal null}
     * @throws DataFormatException if the context rejects the extracted resource name
     */
    public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl)
            throws DataFormatException {
        int paramIndex = theUrl.indexOf('?');
        String path = (paramIndex == -1) ? theUrl : theUrl.substring(0, paramIndex);

        // Change pattern of "Observation/?param=foo" into "Observation?param=foo"
        if (path.endsWith("/")) {
            path = path.substring(0, path.length() - 1);
        }

        // lastIndexOf returns -1 when there is no slash, so this keeps the whole path in that case
        String resourceName = path.substring(path.lastIndexOf('/') + 1);
        return theCtx.getResourceDefinition(resourceName);
    }

    /**
     * Parses a query string (with or without a leading path and <code>?</code>) into a map of
     * unescaped parameter names to their unescaped values.
     */
    @Nonnull
    public static Map<String, String[]> parseQueryString(String theQueryString) {
        Map<String, List<String>> map = new HashMap<>();
        parseQueryString(theQueryString, map);
        return toQueryStringMap(map);
    }

    /**
     * Parses one query string and appends its parameters to the supplied map.
     */
    private static void parseQueryString(String theQueryString, Map<String, List<String>> map) {
        String query = defaultString(theQueryString);

        // Drop anything up to and including the first '?'
        int questionMarkIdx = query.indexOf('?');
        if (questionMarkIdx != -1) {
            query = query.substring(questionMarkIdx + 1);
        }

        StringTokenizer tok = new StringTokenizer(query, "&");
        while (tok.hasMoreTokens()) {
            String nextToken = tok.nextToken();
            if (isBlank(nextToken)) {
                continue;
            }

            int equalsIndex = nextToken.indexOf('=');
            String nextValue;
            String nextKey;
            if (equalsIndex == -1) {
                // A bare key is recorded with an empty value
                nextKey = nextToken;
                nextValue = "";
            } else {
                nextKey = nextToken.substring(0, equalsIndex);
                nextValue = nextToken.substring(equalsIndex + 1);
            }

            nextKey = unescape(nextKey);
            nextValue = unescape(nextValue);

            map.computeIfAbsent(nextKey, k -> new ArrayList<>()).add(nextValue);
        }
    }

    /**
     * Parses several query strings into a single map; values of a parameter that occurs in more
     * than one query string are accumulated.
     */
    public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
        Map<String, List<String>> map = new HashMap<>();
        for (String next : theQueryString) {
            parseQueryString(next, map);
        }
        return toQueryStringMap(map);
    }

    /**
     * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
     * and any version identifiers or fragment hash is removed
     *
     * @param theUrl the canonical URL, may be {@literal null}
     * @return the normalized URL, or {@literal null} if the input was {@literal null}
     */
    public static String normalizeCanonicalUrlForComparison(String theUrl) {
        if (theUrl == null) {
            return null;
        }

        // Remove the "|version" / "#fragment" suffix first: a '|' is not legal in a URI and
        // would otherwise prevent normalization, and a slash in front of the suffix would
        // otherwise survive the trailing-slash stripping below.
        String retVal = theUrl;
        int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
        if (hashOrPipeIndex != -1) {
            retVal = retVal.substring(0, hashOrPipeIndex);
        }

        try {
            retVal = new URI(retVal).normalize().toString();
        } catch (URISyntaxException e) {
            // Not parseable as a URI: best effort, continue with the un-normalized text
        }

        while (endsWith(retVal, "/")) {
            retVal = retVal.substring(0, retVal.length() - 1);
        }
        return retVal;
    }

    /**
     * Parse a URL in one of the following forms:
     * <ul>
     * <li>[Resource Type]?[Search Params]
     * <li>[Resource Type]/[Resource ID]
     * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
     * </ul>
     *
     * @param theUrl the URL to parse, must not be {@literal null}
     * @throws InvalidRequestException if a relative URL has an unexpected path segment after the resource ID
     */
    public static UrlParts parseUrl(String theUrl) {
        String url = theUrl;
        UrlParts retVal = new UrlParts();
        if (url.startsWith("http")) {
            int qmIdx = url.indexOf('?');
            if (qmIdx != -1) {
                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
                url = url.substring(0, qmIdx);
            }

            // Absolute URLs are split by the ID datatype
            IdDt id = new IdDt(url);
            retVal.setResourceType(id.getResourceType());
            retVal.setResourceId(id.getIdPart());
            retVal.setVersionId(id.getVersionIdPart());
            return retVal;
        }

        // Skip a single leading slash when it is followed by a letter
        int parsingStart = 0;
        if (url.length() > 2) {
            if (url.charAt(0) == '/') {
                if (Character.isLetter(url.charAt(1))) {
                    parsingStart = 1;
                }
            }
        }

        int nextStart = parsingStart;
        boolean nextIsHistory = false;

        for (int idx = parsingStart; idx < url.length(); idx++) {
            char nextChar = url.charAt(idx);
            boolean atEnd = (idx + 1) == url.length();
            if (nextChar == '?' || nextChar == '/' || atEnd) {
                // At the end of the string the current character belongs to the segment,
                // unless it is the '?' delimiter itself
                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
                String nextSubstring = url.substring(nextStart, endIdx);
                if (retVal.getResourceType() == null) {
                    retVal.setResourceType(nextSubstring);
                } else if (retVal.getResourceId() == null) {
                    retVal.setResourceId(nextSubstring);
                } else if (nextIsHistory) {
                    retVal.setVersionId(nextSubstring);
                } else {
                    if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
                        nextIsHistory = true;
                    } else {
                        throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
                    }
                }
                if (nextChar == '?') {
                    // Everything after the '?' is the raw parameter string
                    if (url.length() > idx + 1) {
                        retVal.setParams(url.substring(idx + 1));
                    }
                    break;
                }
                nextStart = idx + 1;
            }
        }

        return retVal;
    }

    /**
     * This method specifically HTML-encodes the &quot; and
     * &lt; characters in order to prevent injection attacks
     *
     * @param theString the value to sanitize, may be {@literal null}
     * @return the sanitized string value, or {@literal null} if the input was {@literal null}
     */
    public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
        if (theString == null) {
            return null;
        }
        return sanitizeUrlPart(theString.getValueAsString());
    }

    /**
     * This method specifically HTML-encodes the &quot; and
     * &lt; characters in order to prevent injection attacks.
     * <p>
     * The following characters are escaped:
     * <ul>
     * <li>&apos;</li>
     * <li>&quot;</li>
     * <li>&lt;</li>
     * <li>&gt;</li>
     * <li>\n (newline)</li>
     * <li>\r (carriage return)</li>
     * </ul>
     * Any other character below the space character is dropped.
     *
     * @param theString the value to sanitize, may be {@literal null}
     * @return the sanitized value, or {@literal null} if the input was {@literal null}
     */
    public static String sanitizeUrlPart(CharSequence theString) {
        if (theString == null) {
            return null;
        }

        // Fast path: nothing to escape, so avoid building a new string
        if (!isNeedsSanitization(theString)) {
            return theString.toString();
        }

        // Ok, we're sanitizing
        StringBuilder buffer = new StringBuilder(theString.length() + 10);
        for (int j = 0; j < theString.length(); j++) {

            char nextChar = theString.charAt(j);
            switch (nextChar) {
                /*
                 * NB: If you add a constant here, you also need to add it
                 * to isNeedsSanitization()!!
                 */
                case '\'':
                    buffer.append("&apos;");
                    break;
                case '"':
                    buffer.append("&quot;");
                    break;
                case '<':
                    buffer.append("&lt;");
                    break;
                case '>':
                    buffer.append("&gt;");
                    break;
                case '\n':
                    buffer.append("&#10;");
                    break;
                case '\r':
                    buffer.append("&#13;");
                    break;
                default:
                    // Remaining control characters are silently dropped
                    if (nextChar >= ' ') {
                        buffer.append(nextChar);
                    }
                    break;
            }
        } // for build escaped string

        return buffer.toString();
    }

    /**
     * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
     * same strings as the input but with sanitization applied
     */
    public static String[] sanitizeUrlPart(String[] theParameterValues) {
        if (theParameterValues == null) {
            return null;
        }
        String[] retVal = new String[theParameterValues.length];
        for (int i = 0; i < theParameterValues.length; i++) {
            retVal[i] = sanitizeUrlPart(theParameterValues[i]);
        }
        return retVal;
    }

    /**
     * Converts the list-valued working map into the array-valued map returned to callers.
     */
    private static Map<String, String[]> toQueryStringMap(Map<String, List<String>> map) {
        HashMap<String, String[]> retVal = new HashMap<>();
        for (Entry<String, List<String>> nextEntry : map.entrySet()) {
            retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
        }
        return retVal;
    }

    /**
     * URL-decodes a single parameter name or value using UTF-8.
     * <p>
     * Values starting with <code>application/</code> keep their <code>+</code> characters
     * (e.g. <code>application/fhir+json</code>) instead of having them decoded to spaces.
     * </p>
     *
     * @param theString the escaped value, may be {@literal null}
     * @return the decoded value, the input itself if it contains nothing to decode, or
     * {@literal null} if the input was {@literal null}
     */
    public static String unescape(String theString) {
        if (theString == null) {
            return null;
        }
        // If the user passes "_outputFormat" as a GET request parameter directly in the URL:
        final boolean shouldEscapePlus = !theString.startsWith("application/");

        boolean needsDecoding = false;
        for (int i = 0; i < theString.length(); i++) {
            char nextChar = theString.charAt(i);
            if (nextChar == '%' || (nextChar == '+' && shouldEscapePlus)) {
                needsDecoding = true;
                break;
            }
        }
        if (!needsDecoding) {
            return theString;
        }

        // URLDecoder always turns '+' into a space, so when the plus must survive it is
        // protected as a percent escape before decoding
        String toDecode = shouldEscapePlus ? theString : theString.replace("+", "%2B");
        try {
            // Yes it would be nice to not use a string "UTF-8" but the equivalent
            // method that takes Charset is JDK10+ only... sigh....
            return URLDecoder.decode(toDecode, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e);
        }
    }

    /**
     * Creates list of sub URIs candidates for search with :above modifier
     * Example input: http://[host]/[pathPart1]/[pathPart2]
     * Example output (most specific first): http://[host]/[pathPart1]/[pathPart2], http://[host]/[pathPart1], http://[host]
     * <p>
     * Empty path segments (repeated or trailing slashes) are ignored.
     * </p>
     *
     * @param theUri String URI parameter
     * @return List of URI candidates
     * @throws InvalidRequestException if the value is not a syntactically valid URI with both a scheme and a host
     */
    public static List<String> getAboveUriCandidates(String theUri) {
        if (theUri == null) {
            throwInvalidRequestExceptionForNotValidUri(null, null);
        }
        try {
            URI uri = new URI(theUri);
            if (uri.getScheme() == null || uri.getHost() == null) {
                throwInvalidRequestExceptionForNotValidUri(theUri, null);
            }
        } catch (URISyntaxException theCause) {
            throwInvalidRequestExceptionForNotValidUri(theUri, theCause);
        }

        // A URI with a host always has the form scheme://authority..., so the first "://"
        // terminates the scheme. Plain string handling is used so that the result does not
        // depend on the file system rules of the platform.
        int authorityStart = theUri.indexOf("://") + 3;
        String schemePrefix = theUri.substring(0, authorityStart);

        List<String> segments = new ArrayList<>();
        for (String nextSegment : theUri.substring(authorityStart).split("/")) {
            if (!nextSegment.isEmpty()) {
                segments.add(nextSegment);
            }
        }

        // Longest candidate first, then successively drop the last segment down to the host
        List<String> candidates = new ArrayList<>(segments.size());
        for (int count = segments.size(); count > 0; count--) {
            candidates.add(schemePrefix + String.join("/", segments.subList(0, count)));
        }
        return candidates;
    }

    /**
     * Always throws; centralizes the error code and message for an invalid URI.
     */
    private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) {
        throw new InvalidRequestException(
                Msg.code(2419) + String.format("Provided URI is not valid: %s", theUri), theCause);
    }

    /**
     * Holder for the components produced by {@link UrlUtil#parseUrl(String)}. Any component
     * that was not present in the parsed URL is {@literal null}.
     */
    public static class UrlParts {
        private String myParams;
        private String myResourceId;
        private String myResourceType;
        private String myVersionId;

        public String getParams() {
            return myParams;
        }

        public void setParams(String theParams) {
            myParams = theParams;
        }

        public String getResourceId() {
            return myResourceId;
        }

        public void setResourceId(String theResourceId) {
            myResourceId = theResourceId;
        }

        public String getResourceType() {
            return myResourceType;
        }

        public void setResourceType(String theResourceType) {
            myResourceType = theResourceType;
        }

        public String getVersionId() {
            return myVersionId;
        }

        public void setVersionId(String theVersionId) {
            myVersionId = theVersionId;
        }
    }
}