
/*
 * #%L
 * HAPI FHIR - Core Library
 * %%
 * Copyright (C) 2014 - 2025 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package ca.uhn.fhir.util;

import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeResourceDefinition;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.model.primitive.IdDt;
import ca.uhn.fhir.parser.DataFormatException;
import ca.uhn.fhir.rest.api.Constants;
import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
import com.google.common.escape.Escaper;
import com.google.common.net.PercentEscaper;
import jakarta.annotation.Nonnull;
import jakarta.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.hl7.fhir.instance.model.api.IPrimitiveType;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
import static org.apache.commons.lang3.StringUtils.defaultString;
import static org.apache.commons.lang3.StringUtils.endsWith;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;

/**
 * Static helpers for building, parsing, escaping and sanitizing URLs and
 * URL fragments.
 */
@SuppressWarnings("JavadocLinkAsPlainText")
public class UrlUtil {
    private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

    private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
    private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

    /**
     * Non instantiable
     */
    private UrlUtil() {}

    /**
     * Cleans up a value that will be serialized as an HTTP header. This method:
     * <p>
     * - Strips any newline (\r or \n) characters
     *
     * @param theHeader the raw header value, must not be <code>null</code>
     * @return the header value with all \r and \n characters removed
     * @since 6.2.0
     */
    public static String sanitizeHeaderValue(String theHeader) {
        return theHeader.replace("\n", "").replace("\r", "");
    }

    /**
     * Removes every character from the given base URL that is not an ASCII
     * letter, an ASCII digit, or one of <code>: / . _ -</code>
     *
     * @param theBaseUrl the base URL to clean, must not be <code>null</code>
     * @return the base URL with all other characters removed
     */
    public static String sanitizeBaseUrl(String theBaseUrl) {
        return theBaseUrl.replaceAll("[^a-zA-Z0-9:/._-]", "");
    }

    /**
     * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
     *
     * @param theBase     the absolute base URL to resolve against (may be <code>null</code>)
     * @param theEndpoint the URL to resolve (may be <code>null</code>)
     * @return <code>null</code> if theEndpoint is <code>null</code>; theEndpoint unchanged if it is
     * already absolute, if theBase is <code>null</code>, or if resolution fails; otherwise the resolved URL
     */
    public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
        if (theEndpoint == null) {
            return null;
        }
        if (isAbsolute(theEndpoint)) {
            return theEndpoint;
        }
        if (theBase == null) {
            return theEndpoint;
        }

        try {
            return new URL(new URL(theBase), theEndpoint).toString();
        } catch (MalformedURLException e) {
            // Trailing Throwable argument is logged as the exception by SLF4J
            ourLog.warn("Failed to resolve relative URL[{}] against absolute base[{}]", theEndpoint, theBase, e);
            return theEndpoint;
        }
    }

    /**
     * Shortens an extension URL to the part after its last "/" when it shares
     * everything up to that last "/" with the parent extension URL.
     *
     * @param theParentExtensionUrl the parent URL (may be <code>null</code>)
     * @param theExtensionUrl       the child URL (may be <code>null</code>)
     * @return <code>null</code> if theExtensionUrl is <code>null</code>; the text following the last "/"
     * of theExtensionUrl if both URLs have their last "/" at the same index and are identical
     * before it; otherwise theExtensionUrl unchanged
     */
    public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
        if (theParentExtensionUrl == null) {
            return theExtensionUrl;
        }
        if (theExtensionUrl == null) {
            return null;
        }

        int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
        int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');

        if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
            return theExtensionUrl;
        }

        if (parentLastSlashIdx != childLastSlashIdx) {
            return theExtensionUrl;
        }

        String parentPrefix = theParentExtensionUrl.substring(0, parentLastSlashIdx);
        String childPrefix = theExtensionUrl.substring(0, parentLastSlashIdx);
        if (!parentPrefix.equals(childPrefix)) {
            return theExtensionUrl;
        }

        // The child contains a "/" at parentLastSlashIdx, so this index is always in range
        return theExtensionUrl.substring(parentLastSlashIdx + 1);
    }

    /**
     * Given a FHIR resource URL, extracts the associated resource type. Supported formats
     * include the following inputs, all of which will return {@literal Patient}. If no
     * resource type can be determined, {@literal null} will be returned.
     * <ul>
     * <li>Patient
     * <li>Patient?
     * <li>Patient?identifier=foo
     * <li>/Patient
     * <li>/Patient?
     * <li>/Patient?identifier=foo
     * <li>http://foo/base/Patient?identifier=foo
     * <li>http://foo/base/Patient/1
     * <li>http://foo/base/Patient/1/_history/2
     * <li>Patient/1
     * <li>Patient/1/_history/2
     * <li>/Patient/1
     * <li>/Patient/1/_history/2
     * </ul>
     *
     * @param theFhirContext the context used to check that the extracted name is a known resource type
     * @param theUrl         the URL to inspect (may be <code>null</code>)
     */
    @Nullable
    public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) {
        if (theUrl == null) {
            return null;
        }
        if (theUrl.startsWith("urn:")) {
            return null;
        }

        String resourceType = null;
        int qmIndex = theUrl.indexOf("?");
        if (qmIndex > 0) {
            // Search URL: the type is the last path segment before the "?"
            String urlResourceType = theUrl.substring(0, qmIndex);
            int slashIdx = urlResourceType.lastIndexOf('/');
            if (slashIdx != -1) {
                urlResourceType = urlResourceType.substring(slashIdx + 1);
            }
            if (isNotBlank(urlResourceType)) {
                resourceType = urlResourceType;
            }
        } else {
            resourceType = theUrl;
            int slashIdx = resourceType.indexOf('/');
            if (slashIdx == 0) {
                resourceType = resourceType.substring(1);
            }

            // Anything still containing a "/" is handed to IdDt to pick out the type
            slashIdx = resourceType.indexOf('/');
            if (slashIdx != -1) {
                resourceType = new IdDt(resourceType).getResourceType();
            }
        }

        try {
            if (isNotBlank(resourceType)) {
                theFhirContext.getResourceDefinition(resourceType);
            }
        } catch (DataFormatException e) {
            // The context rejected the candidate name, so there is no resource type to report
            return null;
        }

        return resourceType;
    }

    /**
     * URL encode a value according to RFC 3986
     * <p>
     * This method is intended to be applied to an individual parameter
     * name or value. For example, if you are creating the URL
     * <code>http://example.com/fhir/Patient?key=føø</code>
     * it would be appropriate to pass the string "føø" to this method,
     * but not appropriate to pass the entire URL since characters
     * such as "/" and "?" would also be escaped.
     * </p>
     *
     * @param theUnescaped the raw value (may be <code>null</code>)
     * @return the escaped value, or <code>null</code> if the input was <code>null</code>
     */
    public static String escapeUrlParam(String theUnescaped) {
        if (theUnescaped == null) {
            return null;
        }
        return PARAMETER_ESCAPER.escape(theUnescaped);
    }

    /**
     * Applies the same encoding as {@link #escapeUrlParam(String)} but against all
     * values in a collection. As with that method, <code>null</code> elements are
     * passed through as <code>null</code>.
     *
     * @param theUnescaped the raw values
     * @return a new list holding the escaped values in iteration order
     */
    public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
        return theUnescaped.stream().map(UrlUtil::escapeUrlParam).collect(Collectors.toList());
    }

    /**
     * Checks whether the value starts with <code>http://</code> or
     * <code>https://</code> (case-insensitive).
     *
     * @param theValue the value to test (may be <code>null</code>)
     * @return <code>true</code> if the value has one of the two prefixes,
     * <code>false</code> otherwise or if the value is <code>null</code>
     */
    public static boolean isAbsolute(String theValue) {
        if (theValue == null) {
            return false;
        }
        String value = theValue.toLowerCase();
        return value.startsWith("http://") || value.startsWith("https://");
    }

    /**
     * Checks whether {@link #sanitizeUrlPart(CharSequence)} would change the given string,
     * i.e. whether it contains <code>'</code>, <code>"</code>, <code>&lt;</code>,
     * <code>&gt;</code> or any character below the space character.
     *
     * @param theString the string to inspect (may be <code>null</code>)
     * @return <code>true</code> if at least one such character is present
     */
    public static boolean isNeedsSanitization(CharSequence theString) {
        if (theString != null) {
            for (int i = 0; i < theString.length(); i++) {
                char nextChar = theString.charAt(i);
                switch (nextChar) {
                    case '\'':
                    case '"':
                    case '<':
                    case '>':
                    case '\n':
                    case '\r':
                        return true;
                }
                if (nextChar < ' ') {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Checks that the URL is at least 8 characters long and starts with
     * <code>http://</code> or <code>https://</code> (case-insensitive). No
     * further validation of the URL is performed.
     *
     * @param theUrl the URL to test (may be <code>null</code>)
     * @return <code>true</code> if both conditions hold
     */
    public static boolean isValid(String theUrl) {
        return theUrl != null && theUrl.length() >= 8 && isAbsolute(theUrl);
    }

    /**
     * Looks up the resource definition for the resource type named in a URL such as
     * <code>Observation?param=foo</code> or <code>http://base/Observation/?param=foo</code>.
     * The resource name is the text after the last "/" that precedes the "?"; if the
     * URL has no "?", the text after the last "/" of the whole URL is used.
     *
     * @param theCtx the context used to resolve the resource name
     * @param theUrl the URL to parse, must not be <code>null</code>
     * @return whatever {@link FhirContext#getResourceDefinition(String)} returns for the extracted name
     * @throws DataFormatException declared by this method; see {@link FhirContext#getResourceDefinition(String)}
     */
    public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl)
            throws DataFormatException {
        String url = theUrl;
        int paramIndex = url.indexOf('?');

        // Change pattern of "Observation/?param=foo" into "Observation?param=foo"
        if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
            url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
            paramIndex--;
        }

        // Without a "?" there is nothing to cut off; substring(0, -1) would throw
        String resourceName = (paramIndex == -1) ? url : url.substring(0, paramIndex);
        int lastSlashIdx = resourceName.lastIndexOf('/');
        if (lastSlashIdx != -1) {
            resourceName = resourceName.substring(lastSlashIdx + 1);
        }
        return theCtx.getResourceDefinition(resourceName);
    }

    /**
     * Parses a query string (with or without a leading "?") into a map of
     * unescaped parameter names to their unescaped values.
     *
     * @param theQueryString the query string (<code>null</code> is treated as empty)
     * @return a map from parameter name to all values seen for that name, in order of appearance
     */
    @Nonnull
    public static Map<String, String[]> parseQueryString(String theQueryString) {
        HashMap<String, List<String>> map = new HashMap<>();
        parseQueryString(theQueryString, map);
        return toQueryStringMap(map);
    }

    /**
     * Splits the query string on "&amp;", splits each token on its first "=",
     * unescapes both halves and appends the value to the list for that key.
     * A token without "=" is stored with an empty-string value; blank tokens
     * are skipped.
     */
    private static void parseQueryString(String theQueryString, Map<String, List<String>> map) {
        String query = defaultString(theQueryString);
        if (query.startsWith("?")) {
            query = query.substring(1);
        }

        StringTokenizer tok = new StringTokenizer(query, "&");
        while (tok.hasMoreTokens()) {
            String nextToken = tok.nextToken();
            if (isBlank(nextToken)) {
                continue;
            }

            int equalsIndex = nextToken.indexOf('=');
            String nextValue;
            String nextKey;
            if (equalsIndex == -1) {
                nextKey = nextToken;
                nextValue = "";
            } else {
                nextKey = nextToken.substring(0, equalsIndex);
                nextValue = nextToken.substring(equalsIndex + 1);
            }

            nextKey = unescape(nextKey);
            nextValue = unescape(nextValue);

            map.computeIfAbsent(nextKey, k -> new ArrayList<>()).add(nextValue);
        }
    }

    /**
     * Parses several query strings into a single combined map. Values for a
     * name that occurs in more than one query string are accumulated.
     *
     * @param theQueryString the query strings to parse
     * @return a map from parameter name to all values seen for that name
     */
    public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
        HashMap<String, List<String>> map = new HashMap<>();
        for (String next : theQueryString) {
            parseQueryString(next, map);
        }
        return toQueryStringMap(map);
    }

    /**
     * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
     * and any version identifiers or fragment hash is removed
     *
     * @param theUrl the canonical URL
     * @return the normalized URL; if the input cannot be parsed as a {@link URI}
     * the same stripping is applied to the raw input instead
     */
    public static String normalizeCanonicalUrlForComparison(String theUrl) {
        String retVal;
        try {
            retVal = new URI(theUrl).normalize().toString();
        } catch (URISyntaxException e) {
            // Not a parseable URI (e.g. it contains "|"): fall back to the raw text
            retVal = theUrl;
        }
        retVal = stripTrailingSlashes(retVal);
        int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
        if (hashOrPipeIndex != -1) {
            // Strip again: "http://foo/|1.0" must not normalize to "http://foo/"
            retVal = stripTrailingSlashes(retVal.substring(0, hashOrPipeIndex));
        }
        return retVal;
    }

    /**
     * Removes every trailing "/" from the given string
     */
    private static String stripTrailingSlashes(String theValue) {
        String retVal = theValue;
        while (endsWith(retVal, "/")) {
            retVal = retVal.substring(0, retVal.length() - 1);
        }
        return retVal;
    }

    /**
     * Parse a URL in one of the following forms:
     * <ul>
     * <li>[Resource Type]?[Search Params]
     * <li>[Resource Type]/[Resource ID]
     * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
     * </ul>
     * URLs starting with "http" have their query string split off and the rest
     * is interpreted by {@link IdDt}.
     *
     * @param theUrl the URL to parse, must not be <code>null</code>
     * @return the parts that were found; parts that are absent are left <code>null</code>
     * @throws InvalidRequestException if a path segment follows the resource ID and is not the history token
     */
    public static UrlParts parseUrl(String theUrl) {
        String url = theUrl;
        UrlParts retVal = new UrlParts();
        if (url.startsWith("http")) {
            int qmIdx = url.indexOf('?');
            if (qmIdx != -1) {
                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
                url = url.substring(0, qmIdx);
            }

            IdDt id = new IdDt(url);
            retVal.setResourceType(id.getResourceType());
            retVal.setResourceId(id.getIdPart());
            retVal.setVersionId(id.getVersionIdPart());
            return retVal;
        }

        // Skip a single leading "/" when it is directly followed by a letter
        int parsingStart = 0;
        if (url.length() > 2) {
            if (url.charAt(0) == '/') {
                if (Character.isLetter(url.charAt(1))) {
                    parsingStart = 1;
                }
            }
        }

        int nextStart = parsingStart;
        boolean nextIsHistory = false;

        for (int idx = parsingStart; idx < url.length(); idx++) {
            char nextChar = url.charAt(idx);
            boolean atEnd = (idx + 1) == url.length();
            if (nextChar == '?' || nextChar == '/' || atEnd) {
                // At the end of the string the current character belongs to the segment,
                // unless it is the "?" delimiter itself
                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
                String nextSubstring = url.substring(nextStart, endIdx);
                if (retVal.getResourceType() == null) {
                    retVal.setResourceType(nextSubstring);
                } else if (retVal.getResourceId() == null) {
                    retVal.setResourceId(nextSubstring);
                } else if (nextIsHistory) {
                    retVal.setVersionId(nextSubstring);
                } else {
                    if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
                        nextIsHistory = true;
                    } else {
                        throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
                    }
                }
                if (nextChar == '?') {
                    // Everything after the "?" is the parameter string; stop parsing segments
                    if (url.length() > idx + 1) {
                        retVal.setParams(url.substring(idx + 1));
                    }
                    break;
                }
                nextStart = idx + 1;
            }
        }

        return retVal;
    }

    /**
     * This method specifically HTML-encodes the &quot; and
     * &lt; characters in order to prevent injection attacks
     *
     * @param theString the primitive whose string value should be sanitized (may be <code>null</code>)
     * @return the sanitized string value, or <code>null</code> if the input was <code>null</code>
     * @see #sanitizeUrlPart(CharSequence)
     */
    public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
        String retVal = null;
        if (theString != null) {
            retVal = sanitizeUrlPart(theString.getValueAsString());
        }
        return retVal;
    }

    /**
     * This method specifically HTML-encodes the &quot; and
     * &lt; characters in order to prevent injection attacks.
     * <p>
     * The following characters are escaped:
     * <ul>
     * <li>' becomes {@code &apos;}</li>
     * <li>" becomes {@code &quot;}</li>
     * <li>{@code <} becomes {@code &lt;}</li>
     * <li>{@code >} becomes {@code &gt;}</li>
     * <li>\n (newline) becomes {@code &#10;}</li>
     * <li>\r (carriage return) becomes {@code &#13;}</li>
     * </ul>
     * Any other character below the space character is dropped.
     *
     * @param theString the text to sanitize (may be <code>null</code>)
     * @return the sanitized text, or <code>null</code> if the input was <code>null</code>
     */
    public static String sanitizeUrlPart(CharSequence theString) {
        if (theString == null) {
            return null;
        }

        boolean needsSanitization = isNeedsSanitization(theString);

        if (needsSanitization) {
            // Ok, we're sanitizing
            StringBuilder buffer = new StringBuilder(theString.length() + 10);
            for (int j = 0; j < theString.length(); j++) {

                char nextChar = theString.charAt(j);
                switch (nextChar) {
                    /*
                     * NB: If you add a constant here, you also need to add it
                     * to isNeedsSanitization()!!
                     */
                    case '\'':
                        buffer.append("&apos;");
                        break;
                    case '"':
                        buffer.append("&quot;");
                        break;
                    case '<':
                        buffer.append("&lt;");
                        break;
                    case '>':
                        buffer.append("&gt;");
                        break;
                    case '\n':
                        buffer.append("&#10;");
                        break;
                    case '\r':
                        buffer.append("&#13;");
                        break;
                    default:
                        // Remaining control characters (below space) are silently dropped
                        if (nextChar >= ' ') {
                            buffer.append(nextChar);
                        }
                        break;
                }
            } // for build escaped string

            return buffer.toString();
        }

        return theString.toString();
    }

    /**
     * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
     * same strings as the input but with sanitization applied
     *
     * @param theParameterValues the values to sanitize (may be <code>null</code>)
     * @return a new array of the same length, or <code>null</code> if the input was <code>null</code>
     */
    public static String[] sanitizeUrlPart(String[] theParameterValues) {
        String[] retVal = null;
        if (theParameterValues != null) {
            retVal = new String[theParameterValues.length];
            for (int i = 0; i < theParameterValues.length; i++) {
                retVal[i] = sanitizeUrlPart(theParameterValues[i]);
            }
        }
        return retVal;
    }

    /**
     * Converts the list-valued working map into the array-valued map returned to callers
     */
    private static Map<String, String[]> toQueryStringMap(Map<String, List<String>> map) {
        HashMap<String, String[]> retVal = new HashMap<>();
        for (Entry<String, List<String>> nextEntry : map.entrySet()) {
            retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
        }
        return retVal;
    }

    /**
     * URL-decodes the given string using UTF-8. The string is returned as-is when it
     * contains no "%" and no "+" that needs decoding. A "+" is left alone (and does not
     * by itself trigger decoding) when the string starts with <code>application/</code>.
     *
     * @param theString the escaped string (may be <code>null</code>)
     * @return the decoded string, or <code>null</code> if the input was <code>null</code>
     */
    public static String unescape(String theString) {
        if (theString == null) {
            return null;
        }
        // If the user passes "_outputFormat" as a GET request parameter directly in the URL:
        final boolean shouldEscapePlus = !theString.startsWith("application/");

        for (int i = 0; i < theString.length(); i++) {
            char nextChar = theString.charAt(i);
            if (nextChar == '%' || (nextChar == '+' && shouldEscapePlus)) {
                try {
                    // Yes it would be nice to not use a string "UTF-8" but the equivalent
                    // method that takes Charset is JDK10+ only... sigh....
                    return URLDecoder.decode(theString, "UTF-8");
                } catch (UnsupportedEncodingException e) {
                    throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e);
                }
            }
        }
        return theString;
    }

    /**
     * Creates list of sub URIs candidates for search with :above modifier
     * Example input: http://[host]/[pathPart1]/[pathPart2]
     * Example output: http://[host]/[pathPart1]/[pathPart2], http://[host]/[pathPart1], http://[host]
     * <p>
     * The full URI is added first, followed by each successively shorter parent.
     * </p>
     *
     * @param theUri String URI parameter
     * @return List of URI candidates
     * @throws InvalidRequestException if the URI cannot be parsed or has no scheme or no host
     */
    public static List<String> getAboveUriCandidates(String theUri) {
        try {
            URI uri = new URI(theUri);
            if (uri.getScheme() == null || uri.getHost() == null) {
                throwInvalidRequestExceptionForNotValidUri(theUri, null);
            }
        } catch (URISyntaxException theCause) {
            throwInvalidRequestExceptionForNotValidUri(theUri, theCause);
        }

        // NOTE(review): the URI is walked as a file-system path, so parsing follows the
        // default file system's path syntax - confirm behaviour on non-Unix hosts.
        // The replace() calls put back the second "/" of "://" in the path's string form.
        List<String> candidates = new ArrayList<>();
        Path path = Paths.get(theUri);
        candidates.add(path.toString().replace(":/", "://"));
        while (path.getParent() != null && path.getParent().toString().contains("/")) {
            candidates.add(path.getParent().toString().replace(":/", "://"));
            path = path.getParent();
        }
        return candidates;
    }

    /**
     * Always throws an {@link InvalidRequestException} describing the invalid URI
     */
    private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) {
        throw new InvalidRequestException(
                Msg.code(2419) + String.format("Provided URI is not valid: %s", theUri), theCause);
    }

    /**
     * Holder for the pieces produced by {@link UrlUtil#parseUrl(String)}
     */
    public static class UrlParts {
        private String myParams;
        private String myResourceId;
        private String myResourceType;
        private String myVersionId;

        public String getParams() {
            return myParams;
        }

        public void setParams(String theParams) {
            myParams = theParams;
        }

        public String getResourceId() {
            return myResourceId;
        }

        public void setResourceId(String theResourceId) {
            myResourceId = theResourceId;
        }

        public String getResourceType() {
            return myResourceType;
        }

        public void setResourceType(String theResourceType) {
            myResourceType = theResourceType;
        }

        public String getVersionId() {
            return myVersionId;
        }

        public void setVersionId(String theVersionId) {
            myVersionId = theVersionId;
        }
    }
}