001/* 002 * #%L 003 * HAPI FHIR - Core Library 004 * %% 005 * Copyright (C) 2014 - 2024 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.util; 021 022import ca.uhn.fhir.context.FhirContext; 023import ca.uhn.fhir.context.RuntimeResourceDefinition; 024import ca.uhn.fhir.i18n.Msg; 025import ca.uhn.fhir.model.primitive.IdDt; 026import ca.uhn.fhir.parser.DataFormatException; 027import ca.uhn.fhir.rest.api.Constants; 028import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException; 029import com.google.common.escape.Escaper; 030import com.google.common.net.PercentEscaper; 031import jakarta.annotation.Nonnull; 032import jakarta.annotation.Nullable; 033import org.apache.commons.lang3.StringUtils; 034import org.apache.http.NameValuePair; 035import org.apache.http.client.utils.URLEncodedUtils; 036import org.apache.http.message.BasicNameValuePair; 037import org.hl7.fhir.instance.model.api.IPrimitiveType; 038 039import java.io.UnsupportedEncodingException; 040import java.net.MalformedURLException; 041import java.net.URI; 042import java.net.URISyntaxException; 043import java.net.URL; 044import java.net.URLDecoder; 045import java.nio.file.Path; 046import java.nio.file.Paths; 047import java.util.ArrayList; 048import java.util.Collection; 049import java.util.HashMap; 050import java.util.List; 051import java.util.Map; 052import java.util.Map.Entry; 053import java.util.StringTokenizer; 054import java.util.stream.Collectors; 055 056import static org.apache.commons.lang3.StringUtils.defaultIfBlank; 057import static org.apache.commons.lang3.StringUtils.defaultString; 058import static org.apache.commons.lang3.StringUtils.endsWith; 059import static org.apache.commons.lang3.StringUtils.isBlank; 060import static org.apache.commons.lang3.StringUtils.isNotBlank; 061 062@SuppressWarnings("JavadocLinkAsPlainText") 063public class UrlUtil { 064 private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class); 065 066 private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*"; 067 private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false); 068 069 /** 070 * Non instantiable 071 */ 072 private UrlUtil() {} 073 074 /** 075 * Cleans up a value that will be serialized as an HTTP header. This method: 076 * <p> 077 * - Strips any newline (\r or \n) characters 078 * 079 * @since 6.2.0 080 */ 081 public static String sanitizeHeaderValue(String theHeader) { 082 return theHeader.replace("\n", "").replace("\r", ""); 083 } 084 085 public static String sanitizeBaseUrl(String theBaseUrl) { 086 return theBaseUrl.replaceAll("[^a-zA-Z0-9:/._-]", ""); 087 } 088 089 /** 090 * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid. 091 */ 092 public static String constructAbsoluteUrl(String theBase, String theEndpoint) { 093 if (theEndpoint == null) { 094 return null; 095 } 096 if (isAbsolute(theEndpoint)) { 097 return theEndpoint; 098 } 099 if (theBase == null) { 100 return theEndpoint; 101 } 102 103 try { 104 return new URL(new URL(theBase), theEndpoint).toString(); 105 } catch (MalformedURLException e) { 106 ourLog.warn( 107 "Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e); 108 return theEndpoint; 109 } 110 } 111 112 public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) { 113 if (theParentExtensionUrl == null) { 114 return theExtensionUrl; 115 } 116 if (theExtensionUrl == null) { 117 return null; 118 } 119 120 int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/'); 121 int childLastSlashIdx = theExtensionUrl.lastIndexOf('/'); 122 123 if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) { 124 return theExtensionUrl; 125 } 126 127 if (parentLastSlashIdx != childLastSlashIdx) { 128 return theExtensionUrl; 129 } 130 131 if (!theParentExtensionUrl 132 .substring(0, parentLastSlashIdx) 133 .equals(theExtensionUrl.substring(0, parentLastSlashIdx))) { 134 return theExtensionUrl; 135 } 136 137 if (theExtensionUrl.length() > parentLastSlashIdx) { 138 return theExtensionUrl.substring(parentLastSlashIdx + 1); 139 } 140 141 return theExtensionUrl; 142 } 143 144 /** 145 * Given a FHIR resource URL, extracts the associated resource type. Supported formats 146 * include the following inputs, all of which will return {@literal Patient}. If no 147 * resource type can be determined, {@literal null} will be returned. 148 * <ul> 149 * <li>Patient 150 * <li>Patient? 151 * <li>Patient?identifier=foo 152 * <li>/Patient 153 * <li>/Patient? 154 * <li>/Patient?identifier=foo 155 * <li>http://foo/base/Patient?identifier=foo 156 * <li>http://foo/base/Patient/1 157 * <li>http://foo/base/Patient/1/_history/2 158 * <li>Patient/1 159 * <li>Patient/1/_history/2 160 * <li>/Patient/1 161 * <li>/Patient/1/_history/2 162 * </ul> 163 */ 164 @Nullable 165 public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) { 166 if (theUrl == null) { 167 return null; 168 } 169 if (theUrl.startsWith("urn:")) { 170 return null; 171 } 172 173 String resourceType = null; 174 int qmIndex = theUrl.indexOf("?"); 175 if (qmIndex > 0) { 176 String urlResourceType = theUrl.substring(0, qmIndex); 177 int slashIdx = urlResourceType.lastIndexOf('/'); 178 if (slashIdx != -1) { 179 urlResourceType = urlResourceType.substring(slashIdx + 1); 180 } 181 if (isNotBlank(urlResourceType)) { 182 resourceType = urlResourceType; 183 } 184 } else { 185 resourceType = theUrl; 186 int slashIdx = resourceType.indexOf('/'); 187 if (slashIdx == 0) { 188 resourceType = resourceType.substring(1); 189 } 190 191 slashIdx = resourceType.indexOf('/'); 192 if (slashIdx != -1) { 193 resourceType = new IdDt(resourceType).getResourceType(); 194 } 195 } 196 197 try { 198 if (isNotBlank(resourceType)) { 199 theFhirContext.getResourceDefinition(resourceType); 200 } 201 } catch (DataFormatException e) { 202 return null; 203 } 204 205 return resourceType; 206 } 207 208 /** 209 * URL encode a value according to RFC 3986 210 * <p> 211 * This method is intended to be applied to an individual parameter 212 * name or value. For example, if you are creating the URL 213 * <code>http://example.com/fhir/Patient?key=føø</code> 214 * it would be appropriate to pass the string "føø" to this method, 215 * but not appropriate to pass the entire URL since characters 216 * such as "/" and "?" would also be escaped. 217 * </P> 218 */ 219 public static String escapeUrlParam(String theUnescaped) { 220 if (theUnescaped == null) { 221 return null; 222 } 223 return PARAMETER_ESCAPER.escape(theUnescaped); 224 } 225 226 /** 227 * Applies the same encodong as {@link #escapeUrlParam(String)} but against all 228 * values in a collection 229 */ 230 public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) { 231 return theUnescaped.stream().map(t -> PARAMETER_ESCAPER.escape(t)).collect(Collectors.toList()); 232 } 233 234 public static boolean isAbsolute(String theValue) { 235 String value = theValue.toLowerCase(); 236 return value.startsWith("http://") || value.startsWith("https://"); 237 } 238 239 public static boolean isNeedsSanitization(CharSequence theString) { 240 if (theString != null) { 241 for (int i = 0; i < theString.length(); i++) { 242 char nextChar = theString.charAt(i); 243 switch (nextChar) { 244 case '\'': 245 case '"': 246 case '<': 247 case '>': 248 case '\n': 249 case '\r': 250 return true; 251 } 252 if (nextChar < ' ') { 253 return true; 254 } 255 } 256 } 257 return false; 258 } 259 260 public static boolean isValid(String theUrl) { 261 if (theUrl == null || theUrl.length() < 8) { 262 return false; 263 } 264 265 String url = theUrl.toLowerCase(); 266 if (url.charAt(0) != 'h') { 267 return false; 268 } 269 if (url.charAt(1) != 't') { 270 return false; 271 } 272 if (url.charAt(2) != 't') { 273 return false; 274 } 275 if (url.charAt(3) != 'p') { 276 return false; 277 } 278 int slashOffset; 279 if (url.charAt(4) == ':') { 280 slashOffset = 5; 281 } else if (url.charAt(4) == 's') { 282 if (url.charAt(5) != ':') { 283 return false; 284 } 285 slashOffset = 6; 286 } else { 287 return false; 288 } 289 290 if (url.charAt(slashOffset) != '/') { 291 return false; 292 } 293 if (url.charAt(slashOffset + 1) != '/') { 294 return false; 295 } 296 297 return true; 298 } 299 300 public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl) 301 throws DataFormatException { 302 String url = theUrl; 303 int paramIndex = url.indexOf('?'); 304 305 // Change pattern of "Observation/?param=foo" into "Observation?param=foo" 306 if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') { 307 url = url.substring(0, paramIndex - 1) + url.substring(paramIndex); 308 paramIndex--; 309 } 310 311 String resourceName = url.substring(0, paramIndex); 312 if (resourceName.contains("/")) { 313 resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1); 314 } 315 return theCtx.getResourceDefinition(resourceName); 316 } 317 318 @Nonnull 319 public static Map<String, String[]> parseQueryString(String theQueryString) { 320 HashMap<String, List<String>> map = new HashMap<>(); 321 parseQueryString(theQueryString, map); 322 return toQueryStringMap(map); 323 } 324 325 private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) { 326 String query = defaultString(theQueryString); 327 if (query.startsWith("?")) { 328 query = query.substring(1); 329 } 330 331 StringTokenizer tok = new StringTokenizer(query, "&"); 332 while (tok.hasMoreTokens()) { 333 String nextToken = tok.nextToken(); 334 if (isBlank(nextToken)) { 335 continue; 336 } 337 338 int equalsIndex = nextToken.indexOf('='); 339 String nextValue; 340 String nextKey; 341 if (equalsIndex == -1) { 342 nextKey = nextToken; 343 nextValue = ""; 344 } else { 345 nextKey = nextToken.substring(0, equalsIndex); 346 nextValue = nextToken.substring(equalsIndex + 1); 347 } 348 349 nextKey = unescape(nextKey); 350 nextValue = unescape(nextValue); 351 352 List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>()); 353 list.add(nextValue); 354 } 355 } 356 357 public static Map<String, String[]> parseQueryStrings(String... theQueryString) { 358 HashMap<String, List<String>> map = new HashMap<>(); 359 for (String next : theQueryString) { 360 parseQueryString(next, map); 361 } 362 return toQueryStringMap(map); 363 } 364 365 /** 366 * Normalizes canonical URLs for comparison. Trailing "/" is stripped, 367 * and any version identifiers or fragment hash is removed 368 */ 369 public static String normalizeCanonicalUrlForComparison(String theUrl) { 370 String retVal; 371 try { 372 retVal = new URI(theUrl).normalize().toString(); 373 } catch (URISyntaxException e) { 374 retVal = theUrl; 375 } 376 while (endsWith(retVal, "/")) { 377 retVal = retVal.substring(0, retVal.length() - 1); 378 } 379 int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|'); 380 if (hashOrPipeIndex != -1) { 381 retVal = retVal.substring(0, hashOrPipeIndex); 382 } 383 return retVal; 384 } 385 386 /** 387 * Parse a URL in one of the following forms: 388 * <ul> 389 * <li>[Resource Type]?[Search Params] 390 * <li>[Resource Type]/[Resource ID] 391 * <li>[Resource Type]/[Resource ID]/_history/[Version ID] 392 * </ul> 393 */ 394 public static UrlParts parseUrl(String theUrl) { 395 String url = theUrl; 396 UrlParts retVal = new UrlParts(); 397 if (url.startsWith("http")) { 398 int qmIdx = url.indexOf('?'); 399 if (qmIdx != -1) { 400 retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null)); 401 url = url.substring(0, qmIdx); 402 } 403 404 IdDt id = new IdDt(url); 405 retVal.setResourceType(id.getResourceType()); 406 retVal.setResourceId(id.getIdPart()); 407 retVal.setVersionId(id.getVersionIdPart()); 408 return retVal; 409 } 410 411 int parsingStart = 0; 412 if (url.length() > 2) { 413 if (url.charAt(0) == '/') { 414 if (Character.isLetter(url.charAt(1))) { 415 parsingStart = 1; 416 } 417 } 418 } 419 420 int nextStart = parsingStart; 421 boolean nextIsHistory = false; 422 423 for (int idx = parsingStart; idx < url.length(); idx++) { 424 char nextChar = url.charAt(idx); 425 boolean atEnd = (idx + 1) == url.length(); 426 if (nextChar == '?' || nextChar == '/' || atEnd) { 427 int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx; 428 String nextSubstring = url.substring(nextStart, endIdx); 429 if (retVal.getResourceType() == null) { 430 retVal.setResourceType(nextSubstring); 431 } else if (retVal.getResourceId() == null) { 432 retVal.setResourceId(nextSubstring); 433 } else if (nextIsHistory) { 434 retVal.setVersionId(nextSubstring); 435 } else { 436 if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) { 437 nextIsHistory = true; 438 } else { 439 throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url); 440 } 441 } 442 if (nextChar == '?') { 443 if (url.length() > idx + 1) { 444 retVal.setParams(url.substring(idx + 1)); 445 } 446 break; 447 } 448 nextStart = idx + 1; 449 } 450 } 451 452 return retVal; 453 } 454 455 /** 456 * This method specifically HTML-encodes the " and 457 * < characters in order to prevent injection attacks 458 */ 459 public static String sanitizeUrlPart(IPrimitiveType<?> theString) { 460 String retVal = null; 461 if (theString != null) { 462 retVal = sanitizeUrlPart(theString.getValueAsString()); 463 } 464 return retVal; 465 } 466 467 /** 468 * This method specifically HTML-encodes the " and 469 * < characters in order to prevent injection attacks. 470 * <p> 471 * The following characters are escaped: 472 * <ul> 473 * <li>'</li> 474 * <li>"</li> 475 * <li><</li> 476 * <li>></li> 477 * <li>\n (newline)</li> 478 * </ul> 479 */ 480 public static String sanitizeUrlPart(CharSequence theString) { 481 if (theString == null) { 482 return null; 483 } 484 485 boolean needsSanitization = isNeedsSanitization(theString); 486 487 if (needsSanitization) { 488 // Ok, we're sanitizing 489 StringBuilder buffer = new StringBuilder(theString.length() + 10); 490 for (int j = 0; j < theString.length(); j++) { 491 492 char nextChar = theString.charAt(j); 493 switch (nextChar) { 494 /* 495 * NB: If you add a constant here, you also need to add it 496 * to isNeedsSanitization()!! 497 */ 498 case '\'': 499 buffer.append("'"); 500 break; 501 case '"': 502 buffer.append("""); 503 break; 504 case '<': 505 buffer.append("<"); 506 break; 507 case '>': 508 buffer.append(">"); 509 break; 510 case '\n': 511 buffer.append(" "); 512 break; 513 case '\r': 514 buffer.append(" "); 515 break; 516 default: 517 if (nextChar >= ' ') { 518 buffer.append(nextChar); 519 } 520 break; 521 } 522 } // for build escaped string 523 524 return buffer.toString(); 525 } 526 527 return theString.toString(); 528 } 529 530 /** 531 * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the 532 * same strings as the input but with sanitization applied 533 */ 534 public static String[] sanitizeUrlPart(String[] theParameterValues) { 535 String[] retVal = null; 536 if (theParameterValues != null) { 537 retVal = new String[theParameterValues.length]; 538 for (int i = 0; i < theParameterValues.length; i++) { 539 retVal[i] = sanitizeUrlPart(theParameterValues[i]); 540 } 541 } 542 return retVal; 543 } 544 545 private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) { 546 HashMap<String, String[]> retVal = new HashMap<>(); 547 for (Entry<String, List<String>> nextEntry : map.entrySet()) { 548 retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0])); 549 } 550 return retVal; 551 } 552 553 public static String unescape(String theString) { 554 if (theString == null) { 555 return null; 556 } 557 // If the user passes "_outputFormat" as a GET request parameter directly in the URL: 558 final boolean shouldEscapePlus = !theString.startsWith("application/"); 559 560 for (int i = 0; i < theString.length(); i++) { 561 char nextChar = theString.charAt(i); 562 if (nextChar == '%' || (nextChar == '+' && shouldEscapePlus)) { 563 try { 564 // Yes it would be nice to not use a string "UTF-8" but the equivalent 565 // method that takes Charset is JDK10+ only... sigh.... 566 return URLDecoder.decode(theString, "UTF-8"); 567 } catch (UnsupportedEncodingException e) { 568 throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e); 569 } 570 } 571 } 572 return theString; 573 } 574 575 public static List<NameValuePair> translateMatchUrl(String theMatchUrl) { 576 List<NameValuePair> parameters; 577 String matchUrl = theMatchUrl; 578 int questionMarkIndex = matchUrl.indexOf('?'); 579 if (questionMarkIndex != -1) { 580 matchUrl = matchUrl.substring(questionMarkIndex + 1); 581 } 582 583 final String[] searchList = new String[] {"|", "=>=", "=<=", "=>", "=<"}; 584 final String[] replacementList = new String[] {"%7C", "=%3E%3D", "=%3C%3D", "=%3E", "=%3C"}; 585 matchUrl = StringUtils.replaceEach(matchUrl, searchList, replacementList); 586 if (matchUrl.contains(" ")) { 587 throw new InvalidRequestException(Msg.code(1744) + "Failed to parse match URL[" + theMatchUrl 588 + "] - URL is invalid (must not contain spaces)"); 589 } 590 591 parameters = URLEncodedUtils.parse((matchUrl), Constants.CHARSET_UTF8, '&'); 592 593 // One issue that has happened before is people putting a "+" sign into an email address in a match URL 594 // and having that turn into a " ". Since spaces are never appropriate for email addresses, let's just 595 // assume they really meant "+". 596 for (int i = 0; i < parameters.size(); i++) { 597 NameValuePair next = parameters.get(i); 598 if (next.getName().equals("email") && next.getValue().contains(" ")) { 599 BasicNameValuePair newPair = 600 new BasicNameValuePair(next.getName(), next.getValue().replace(' ', '+')); 601 parameters.set(i, newPair); 602 } 603 } 604 605 return parameters; 606 } 607 608 /** 609 * Creates list of sub URIs candidates for search with :above modifier 610 * Example input: http://[host]/[pathPart1]/[pathPart2] 611 * Example output: http://[host], http://[host]/[pathPart1], http://[host]/[pathPart1]/[pathPart2] 612 * 613 * @param theUri String URI parameter 614 * @return List of URI candidates 615 */ 616 public static List<String> getAboveUriCandidates(String theUri) { 617 try { 618 URI uri = new URI(theUri); 619 if (uri.getScheme() == null || uri.getHost() == null) { 620 throwInvalidRequestExceptionForNotValidUri(theUri, null); 621 } 622 } catch (URISyntaxException theCause) { 623 throwInvalidRequestExceptionForNotValidUri(theUri, theCause); 624 } 625 626 List<String> candidates = new ArrayList<>(); 627 Path path = Paths.get(theUri); 628 candidates.add(path.toString().replace(":/", "://")); 629 while (path.getParent() != null && path.getParent().toString().contains("/")) { 630 candidates.add(path.getParent().toString().replace(":/", "://")); 631 path = path.getParent(); 632 } 633 return candidates; 634 } 635 636 private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) { 637 throw new InvalidRequestException( 638 Msg.code(2419) + String.format("Provided URI is not valid: %s", theUri), theCause); 639 } 640 641 public static class UrlParts { 642 private String myParams; 643 private String myResourceId; 644 private String myResourceType; 645 private String myVersionId; 646 647 public String getParams() { 648 return myParams; 649 } 650 651 public void setParams(String theParams) { 652 myParams = theParams; 653 } 654 655 public String getResourceId() { 656 return myResourceId; 657 } 658 659 public void setResourceId(String theResourceId) { 660 myResourceId = theResourceId; 661 } 662 663 public String getResourceType() { 664 return myResourceType; 665 } 666 667 public void setResourceType(String theResourceType) { 668 myResourceType = theResourceType; 669 } 670 671 public String getVersionId() { 672 return myVersionId; 673 } 674 675 public void setVersionId(String theVersionId) { 676 myVersionId = theVersionId; 677 } 678 } 679}