001/*- 002 * #%L 003 * HAPI FHIR - Server Framework 004 * %% 005 * Copyright (C) 2014 - 2024 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.rest.server.util; 021 022import org.hl7.fhir.utilities.xhtml.XhtmlNode; 023import org.owasp.html.HtmlPolicyBuilder; 024import org.owasp.html.PolicyFactory; 025import org.owasp.html.Sanitizers; 026 027public class NarrativeUtil { 028 029 /** 030 * Non instantiable 031 */ 032 private NarrativeUtil() { 033 super(); 034 } 035 036 /** 037 * This method accepts an Xhtml (generally a narrative) and sanitizes it, 038 * removing unsafe elements. This method leverages the 039 * <a href="https://github.com/OWASP/java-html-sanitizer/blob/master/pom.xml">OWASP Java HTML Sanitizer</a> 040 * to perform this task. The policy allows the following: 041 * <ul> 042 * <li>Block tags are allowed</li> 043 * <li>Tables are allowed</li> 044 * <li>Basic styles are allowed but any styles considered unsafe are removed from the document (e.g. any style declarations that could be used to load external content)</li> 045 * <li>Attributes considered safe are allowed</li> 046 * <li>Any links (<a href="....") are removed although any text inside the link is retained</li> 047 * <li>All other elements and attributes are removed</li> 048 * </ul> 049 */ 050 public static String sanitizeHtmlFragment(String theHtml) { 051 PolicyFactory idPolicy = 052 new HtmlPolicyBuilder().allowAttributes("id").globally().toFactory(); 053 054 PolicyFactory policy = Sanitizers.FORMATTING 055 .and(Sanitizers.BLOCKS) 056 .and(Sanitizers.TABLES) 057 .and(Sanitizers.STYLES) 058 .and(idPolicy); 059 return policy.sanitize(theHtml); 060 } 061 062 /** 063 * This method accepts an Xhtml (generally a narrative) and sanitizes it, 064 * removing unsafe elements. This method leverages the 065 * <a href="https://github.com/OWASP/java-html-sanitizer/blob/master/pom.xml">OWASP Java HTML Sanitizer</a> 066 * to perform this task. The policy allows the following: 067 * <ul> 068 * <li>Block tags are allowed</li> 069 * <li>Tables are allowed</li> 070 * <li>Basic styles are allowed but any styles considered unsafe are removed from the document (e.g. any style declarations that could be used to load external content)</li> 071 * <li>Attributes considered safe are allowed</li> 072 * <li>Any links (<a href="....") are removed although any text inside the link is retained</li> 073 * <li>All other elements and attributes are removed</li> 074 * </ul> 075 */ 076 public static XhtmlNode sanitize(XhtmlNode theNode) { 077 String html = theNode.getValueAsString(); 078 079 String safeHTML = sanitizeHtmlFragment(html); 080 081 XhtmlNode retVal = new XhtmlNode(); 082 retVal.setValueAsString(safeHTML); 083 return retVal; 084 } 085}