001/*-
002 * #%L
003 * HAPI FHIR - Server Framework
004 * %%
005 * Copyright (C) 2014 - 2025 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.rest.server.util;
021
022import org.hl7.fhir.utilities.xhtml.XhtmlNode;
023import org.owasp.html.HtmlPolicyBuilder;
024import org.owasp.html.PolicyFactory;
025import org.owasp.html.Sanitizers;
026
027public class NarrativeUtil {
028
029        /**
030         * Non instantiable
031         */
032        private NarrativeUtil() {
033                super();
034        }
035
036        /**
037         * This method accepts an Xhtml (generally a narrative) and sanitizes it,
038         * removing unsafe elements. This method leverages the
039         * <a href="https://github.com/OWASP/java-html-sanitizer/blob/master/pom.xml">OWASP Java HTML Sanitizer</a>
040         * to perform this task. The policy allows the following:
041         * <ul>
042         *    <li>Block tags are allowed</li>
043         *    <li>Tables are allowed</li>
044         *    <li>Basic styles are allowed but any styles considered unsafe are removed from the document (e.g. any style declarations that could be used to load external content)</li>
045         *    <li>Attributes considered safe are allowed</li>
046         *    <li>Any links (&lta href="....") are removed although any text inside the link is retained</li>
047         *    <li>All other elements and attributes are removed</li>
048         * </ul>
049         */
050        public static String sanitizeHtmlFragment(String theHtml) {
051                PolicyFactory idPolicy =
052                                new HtmlPolicyBuilder().allowAttributes("id").globally().toFactory();
053
054                PolicyFactory policy = Sanitizers.FORMATTING
055                                .and(Sanitizers.BLOCKS)
056                                .and(Sanitizers.TABLES)
057                                .and(Sanitizers.STYLES)
058                                .and(idPolicy);
059                return policy.sanitize(theHtml);
060        }
061
062        /**
063         * This method accepts an Xhtml (generally a narrative) and sanitizes it,
064         * removing unsafe elements. This method leverages the
065         * <a href="https://github.com/OWASP/java-html-sanitizer/blob/master/pom.xml">OWASP Java HTML Sanitizer</a>
066         * to perform this task. The policy allows the following:
067         * <ul>
068         *    <li>Block tags are allowed</li>
069         *    <li>Tables are allowed</li>
070         *    <li>Basic styles are allowed but any styles considered unsafe are removed from the document (e.g. any style declarations that could be used to load external content)</li>
071         *    <li>Attributes considered safe are allowed</li>
072         *    <li>Any links (&lta href="....") are removed although any text inside the link is retained</li>
073         *    <li>All other elements and attributes are removed</li>
074         * </ul>
075         */
076        public static XhtmlNode sanitize(XhtmlNode theNode) {
077                String html = theNode.getValueAsString();
078
079                String safeHTML = sanitizeHtmlFragment(html);
080
081                XhtmlNode retVal = new XhtmlNode();
082                retVal.setValueAsString(safeHTML);
083                return retVal;
084        }
085}