package org.hl7.fhir.dstu3.elementmodel;

/*
  Copyright (c) 2011+, HL7, Inc.
  All rights reserved.
  
  Redistribution and use in source and binary forms, with or without modification, 
  are permitted provided that the following conditions are met:
    
   * Redistributions of source code must retain the above copyright notice, this 
     list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice, 
     this list of conditions and the following disclaimer in the documentation 
     and/or other materials provided with the distribution.
   * Neither the name of HL7 nor the names of its contributors may be used to 
     endorse or promote products derived from this software without specific 
     prior written permission.
  
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  POSSIBILITY OF SUCH DAMAGE.
  
 */



import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;

import org.hl7.fhir.dstu3.context.IWorkerContext;
import org.hl7.fhir.dstu3.elementmodel.Element.SpecialElement;
import org.hl7.fhir.dstu3.formats.FormatUtilities;
import org.hl7.fhir.dstu3.formats.IParser.OutputStyle;
import org.hl7.fhir.dstu3.model.DateTimeType;
import org.hl7.fhir.dstu3.model.ElementDefinition.PropertyRepresentation;
import org.hl7.fhir.dstu3.model.Enumeration;
import org.hl7.fhir.dstu3.model.StructureDefinition;
import org.hl7.fhir.dstu3.utils.ToolingExtensions;
import org.hl7.fhir.dstu3.utils.formats.XmlLocationAnnotator;
import org.hl7.fhir.dstu3.utils.formats.XmlLocationData;
import org.hl7.fhir.exceptions.DefinitionException;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.exceptions.FHIRFormatError;
import org.hl7.fhir.utilities.StringPair;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
import org.hl7.fhir.utilities.xhtml.XhtmlNode;
import org.hl7.fhir.utilities.xhtml.XhtmlParser;
import org.hl7.fhir.utilities.xml.IXMLWriter;
import org.hl7.fhir.utilities.xml.XMLUtil;
import org.hl7.fhir.utilities.xml.XMLWriter;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

/**
 * Element-model parser/composer for the XML format.
 *
 * Parsing turns an XML stream or DOM tree into a tree of {@link Element} objects, reporting
 * problems through {@code logError}. Composing writes an {@link Element} tree back out through
 * an {@link IXMLWriter}.
 */
public class XmlParser extends ParserBase {
  /** When true, attributes whose name ends in ":schemaLocation" are not reported as undefined. */
  private boolean allowXsiLocation;

  /**
   * @param context worker context handed to {@link ParserBase} and used to look up definitions
   */
  public XmlParser(IWorkerContext context) {
    super(context);
  }


  /**
   * @return whether attributes ending in ":schemaLocation" are tolerated while parsing
   */
  public boolean isAllowXsiLocation() {
    return allowXsiLocation;
  }

  /**
   * @param allowXsiLocation true to suppress the "Undefined attribute" error for attributes
   *        whose name ends in ":schemaLocation"
   */
  public void setAllowXsiLocation(boolean allowXsiLocation) {
    this.allowXsiLocation = allowXsiLocation;
  }


  /**
   * Reads an XML document from the stream and parses it into the element model.
   *
   * When the validation policy is EVERYTHING, the document is loaded through a SAX pipeline
   * wrapped in an {@link XmlLocationAnnotator} so that location data is kept on the DOM nodes;
   * otherwise a plain {@link DocumentBuilder} is used.
   *
   * Any exception raised while building the DOM is reported through {@code logError} as a
   * FATAL "(syntax)" issue; if that call returns, this method returns null.
   *
   * @param stream the XML source
   * @return the parsed root element, or null if no DOM could be built or no definition was found
   */
  public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
    Document doc = null;
    try {
      DocumentBuilderFactory factory = XMLUtil.newXXEProtectedDocumentBuilderFactory();
      // xxe protection
      factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
      factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
      factory.setXIncludeAware(false);
      factory.setExpandEntityReferences(false);

      factory.setNamespaceAware(true);
      if (policy == ValidationPolicy.EVERYTHING) {
        // use a slower parser that keeps location data
        TransformerFactory transformerFactory = XMLUtil.newXXEProtectedTransformerFactory();
        Transformer nullTransformer = transformerFactory.newTransformer();
        DocumentBuilder docBuilder = factory.newDocumentBuilder();
        doc = docBuilder.newDocument();
        DOMResult domResult = new DOMResult(doc);
        SAXParserFactory spf = XMLUtil.newXXEProtectedSaxParserFactory();
        spf.setNamespaceAware(true);
        spf.setValidating(false);
        XMLReader xmlReader = XMLUtil.getXXEProtectedXMLReader(spf);

        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
        InputSource inputSource = new InputSource(stream);
        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
        nullTransformer.transform(saxSource, domResult);
      } else {
        DocumentBuilder builder = factory.newDocumentBuilder();
        doc = builder.parse(stream);
      }
    } catch (Exception e) {
      logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
      doc = null;
    }
    if (doc == null)
      return null;
    else
      return parse(doc);
  }

  /**
   * Under the EVERYTHING policy, reports an error for each processing instruction found among
   * the document's top-level nodes.
   */
  private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
    if (policy == ValidationPolicy.EVERYTHING) {
      Node node = document.getFirstChild();
      while (node != null) {
        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
          logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
        node = node.getNextSibling();
      }
    }
  }


  /**
   * @return the start line recorded in the node's {@link XmlLocationData} user data, or 0 if none
   */
  private int line(Node node) {
    XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
    return loc == null ? 0 : loc.getStartLine();
  }

  /**
   * @return the start column recorded in the node's {@link XmlLocationData} user data, or 0 if none
   */
  private int col(Node node) {
    XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
    return loc == null ? 0 : loc.getStartColumn();
  }

  /**
   * Parses a DOM document: checks for processing instructions, then parses the document element.
   *
   * @return the parsed root element, or null if no definition was found for it
   */
  public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
    checkForProcessingInstruction(doc);
    org.w3c.dom.Element element = doc.getDocumentElement();
    return parse(element);
  }

  /**
   * Parses a DOM element whose definition is looked up from its own namespace and local name.
   *
   * @return the parsed element, or null if {@code getDefinition} returned no definition
   */
  public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
    String ns = element.getNamespaceURI();
    String name = element.getLocalName();
    String path = "/"+pathPrefix(ns)+name;

    StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
    if (sd == null)
      return null;

    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
    checkElement(element, path, result.getProperty());
    result.markLocation(line(element), col(element));
    result.setType(element.getLocalName());
    parseChildren(path, element, result);
    result.numberChildren();
    return result;
  }

  /**
   * Maps a namespace URI to the short prefix used when building error paths.
   *
   * @return "" for no namespace, "f:" for FHIR, "h:" for XHTML, "v3:" for urn:hl7-org:v3,
   *         and "?:" for anything else
   */
  private String pathPrefix(String ns) {
    if (Utilities.noString(ns))
      return "";
    if (ns.equals(FormatUtilities.FHIR_NS))
      return "f:";
    if (ns.equals(FormatUtilities.XHTML_NS))
      return "h:";
    if (ns.equals("urn:hl7-org:v3"))
      return "v3:";
    return "?:";
  }

  /**
   * @return true if the element has no attributes other than namespace declarations, no
   *         non-whitespace text content and no child elements
   */
  private boolean empty(org.w3c.dom.Element element) {
    for (int i = 0; i < element.getAttributes().getLength(); i++) {
      String n = element.getAttributes().item(i).getNodeName();
      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
        return false;
    }
    if (!Utilities.noString(element.getTextContent().trim()))
      return false;

    Node n = element.getFirstChild();
    while (n != null) {
      if (n.getNodeType() == Node.ELEMENT_NODE)
        return false;
      n = n.getNextSibling();
    }
    return true;
  }

  /**
   * Under the EVERYTHING policy, reports an error if the element is empty or if its namespace
   * differs from the expected one. The expected namespace is the FHIR namespace unless the
   * element definition, or failing that the structure, carries the elementdefinition-namespace
   * extension.
   */
  private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
    if (policy == ValidationPolicy.EVERYTHING) {
      if (empty(element))
        logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
      String ns = FormatUtilities.FHIR_NS;
      if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
        ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
      else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
        ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
      // getNamespaceURI() is null for an element with no namespace, so compare from the
      // expected side and report the mismatch instead of failing with a NullPointerException
      if (ns == null || !ns.equals(element.getNamespaceURI()))
        logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
    }
  }

  /**
   * Parses a DOM element against the definition of the named type in the FHIR namespace,
   * rather than a definition derived from the element's own name.
   *
   * @param base the DOM element to parse
   * @param type the type name passed to {@code getDefinition}
   * @return the parsed element, or null if {@code getDefinition} returned no definition
   */
  public Element parse(org.w3c.dom.Element base, String type) throws Exception {
    StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
    // same handling as parse(org.w3c.dom.Element): no definition means nothing to build
    if (sd == null)
      return null;
    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
    checkElement(base, path, result.getProperty());
    result.setType(base.getLocalName());
    parseChildren(path, base, result);
    result.numberChildren();
    return result;
  }

  /**
   * Populates {@code context} from the DOM node: direct text, then attributes, then child
   * nodes in document order. Text, attributes and elements that have no matching property,
   * CDATA sections, and node types other than element/text(3)/comment(8) are reported through
   * {@code logError}.
   *
   * @param path path of {@code node}, used in error reports and as the prefix for child paths
   * @param node the DOM element being read
   * @param context the element-model node being filled in
   */
  private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
    // this parsing routine retains the original order in the XML file, to support validation
    reapComments(node, context);
    List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node));

    String text = XMLUtil.getDirectText(node).trim();
    if (!Utilities.noString(text)) {
      Property property = getTextProp(properties);
      if (property != null) {
        context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
      } else {
        logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
      }
    }

    for (int i = 0; i < node.getAttributes().getLength(); i++) {
      Node attr = node.getAttributes().item(i);
      if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
        Property property = getAttrProp(properties, attr.getNodeName());
        if (property != null) {
          String av = attr.getNodeValue();
          if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
            av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
          if (property.getName().equals("value") && context.isPrimitive())
            context.setValue(av);
          else
            context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
        } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) {
          logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName(), IssueSeverity.ERROR);
        }
      }
    }

    Node child = node.getFirstChild();
    while (child != null) {
      if (child.getNodeType() == Node.ELEMENT_NODE) {
        Property property = getElementProp(properties, child.getLocalName());
        if (property != null) {
          if (!property.isChoice() && "xhtml".equals(property.getType())) {
            XhtmlParser xp = new XhtmlParser();
            XhtmlNode xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child);
            if (policy == ValidationPolicy.EVERYTHING) {
              for (StringPair s : xp.getValidationIssues()) {
                logError(line(child), col(child), path, IssueType.INVALID, s.getName() + " "+s.getValue(), IssueSeverity.ERROR);
              }
            }
            context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
          } else {
            String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
            Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
            checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
            boolean ok = true;
            if (property.isChoice()) {
              if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
                // DOM getAttributeNS yields "" (not null) for a missing attribute, so test for
                // both; otherwise a missing xsi:type would silently become an empty type name
                if (Utilities.noString(xsiType)) {
                  logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+"'", IssueSeverity.ERROR);
                  ok = false;
                } else {
                  // drop any namespace prefix from the xsi:type value
                  if (xsiType.contains(":"))
                    xsiType = xsiType.substring(xsiType.indexOf(":")+1);
                  n.setType(xsiType);
                }
              } else
                n.setType(n.getType());
            }
            context.getChildren().add(n);
            if (ok) {
              if (property.isResource())
                parseResource(npath, (org.w3c.dom.Element) child, n, property);
              else
                parseChildren(npath, (org.w3c.dom.Element) child, n);
            }
          }
        } else
          logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR);
      } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
        logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);
      } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
        logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
      }
      child = child.getNextSibling();
    }
  }

  /**
   * Finds the property represented as a child element with the given name. A property named
   * "foo[x]" matches any node name longer than "foo" that starts with "foo". Properties
   * represented as XML attributes or XML text are skipped.
   *
   * @return the matching property, or null if there is none
   */
  private Property getElementProp(List<Property> properties, String nodeName) {
    List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
    // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
    // and therefore the longer property names get evaluated first
    Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
      @Override
      public int compare(Property o1, Property o2) {
        return o2.getName().length() - o1.getName().length();
      }
    });
    for (Property p : propsSortedByLongestFirst)
      if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
        if (p.getName().equals(nodeName))
          return p;
        if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3)))
          return p;
      }
    return null;
  }

  /**
   * @return the property with the given name that is represented as an XML attribute, or null
   */
  private Property getAttrProp(List<Property> properties, String nodeName) {
    for (Property p : properties)
      if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR))
        return p;
    return null;
  }

  /**
   * @return the first property that is represented as XML text, or null
   */
  private Property getTextProp(List<Property> properties) {
    for (Property p : properties)
      if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT))
        return p;
    return null;
  }

  /**
   * Converts an attribute value from the named date format to the string form produced by
   * {@link DateTimeType#asStringValue()}. Only the "v3" format is supported.
   *
   * @throws FHIRException if {@code fmt} is anything other than "v3"
   */
  private String convertForDateFormat(String fmt, String av) throws FHIRException {
    if ("v3".equals(fmt)) {
      DateTimeType d = DateTimeType.parseV3(av);
      return d.asStringValue();
    } else
      throw new FHIRException("Unknown Data format '"+fmt+"'");
  }

  /**
   * Parses the resource nested inside {@code container} into {@code parent}: the parent's
   * property and type are switched to those of the nested resource, and the resource's
   * children are parsed directly into the parent.
   *
   * @param string unused by this method
   * @param container the DOM element wrapping the nested resource
   * @param parent the element-model node that receives the resource content
   * @param elementProperty the property of the containing element
   * @throws FHIRFormatError if the container holds no element or its name is not a known type
   */
  private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
    org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
    // guard against a container with nothing inside it rather than failing on res.getLocalName()
    if (res == null)
      throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (no child element found in '"+container.getLocalName()+"')");
    String name = res.getLocalName();
    StructureDefinition sd = context.fetchTypeDefinition(name);
    if (sd == null)
      throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
    parent.setType(name);
    parseChildren(res.getLocalName(), res, parent);
  }

  /**
   * Copies XML comments onto {@code context}: first the comments that sit between the previous
   * sibling element and {@code element} (kept in document order), then any comments that
   * follow the last child element inside {@code element}.
   */
  private void reapComments(org.w3c.dom.Element element, Element context) {
    Node node = element.getPreviousSibling();
    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
      if (node.getNodeType() == Node.COMMENT_NODE)
        // walking backwards, so insert at the front to preserve document order
        context.getComments().add(0, node.getTextContent());
      node = node.getPreviousSibling();
    }
    // back up from the last child to the last child element, then scan forward from there
    node = element.getLastChild();
    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
      node = node.getPreviousSibling();
    }
    while (node != null) {
      if (node.getNodeType() == Node.COMMENT_NODE)
        context.getComments().add(node.getTextContent());
      node = node.getNextSibling();
    }
  }

  /**
   * @return true if the property's definition lists the XMLATTR representation
   */
  private boolean isAttr(Property property) {
    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
      if (r.getValue() == PropertyRepresentation.XMLATTR) {
        return true;
      }
    }
    return false;
  }

  /**
   * @return true if the property's definition lists the XMLTEXT representation
   */
  private boolean isText(Property property) {
    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
      if (r.getValue() == PropertyRepresentation.XMLTEXT) {
        return true;
      }
    }
    return false;
  }

  /**
   * Writes the element tree to the stream as UTF-8 XML, using the namespace of the root
   * element's property as the default namespace.
   *
   * @param e root of the tree to write
   * @param stream destination
   * @param style PRETTY turns on pretty printing; any other value turns it off
   * @param base unused by this method
   */
  @Override
  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException {
    XMLWriter xml = new XMLWriter(stream, "UTF-8");
    xml.setPretty(style == OutputStyle.PRETTY);
    xml.start();
    xml.setDefaultNamespace(e.getProperty().getNamespace());
    composeElement(xml, e, e.getType());
    xml.end();

  }

  /**
   * Writes the element tree through a caller-supplied writer, using the namespace of the root
   * element's property as the default namespace.
   */
  public void compose(Element e, IXMLWriter xml) throws Exception {
    xml.start();
    xml.setDefaultNamespace(e.getProperty().getNamespace());
    composeElement(xml, e, e.getType());
    xml.end();
  }

  /**
   * Writes one element and, recursively, its children.
   *
   * Comments are written first. An element whose property is represented as XML text is
   * written as an element wrapping its value. Primitives are written with a "value" attribute
   * (xhtml is written as pre-escaped text). Anything else is written as an element whose
   * attribute-represented children become attributes and whose remaining children are written
   * as text or nested elements; when the element has a non-null special marker, its content is
   * additionally wrapped in an element named after its type.
   *
   * @param xml destination writer
   * @param element element to write
   * @param elementName XML name to use for the element
   */
  private void composeElement(IXMLWriter xml, Element element, String elementName) throws IOException {
    for (String s : element.getComments()) {
      xml.comment(s, true);
    }
    if (isText(element.getProperty())) {
      if (linkResolver != null)
        xml.link(linkResolver.resolveProperty(element.getProperty()));
      xml.enter(elementName);
      xml.text(element.getValue());
      xml.exit(elementName);
    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
      // constant-first comparison so a primitive with no type set does not fail here
      if ("xhtml".equals(element.getType())) {
        xml.escapedText(element.getValue());
      } else {
        // text-represented properties were already handled by the first branch above
        if (element.hasValue()) {
          if (linkResolver != null)
            xml.link(linkResolver.resolveType(element.getType()));
          xml.attribute("value", element.getValue());
        }
        if (linkResolver != null)
          xml.link(linkResolver.resolveProperty(element.getProperty()));
        if (element.hasChildren()) {
          xml.enter(elementName);
          for (Element child : element.getChildren())
            composeElement(xml, child, child.getName());
          xml.exit(elementName);
        } else
          xml.element(elementName);
      }
    } else {
      // attributes have to be queued on the writer before the element itself is entered
      for (Element child : element.getChildren()) {
        if (isAttr(child.getProperty())) {
          if (linkResolver != null)
            xml.link(linkResolver.resolveType(child.getType()));
          xml.attribute(child.getName(), child.getValue());
        }
      }
      if (linkResolver != null)
        xml.link(linkResolver.resolveProperty(element.getProperty()));
      xml.enter(elementName);
      if (element.getSpecial() != null) {
        if (linkResolver != null)
          xml.link(linkResolver.resolveProperty(element.getProperty()));
        xml.enter(element.getType());
      }
      for (Element child : element.getChildren()) {
        if (isText(child.getProperty())) {
          if (linkResolver != null)
            xml.link(linkResolver.resolveProperty(element.getProperty()));
          xml.text(child.getValue());
        } else if (!isAttr(child.getProperty()))
          composeElement(xml, child, child.getName());
      }
      if (element.getSpecial() != null)
        xml.exit(element.getType());
      xml.exit(elementName);
    }
  }

}