
001package org.hl7.fhir.dstu3.elementmodel; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.OutputStream; 037import java.util.ArrayList; 038import java.util.Collections; 039import java.util.Comparator; 040import java.util.List; 041 042import javax.xml.parsers.DocumentBuilder; 043import javax.xml.parsers.DocumentBuilderFactory; 044import javax.xml.parsers.SAXParser; 045import javax.xml.parsers.SAXParserFactory; 046import javax.xml.transform.Transformer; 047import javax.xml.transform.TransformerFactory; 048import javax.xml.transform.dom.DOMResult; 049import javax.xml.transform.sax.SAXSource; 050 051import org.hl7.fhir.dstu3.context.IWorkerContext; 052import org.hl7.fhir.dstu3.elementmodel.Element.SpecialElement; 053import org.hl7.fhir.dstu3.formats.FormatUtilities; 054import org.hl7.fhir.dstu3.formats.IParser.OutputStyle; 055import org.hl7.fhir.dstu3.model.DateTimeType; 056import org.hl7.fhir.dstu3.model.ElementDefinition.PropertyRepresentation; 057import org.hl7.fhir.dstu3.model.Enumeration; 058import org.hl7.fhir.dstu3.model.StructureDefinition; 059import org.hl7.fhir.dstu3.utils.ToolingExtensions; 060import org.hl7.fhir.dstu3.utils.formats.XmlLocationAnnotator; 061import org.hl7.fhir.dstu3.utils.formats.XmlLocationData; 062import org.hl7.fhir.exceptions.DefinitionException; 063import org.hl7.fhir.exceptions.FHIRException; 064import org.hl7.fhir.exceptions.FHIRFormatError; 065import org.hl7.fhir.utilities.StringPair; 066import org.hl7.fhir.utilities.Utilities; 067import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity; 068import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType; 069import org.hl7.fhir.utilities.xhtml.XhtmlComposer; 070import org.hl7.fhir.utilities.xhtml.XhtmlNode; 071import org.hl7.fhir.utilities.xhtml.XhtmlParser; 072import org.hl7.fhir.utilities.xml.IXMLWriter; 073import org.hl7.fhir.utilities.xml.XMLUtil; 074import org.hl7.fhir.utilities.xml.XMLWriter; 075import org.w3c.dom.Document; 076import org.w3c.dom.Node; 077import org.xml.sax.InputSource; 078import org.xml.sax.XMLReader; 079 080public class XmlParser extends ParserBase { 081 private boolean allowXsiLocation; 082 083 public XmlParser(IWorkerContext context) { 084 super(context); 085 } 086 087 088 public boolean isAllowXsiLocation() { 089 return allowXsiLocation; 090 } 091 092 public void setAllowXsiLocation(boolean allowXsiLocation) { 093 this.allowXsiLocation = allowXsiLocation; 094 } 095 096 097 public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 098 Document doc = null; 099 try { 100 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 101 // xxe protection 102 factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 103 factory.setFeature("http://xml.org/sax/features/external-general-entities", false); 104 factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 105 factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 106 factory.setXIncludeAware(false); 107 factory.setExpandEntityReferences(false); 108 109 factory.setNamespaceAware(true); 110 if (policy == ValidationPolicy.EVERYTHING) { 111 // use a slower parser that keeps location data 112 TransformerFactory transformerFactory = TransformerFactory.newInstance(); 113 Transformer nullTransformer = transformerFactory.newTransformer(); 114 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 115 doc = docBuilder.newDocument(); 116 DOMResult domResult = new DOMResult(doc); 117 SAXParserFactory spf = SAXParserFactory.newInstance(); 118 spf.setNamespaceAware(true); 119 spf.setValidating(false); 120 // xxe protection 121 spf.setFeature("http://xml.org/sax/features/external-general-entities", false); 122 spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 123 SAXParser saxParser = spf.newSAXParser(); 124 XMLReader xmlReader = saxParser.getXMLReader(); 125 // xxe protection 126 xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false); 127 xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 128 129 XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc); 130 InputSource inputSource = new InputSource(stream); 131 SAXSource saxSource = new SAXSource(locationAnnotator, inputSource); 132 nullTransformer.transform(saxSource, domResult); 133 } else { 134 DocumentBuilder builder = factory.newDocumentBuilder(); 135 doc = builder.parse(stream); 136 } 137 } catch (Exception e) { 138 logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL); 139 doc = null; 140 } 141 if (doc == null) 142 return null; 143 else 144 return parse(doc); 145 } 146 147 private void checkForProcessingInstruction(Document document) throws FHIRFormatError { 148 if (policy == ValidationPolicy.EVERYTHING) { 149 Node node = document.getFirstChild(); 150 while (node != null) { 151 if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) 152 logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR); 153 node = node.getNextSibling(); 154 } 155 } 156 } 157 158 159 private int line(Node node) { 160 XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 161 return loc == null ? 0 : loc.getStartLine(); 162 } 163 164 private int col(Node node) { 165 XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 166 return loc == null ? 0 : loc.getStartColumn(); 167 } 168 169 public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 170 checkForProcessingInstruction(doc); 171 org.w3c.dom.Element element = doc.getDocumentElement(); 172 return parse(element); 173 } 174 175 public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 176 String ns = element.getNamespaceURI(); 177 String name = element.getLocalName(); 178 String path = "/"+pathPrefix(ns)+name; 179 180 StructureDefinition sd = getDefinition(line(element), col(element), ns, name); 181 if (sd == null) 182 return null; 183 184 Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd)); 185 checkElement(element, path, result.getProperty()); 186 result.markLocation(line(element), col(element)); 187 result.setType(element.getLocalName()); 188 parseChildren(path, element, result); 189 result.numberChildren(); 190 return result; 191 } 192 193 private String pathPrefix(String ns) { 194 if (Utilities.noString(ns)) 195 return ""; 196 if (ns.equals(FormatUtilities.FHIR_NS)) 197 return "f:"; 198 if (ns.equals(FormatUtilities.XHTML_NS)) 199 return "h:"; 200 if (ns.equals("urn:hl7-org:v3")) 201 return "v3:"; 202 return "?:"; 203 } 204 205 private boolean empty(org.w3c.dom.Element element) { 206 for (int i = 0; i < element.getAttributes().getLength(); i++) { 207 String n = element.getAttributes().item(i).getNodeName(); 208 if (!n.equals("xmlns") && !n.startsWith("xmlns:")) 209 return false; 210 } 211 if (!Utilities.noString(element.getTextContent().trim())) 212 return false; 213 214 Node n = element.getFirstChild(); 215 while (n != null) { 216 if (n.getNodeType() == Node.ELEMENT_NODE) 217 return false; 218 n = n.getNextSibling(); 219 } 220 return true; 221 } 222 223 private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError { 224 if (policy == ValidationPolicy.EVERYTHING) { 225 if (empty(element)) 226 logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR); 227 String ns = FormatUtilities.FHIR_NS; 228 if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace")) 229 ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"); 230 else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace")) 231 ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"); 232 if (!element.getNamespaceURI().equals(ns)) 233 logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR); 234 } 235 } 236 237 public Element parse(org.w3c.dom.Element base, String type) throws Exception { 238 StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type); 239 Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd)); 240 String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName(); 241 checkElement(base, path, result.getProperty()); 242 result.setType(base.getLocalName()); 243 parseChildren(path, base, result); 244 result.numberChildren(); 245 return result; 246 } 247 248 private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException { 249 // this parsing routine retains the original order in a the XML file, to support validation 250 reapComments(node, context); 251 List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node)); 252 253 String text = XMLUtil.getDirectText(node).trim(); 254 if (!Utilities.noString(text)) { 255 Property property = getTextProp(properties); 256 if (property != null) { 257 context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node))); 258 } else { 259 logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR); 260 } 261 } 262 263 for (int i = 0; i < node.getAttributes().getLength(); i++) { 264 Node attr = node.getAttributes().item(i); 265 if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) { 266 Property property = getAttrProp(properties, attr.getNodeName()); 267 if (property != null) { 268 String av = attr.getNodeValue(); 269 if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat")) 270 av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av); 271 if (property.getName().equals("value") && context.isPrimitive()) 272 context.setValue(av); 273 else 274 context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node))); 275 } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) { 276 logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName(), IssueSeverity.ERROR); 277 } 278 } 279 } 280 281 Node child = node.getFirstChild(); 282 while (child != null) { 283 if (child.getNodeType() == Node.ELEMENT_NODE) { 284 Property property = getElementProp(properties, child.getLocalName()); 285 if (property != null) { 286 if (!property.isChoice() && "xhtml".equals(property.getType())) { 287 XhtmlParser xp = new XhtmlParser(); 288 XhtmlNode xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child); 289 if (policy == ValidationPolicy.EVERYTHING) { 290 for (StringPair s : xp.getValidationIssues()) { 291 logError(line(child), col(child), path, IssueType.INVALID, s.getName() + " "+s.getValue(), IssueSeverity.ERROR); 292 } 293 } 294 context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child))); 295 } else { 296 String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName(); 297 Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child)); 298 checkElement((org.w3c.dom.Element) child, npath, n.getProperty()); 299 boolean ok = true; 300 if (property.isChoice()) { 301 if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) { 302 String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type"); 303 if (xsiType == null) { 304 logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR); 305 ok = false; 306 } else { 307 if (xsiType.contains(":")) 308 xsiType = xsiType.substring(xsiType.indexOf(":")+1); 309 n.setType(xsiType); 310 } 311 } else 312 n.setType(n.getType()); 313 } 314 context.getChildren().add(n); 315 if (ok) { 316 if (property.isResource()) 317 parseResource(npath, (org.w3c.dom.Element) child, n, property); 318 else 319 parseChildren(npath, (org.w3c.dom.Element) child, n); 320 } 321 } 322 } else 323 logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR); 324 } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){ 325 logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR); 326 } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) { 327 logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR); 328 } 329 child = child.getNextSibling(); 330 } 331 } 332 333 private Property getElementProp(List<Property> properties, String nodeName) { 334 List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties); 335 // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x] 336 // and therefore the longer property names get evaluated first 337 Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() { 338 @Override 339 public int compare(Property o1, Property o2) { 340 return o2.getName().length() - o1.getName().length(); 341 } 342 }); 343 for (Property p : propsSortedByLongestFirst) 344 if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) { 345 if (p.getName().equals(nodeName)) 346 return p; 347 if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 348 return p; 349 } 350 return null; 351 } 352 353 private Property getAttrProp(List<Property> properties, String nodeName) { 354 for (Property p : properties) 355 if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 356 return p; 357 return null; 358 } 359 360 private Property getTextProp(List<Property> properties) { 361 for (Property p : properties) 362 if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 363 return p; 364 return null; 365 } 366 367 private String convertForDateFormat(String fmt, String av) throws FHIRException { 368 if ("v3".equals(fmt)) { 369 DateTimeType d = DateTimeType.parseV3(av); 370 return d.asStringValue(); 371 } else 372 throw new FHIRException("Unknown Data format '"+fmt+"'"); 373 } 374 375 private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 376 org.w3c.dom.Element res = XMLUtil.getFirstChild(container); 377 String name = res.getLocalName(); 378 StructureDefinition sd = context.fetchTypeDefinition(name); 379 if (sd == null) 380 throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')"); 381 parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty); 382 parent.setType(name); 383 parseChildren(res.getLocalName(), res, parent); 384 } 385 386 private void reapComments(org.w3c.dom.Element element, Element context) { 387 Node node = element.getPreviousSibling(); 388 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 389 if (node.getNodeType() == Node.COMMENT_NODE) 390 context.getComments().add(0, node.getTextContent()); 391 node = node.getPreviousSibling(); 392 } 393 node = element.getLastChild(); 394 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 395 node = node.getPreviousSibling(); 396 } 397 while (node != null) { 398 if (node.getNodeType() == Node.COMMENT_NODE) 399 context.getComments().add(node.getTextContent()); 400 node = node.getNextSibling(); 401 } 402 } 403 404 private boolean isAttr(Property property) { 405 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 406 if (r.getValue() == PropertyRepresentation.XMLATTR) { 407 return true; 408 } 409 } 410 return false; 411 } 412 413 private boolean isText(Property property) { 414 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 415 if (r.getValue() == PropertyRepresentation.XMLTEXT) { 416 return true; 417 } 418 } 419 return false; 420 } 421 422 @Override 423 public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException { 424 XMLWriter xml = new XMLWriter(stream, "UTF-8"); 425 xml.setPretty(style == OutputStyle.PRETTY); 426 xml.start(); 427 xml.setDefaultNamespace(e.getProperty().getNamespace()); 428 composeElement(xml, e, e.getType()); 429 xml.end(); 430 431 } 432 433 public void compose(Element e, IXMLWriter xml) throws Exception { 434 xml.start(); 435 xml.setDefaultNamespace(e.getProperty().getNamespace()); 436 composeElement(xml, e, e.getType()); 437 xml.end(); 438 } 439 440 private void composeElement(IXMLWriter xml, Element element, String elementName) throws IOException { 441 for (String s : element.getComments()) { 442 xml.comment(s, true); 443 } 444 if (isText(element.getProperty())) { 445 if (linkResolver != null) 446 xml.link(linkResolver.resolveProperty(element.getProperty())); 447 xml.enter(elementName); 448 xml.text(element.getValue()); 449 xml.exit(elementName); 450 } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) { 451 if (element.getType().equals("xhtml")) { 452 xml.escapedText(element.getValue()); 453 } else if (isText(element.getProperty())) { 454 if (linkResolver != null) 455 xml.link(linkResolver.resolveProperty(element.getProperty())); 456 xml.text(element.getValue()); 457 } else { 458 if (element.hasValue()) { 459 if (linkResolver != null) 460 xml.link(linkResolver.resolveType(element.getType())); 461 xml.attribute("value", element.getValue()); 462 } 463 if (linkResolver != null) 464 xml.link(linkResolver.resolveProperty(element.getProperty())); 465 if (element.hasChildren()) { 466 xml.enter(elementName); 467 for (Element child : element.getChildren()) 468 composeElement(xml, child, child.getName()); 469 xml.exit(elementName); 470 } else 471 xml.element(elementName); 472 } 473 } else { 474 for (Element child : element.getChildren()) { 475 if (isAttr(child.getProperty())) { 476 if (linkResolver != null) 477 xml.link(linkResolver.resolveType(child.getType())); 478 xml.attribute(child.getName(), child.getValue()); 479 } 480 } 481 if (linkResolver != null) 482 xml.link(linkResolver.resolveProperty(element.getProperty())); 483 xml.enter(elementName); 484 if (element.getSpecial() != null) { 485 if (linkResolver != null) 486 xml.link(linkResolver.resolveProperty(element.getProperty())); 487 xml.enter(element.getType()); 488 } 489 for (Element child : element.getChildren()) { 490 if (isText(child.getProperty())) { 491 if (linkResolver != null) 492 xml.link(linkResolver.resolveProperty(element.getProperty())); 493 xml.text(child.getValue()); 494 } else if (!isAttr(child.getProperty())) 495 composeElement(xml, child, child.getName()); 496 } 497 if (element.getSpecial() != null) 498 xml.exit(element.getType()); 499 xml.exit(elementName); 500 } 501 } 502 503}