
package org.hl7.fhir.dstu3.elementmodel;

/*
  Copyright (c) 2011+, HL7, Inc.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without modification,
  are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice, this
     list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice,
     this list of conditions and the following disclaimer in the documentation
     and/or other materials provided with the distribution.
   * Neither the name of HL7 nor the names of its contributors may be used to
     endorse or promote products derived from this software without specific
     prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

 */



import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;

import org.hl7.fhir.dstu3.context.IWorkerContext;
import org.hl7.fhir.dstu3.elementmodel.Element.SpecialElement;
import org.hl7.fhir.dstu3.formats.FormatUtilities;
import org.hl7.fhir.dstu3.formats.IParser.OutputStyle;
import org.hl7.fhir.dstu3.model.DateTimeType;
import org.hl7.fhir.dstu3.model.ElementDefinition.PropertyRepresentation;
import org.hl7.fhir.dstu3.model.Enumeration;
import org.hl7.fhir.dstu3.model.StructureDefinition;
import org.hl7.fhir.dstu3.utils.ToolingExtensions;
import org.hl7.fhir.dstu3.utils.formats.XmlLocationAnnotator;
import org.hl7.fhir.dstu3.utils.formats.XmlLocationData;
import org.hl7.fhir.exceptions.DefinitionException;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.exceptions.FHIRFormatError;
import org.hl7.fhir.utilities.StringPair;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
import org.hl7.fhir.utilities.xhtml.XhtmlNode;
import org.hl7.fhir.utilities.xhtml.XhtmlParser;
import org.hl7.fhir.utilities.xml.IXMLWriter;
import org.hl7.fhir.utilities.xml.XMLUtil;
import org.hl7.fhir.utilities.xml.XMLWriter;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

/**
 * Reads XML into the element model ({@link Element}) and writes element-model
 * trees back out as XML.
 *
 * <p>Parsing walks a W3C DOM tree and matches attributes, text and child
 * elements against the {@link Property} definitions of the enclosing element.
 * Problems are reported through {@code logError}; {@code policy},
 * {@code context} and {@code linkResolver} are not declared in this class and
 * are presumably inherited from {@link ParserBase}.
 */
@Deprecated
public class XmlParser extends ParserBase {
  /** When true, an undefined attribute whose name ends with {@code :schemaLocation} is not reported. */
  private boolean allowXsiLocation;

  /**
   * Creates a parser bound to the given worker context.
   *
   * @param context passed straight to the superclass constructor
   */
  public XmlParser(IWorkerContext context) {
    super(context);
  }


  /**
   * @return whether {@code *:schemaLocation} attributes are tolerated while parsing
   */
  public boolean isAllowXsiLocation() {
    return allowXsiLocation;
  }

  /**
   * @param allowXsiLocation true to suppress the "Undefined attribute" error for
   *          attributes whose name ends with {@code :schemaLocation}
   */
  public void setAllowXsiLocation(boolean allowXsiLocation) {
    this.allowXsiLocation = allowXsiLocation;
  }


  /**
   * Parses an XML stream into an element-model tree.
   *
   * <p>Any exception raised while building the DOM is reported via
   * {@code logError} as a FATAL "(syntax)" issue and results in a {@code null}
   * return rather than being propagated.
   *
   * @param stream the XML content to read
   * @return the parsed root element, or {@code null} if the XML could not be
   *         read or no definition was found for the root element
   */
  public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
    Document doc = null;
    try {
      DocumentBuilderFactory factory = XMLUtil.newXXEProtectedDocumentBuilderFactory();
      // xxe protection
      factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
      factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
      factory.setXIncludeAware(false);
      factory.setExpandEntityReferences(false);

      factory.setNamespaceAware(true);
      if (policy == ValidationPolicy.EVERYTHING) {
        // use a slower parser that keeps location data
        TransformerFactory transformerFactory = XMLUtil.newXXEProtectedTransformerFactory();
        Transformer nullTransformer = transformerFactory.newTransformer();
        DocumentBuilder docBuilder = factory.newDocumentBuilder();
        doc = docBuilder.newDocument();
        DOMResult domResult = new DOMResult(doc);
        SAXParserFactory spf = XMLUtil.newXXEProtectedSaxParserFactory();
        spf.setNamespaceAware(true);
        spf.setValidating(false);
        XMLReader xmlReader = XMLUtil.getXXEProtectedXMLReader(spf);

        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
        InputSource inputSource = new InputSource(stream);
        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
        nullTransformer.transform(saxSource, domResult);
      } else {
        DocumentBuilder builder = factory.newDocumentBuilder();
        doc = builder.parse(stream);
      }
    } catch (Exception e) {
      logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
      doc = null;
    }
    if (doc == null)
      return null;
    else
      return parse(doc);
  }

  /**
   * Under {@code ValidationPolicy.EVERYTHING}, reports every processing
   * instruction found among the document's top-level nodes.
   */
  private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
    if (policy == ValidationPolicy.EVERYTHING) {
      Node node = document.getFirstChild();
      while (node != null) {
        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
          logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
        node = node.getNextSibling();
      }
    }
  }


  /**
   * @return the start line stored in the node's {@link XmlLocationData} user
   *         data, or 0 when the node carries none
   */
  private int line(Node node) {
    XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
    return loc == null ? 0 : loc.getStartLine();
  }

  /**
   * @return the start column stored in the node's {@link XmlLocationData} user
   *         data, or 0 when the node carries none
   */
  private int col(Node node) {
    XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
    return loc == null ? 0 : loc.getStartColumn();
  }

  /**
   * Parses an already-built DOM document, starting from its document element.
   *
   * @param doc the DOM document
   * @return the parsed root element, or {@code null} if no definition was found
   */
  public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
    checkForProcessingInstruction(doc);
    org.w3c.dom.Element element = doc.getDocumentElement();
    return parse(element);
  }

  /**
   * Parses a DOM element whose namespace and local name identify its structure
   * definition.
   *
   * @param element the DOM element to convert
   * @return the parsed element, or {@code null} when {@code getDefinition}
   *         yields no structure definition
   */
  public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
    String ns = element.getNamespaceURI();
    String name = element.getLocalName();
    String path = "/"+pathPrefix(ns)+name;

    StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
    if (sd == null)
      return null;

    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
    checkElement(element, path, result.getProperty());
    result.markLocation(line(element), col(element));
    result.setType(element.getLocalName());
    parseChildren(path, element, result);
    result.numberChildren();
    return result;
  }

  /**
   * Maps a namespace URI to the short prefix used when building error paths.
   *
   * @return "" for a null/empty namespace, "f:" for FHIR, "h:" for XHTML,
   *         "v3:" for {@code urn:hl7-org:v3}, and "?:" for anything else
   */
  private String pathPrefix(String ns) {
    if (Utilities.noString(ns))
      return "";
    if (ns.equals(FormatUtilities.FHIR_NS))
      return "f:";
    if (ns.equals(FormatUtilities.XHTML_NS))
      return "h:";
    if (ns.equals("urn:hl7-org:v3"))
      return "v3:";
    return "?:";
  }

  /**
   * @return true when the element has no attributes other than namespace
   *         declarations, no non-blank text content and no child elements
   */
  private boolean empty(org.w3c.dom.Element element) {
    for (int i = 0; i < element.getAttributes().getLength(); i++) {
      String n = element.getAttributes().item(i).getNodeName();
      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
        return false;
    }
    if (!Utilities.noString(element.getTextContent().trim()))
      return false;

    Node n = element.getFirstChild();
    while (n != null) {
      if (n.getNodeType() == Node.ELEMENT_NODE)
        return false;
      n = n.getNextSibling();
    }
    return true;
  }

  /**
   * Under {@code ValidationPolicy.EVERYTHING}, reports elements that are empty
   * or that are not in the expected namespace.
   *
   * <p>The expected namespace is the FHIR namespace unless the property's
   * definition, or failing that its structure, carries the
   * {@code elementdefinition-namespace} extension.
   */
  private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
    if (policy == ValidationPolicy.EVERYTHING) {
      if (empty(element))
        logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
      String ns = FormatUtilities.FHIR_NS;
      if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
        ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
      else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
        ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
      // getNamespaceURI() is null for an element without a namespace, so the
      // comparison must be null-safe: a missing namespace is reported as a
      // wrong namespace instead of failing with a NullPointerException.
      String actualNs = element.getNamespaceURI();
      boolean sameNs = ns == null ? actualNs == null : ns.equals(actualNs);
      if (!sameNs)
        logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
    }
  }

  /**
   * Parses a DOM element using the structure definition of an explicitly named
   * FHIR type instead of deriving it from the element's own name.
   *
   * @param base the DOM element to convert
   * @param type name of the type, looked up in the FHIR namespace
   * @return the parsed element, or {@code null} when {@code getDefinition}
   *         yields no structure definition for {@code type}
   */
  public Element parse(org.w3c.dom.Element base, String type) throws Exception {
    StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
    // Same handling as parse(org.w3c.dom.Element): no definition, no result.
    if (sd == null)
      return null;
    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
    checkElement(base, path, result.getProperty());
    result.setType(base.getLocalName());
    parseChildren(path, base, result);
    result.numberChildren();
    return result;
  }

  /**
   * Converts the direct text, attributes and child nodes of {@code node} into
   * children (or the value) of {@code context}, recursing into child elements.
   *
   * @param path error-reporting path of {@code node}
   * @param node the DOM element being read
   * @param context the element-model node being populated
   */
  private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
    // this parsing routine retains the original order in the XML file, to support validation
    reapComments(node, context);
    List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node));

    // Direct text is only legal when one of the properties is represented as XML text.
    String text = XMLUtil.getDirectText(node).trim();
    if (!Utilities.noString(text)) {
      Property property = getTextProp(properties);
      if (property != null) {
        context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
      } else {
        logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
      }
    }

    for (int i = 0; i < node.getAttributes().getLength(); i++) {
      Node attr = node.getAttributes().item(i);
      // Namespace declarations are not data attributes.
      if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
        Property property = getAttrProp(properties, attr.getNodeName());
        if (property != null) {
          String av = attr.getNodeValue();
          if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
            av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
          if (property.getName().equals("value") && context.isPrimitive())
            context.setValue(av);
          else
            context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
        } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) {
          logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName(), IssueSeverity.ERROR);
        }
      }
    }

    Node child = node.getFirstChild();
    while (child != null) {
      if (child.getNodeType() == Node.ELEMENT_NODE) {
        Property property = getElementProp(properties, child.getLocalName());
        if (property != null) {
          if (!property.isChoice() && "xhtml".equals(property.getType())) {
            XhtmlParser xp = new XhtmlParser();
            XhtmlNode xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child);
            if (policy == ValidationPolicy.EVERYTHING) {
              for (StringPair s : xp.getValidationIssues()) {
                logError(line(child), col(child), path, IssueType.INVALID, s.getName() + " "+s.getValue(), IssueSeverity.ERROR);
              }
            }
            context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
          } else {
            String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
            Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
            checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
            boolean ok = true;
            if (property.isChoice()) {
              if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
                // DOM getAttributeNS returns "" (not null) for a missing attribute,
                // so test for null-or-empty to detect an absent xsi:type.
                if (Utilities.noString(xsiType)) {
                  logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+"'", IssueSeverity.ERROR);
                  ok = false;
                } else {
                  // Drop the namespace prefix of a qualified type name.
                  if (xsiType.contains(":"))
                    xsiType = xsiType.substring(xsiType.indexOf(":")+1);
                  n.setType(xsiType);
                }
              } else
                n.setType(n.getType());
            }
            context.getChildren().add(n);
            if (ok) {
              if (property.isResource())
                parseResource(npath, (org.w3c.dom.Element) child, n, property);
              else
                parseChildren(npath, (org.w3c.dom.Element) child, n);
            }
          }
        } else
          logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR);
      } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
        logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);
      } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
        // 3 = Node.TEXT_NODE, 8 = Node.COMMENT_NODE: the only other node types tolerated here
        logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
      }
      child = child.getNextSibling();
    }
  }

  /**
   * Finds the property that a child element named {@code nodeName} belongs to.
   *
   * <p>Properties represented as XML attributes or XML text are ignored. A
   * property matches on an exact name, or - for a choice property ending in
   * {@code [x]} - when {@code nodeName} is longer than, and starts with, the
   * name with the {@code [x]} removed.
   *
   * @return the matching property, or {@code null} if none matches
   */
  private Property getElementProp(List<Property> properties, String nodeName) {
    List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
    // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
    // and therefore the longer property names get evaluated first
    Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
      @Override
      public int compare(Property o1, Property o2) {
        return o2.getName().length() - o1.getName().length();
      }
    });
    for (Property p : propsSortedByLongestFirst)
      if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
        if (p.getName().equals(nodeName))
          return p;
        if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3)))
          return p;
      }
    return null;
  }

  /**
   * @return the first property named {@code nodeName} that is represented as
   *         an XML attribute, or {@code null} if there is none
   */
  private Property getAttrProp(List<Property> properties, String nodeName) {
    for (Property p : properties)
      if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR))
        return p;
    return null;
  }

  /**
   * @return the first property represented as XML text, or {@code null} if
   *         there is none
   */
  private Property getTextProp(List<Property> properties) {
    for (Property p : properties)
      if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT))
        return p;
    return null;
  }

  /**
   * Re-expresses an attribute value written in a non-FHIR date format.
   *
   * @param fmt the source format; only {@code "v3"} is supported
   * @param av the attribute value to convert
   * @return the string value of the {@link DateTimeType} parsed from {@code av}
   * @throws FHIRException if {@code fmt} is not a supported format
   */
  private String convertForDateFormat(String fmt, String av) throws FHIRException {
    if ("v3".equals(fmt)) {
      DateTimeType d = DateTimeType.parseV3(av);
      return d.asStringValue();
    } else
      throw new FHIRException("Unknown Data format '"+fmt+"'");
  }

  /**
   * Parses the resource nested inside {@code container} into {@code parent}.
   *
   * <p>The parent's property is replaced by one built from the nested
   * resource's own structure definition, its type is set to the resource name,
   * and the resource's content is parsed as the parent's children.
   *
   * @param string unused
   * @param container DOM element that wraps the nested resource
   * @param parent element-model node that receives the resource content
   * @param elementProperty property of the containing element, handed on to
   *          {@code updateProperty}
   * @throws FHIRFormatError if the container has no child element, or the
   *           child's name is not a known type
   */
  private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
    org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
    // Guard against a container with no child element: report it as a format
    // error instead of failing on the dereference below.
    if (res == null)
      throw new FHIRFormatError("Contained resource element '"+container.getLocalName()+"' does not contain a resource");
    String name = res.getLocalName();
    StructureDefinition sd = context.fetchTypeDefinition(name);
    if (sd == null)
      throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
    parent.setType(name);
    parseChildren(res.getLocalName(), res, parent);
  }

  /**
   * Copies XML comments attached to {@code element} into {@code context}.
   *
   * <p>Collected are the comments between the nearest preceding sibling element
   * and {@code element} (kept in document order), followed by the comments
   * after the last child element. Nothing is collected from inside an element
   * that has no child elements.
   */
  private void reapComments(org.w3c.dom.Element element, Element context) {
    // Walk backwards over the preceding non-element siblings; inserting at
    // index 0 restores document order.
    Node node = element.getPreviousSibling();
    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
      if (node.getNodeType() == Node.COMMENT_NODE)
        context.getComments().add(0, node.getTextContent());
      node = node.getPreviousSibling();
    }
    // Find the last child element, then walk forward over whatever trails it.
    node = element.getLastChild();
    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
      node = node.getPreviousSibling();
    }
    while (node != null) {
      if (node.getNodeType() == Node.COMMENT_NODE)
        context.getComments().add(node.getTextContent());
      node = node.getNextSibling();
    }
  }

  /**
   * @return true when the property's definition lists the XMLATTR representation
   */
  private boolean isAttr(Property property) {
    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
      if (r.getValue() == PropertyRepresentation.XMLATTR) {
        return true;
      }
    }
    return false;
  }

  /**
   * @return true when the property's definition lists the XMLTEXT representation
   */
  private boolean isText(Property property) {
    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
      if (r.getValue() == PropertyRepresentation.XMLTEXT) {
        return true;
      }
    }
    return false;
  }

  /**
   * Writes the element tree as a UTF-8 XML document to {@code stream}.
   *
   * @param e root of the tree to write; its type is used as the root element name
   * @param stream destination for the XML
   * @param style {@code OutputStyle.PRETTY} turns on pretty printing
   * @param base not used by this implementation
   */
  @Override
  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException {
    XMLWriter xml = new XMLWriter(stream, "UTF-8");
    xml.setPretty(style == OutputStyle.PRETTY);
    xml.start();
    xml.setDefaultNamespace(e.getProperty().getNamespace());
    composeElement(xml, e, e.getType());
    xml.end();

  }

  /**
   * Writes the element tree through a caller-supplied XML writer.
   *
   * @param e root of the tree to write; its type is used as the root element name
   * @param xml the writer to emit to; it is started and ended by this method
   */
  public void compose(Element e, IXMLWriter xml) throws Exception {
    xml.start();
    xml.setDefaultNamespace(e.getProperty().getNamespace());
    composeElement(xml, e, e.getType());
    xml.end();
  }

  /**
   * Writes one element, and recursively its children, to {@code xml}.
   *
   * <p>Handled in order: the element's comments; elements represented as XML
   * text; primitives (xhtml as pre-escaped text, anything else as a
   * {@code value} attribute plus any children); and complex elements, whose
   * attribute-represented children are written as attributes before the
   * element is opened.
   *
   * @param xml the writer to emit to
   * @param element the element-model node to write
   * @param elementName the XML element name to use for it
   */
  private void composeElement(IXMLWriter xml, Element element, String elementName) throws IOException {
    for (String s : element.getComments()) {
      xml.comment(s, true);
    }
    if (isText(element.getProperty())) {
      if (linkResolver != null)
        xml.link(linkResolver.resolveProperty(element.getProperty()));
      xml.enter(elementName);
      xml.text(element.getValue());
      xml.exit(elementName);
    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
      // Constant-first comparison: no NullPointerException if the type is unset.
      // (A second isText() test used to sit here; it could never be true because
      // text-represented elements are handled by the first branch above.)
      if ("xhtml".equals(element.getType())) {
        xml.escapedText(element.getValue());
      } else {
        if (element.hasValue()) {
          if (linkResolver != null)
            xml.link(linkResolver.resolveType(element.getType()));
          xml.attribute("value", element.getValue());
        }
        if (linkResolver != null)
          xml.link(linkResolver.resolveProperty(element.getProperty()));
        if (element.hasChildren()) {
          xml.enter(elementName);
          for (Element child : element.getChildren())
            composeElement(xml, child, child.getName());
          xml.exit(elementName);
        } else
          xml.element(elementName);
      }
    } else {
      // Attributes must be written before the element itself is opened.
      for (Element child : element.getChildren()) {
        if (isAttr(child.getProperty())) {
          if (linkResolver != null)
            xml.link(linkResolver.resolveType(child.getType()));
          xml.attribute(child.getName(), child.getValue());
        }
      }
      if (linkResolver != null)
        xml.link(linkResolver.resolveProperty(element.getProperty()));
      xml.enter(elementName);
      // An element with a "special" marker gets an extra wrapper named after its type.
      if (element.getSpecial() != null) {
        if (linkResolver != null)
          xml.link(linkResolver.resolveProperty(element.getProperty()));
        xml.enter(element.getType());
      }
      for (Element child : element.getChildren()) {
        if (isText(child.getProperty())) {
          if (linkResolver != null)
            xml.link(linkResolver.resolveProperty(element.getProperty()));
          xml.text(child.getValue());
        } else if (!isAttr(child.getProperty()))
          composeElement(xml, child, child.getName());
      }
      if (element.getSpecial() != null)
        xml.exit(element.getType());
      xml.exit(elementName);
    }
  }

}