001package org.hl7.fhir.r5.elementmodel; 002 003import java.io.ByteArrayInputStream; 004 005/* 006 Copyright (c) 2011+, HL7, Inc. 007 All rights reserved. 008 009 Redistribution and use in source and binary forms, with or without modification, 010 are permitted provided that the following conditions are met: 011 012 * Redistributions of source code must retain the above copyright notice, this 013 list of conditions and the following disclaimer. 014 * Redistributions in binary form must reproduce the above copyright notice, 015 this list of conditions and the following disclaimer in the documentation 016 and/or other materials provided with the distribution. 017 * Neither the name of HL7 nor the names of its contributors may be used to 018 endorse or promote products derived from this software without specific 019 prior written permission. 020 021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 022 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 023 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 024 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 025 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 026 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 027 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 028 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 029 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 030 POSSIBILITY OF SUCH DAMAGE. 031 032 */ 033 034 035import java.io.IOException; 036import java.io.InputStream; 037import java.io.OutputStream; 038import java.util.ArrayList; 039import java.util.Collections; 040import java.util.Comparator; 041import java.util.HashSet; 042import java.util.List; 043import java.util.Set; 044 045import javax.xml.parsers.DocumentBuilder; 046import javax.xml.parsers.DocumentBuilderFactory; 047import javax.xml.parsers.SAXParser; 048import javax.xml.parsers.SAXParserFactory; 049import javax.xml.transform.Transformer; 050import javax.xml.transform.TransformerFactory; 051import javax.xml.transform.dom.DOMResult; 052import javax.xml.transform.sax.SAXSource; 053 054import org.hl7.fhir.exceptions.DefinitionException; 055import org.hl7.fhir.exceptions.FHIRException; 056import org.hl7.fhir.exceptions.FHIRFormatError; 057import org.hl7.fhir.r5.conformance.profile.ProfileUtilities; 058import org.hl7.fhir.r5.context.IWorkerContext; 059import org.hl7.fhir.r5.elementmodel.Element.SpecialElement; 060import org.hl7.fhir.r5.elementmodel.Manager.FhirFormat; 061import org.hl7.fhir.r5.formats.FormatUtilities; 062import org.hl7.fhir.r5.formats.IParser.OutputStyle; 063import org.hl7.fhir.r5.model.Constants; 064import org.hl7.fhir.r5.model.DateTimeType; 065import org.hl7.fhir.r5.model.ElementDefinition; 066import org.hl7.fhir.r5.model.ElementDefinition.PropertyRepresentation; 067import org.hl7.fhir.r5.model.Enumeration; 068import org.hl7.fhir.r5.model.StructureDefinition; 069import org.hl7.fhir.r5.utils.ToolingExtensions; 070import org.hl7.fhir.r5.utils.formats.XmlLocationAnnotator; 071import org.hl7.fhir.r5.utils.formats.XmlLocationData; 072import org.hl7.fhir.utilities.ElementDecoration; 073import org.hl7.fhir.utilities.StringPair; 074import org.hl7.fhir.utilities.TextFile; 075import org.hl7.fhir.utilities.Utilities; 076import org.hl7.fhir.utilities.i18n.I18nConstants; 077import org.hl7.fhir.utilities.validation.ValidationMessage; 078import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity; 079import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType; 080import org.hl7.fhir.utilities.xhtml.CDANarrativeFormat; 081import org.hl7.fhir.utilities.xhtml.XhtmlComposer; 082import org.hl7.fhir.utilities.xhtml.XhtmlNode; 083import org.hl7.fhir.utilities.xhtml.XhtmlParser; 084import org.hl7.fhir.utilities.xml.IXMLWriter; 085import org.hl7.fhir.utilities.xml.XMLUtil; 086import org.hl7.fhir.utilities.xml.XMLWriter; 087import org.w3c.dom.Document; 088import org.w3c.dom.Node; 089import org.xml.sax.ErrorHandler; 090import org.xml.sax.InputSource; 091import org.xml.sax.SAXParseException; 092import org.xml.sax.XMLReader; 093 094public class XmlParser extends ParserBase { 095 private boolean allowXsiLocation; 096 private String version; 097 098 public XmlParser(IWorkerContext context) { 099 super(context); 100 } 101 102 private String schemaPath; 103 private boolean markedXhtml; 104 105 public String getSchemaPath() { 106 return schemaPath; 107 } 108 public void setSchemaPath(String schemaPath) { 109 this.schemaPath = schemaPath; 110 } 111 112 public boolean isAllowXsiLocation() { 113 return allowXsiLocation; 114 } 115 116 public void setAllowXsiLocation(boolean allowXsiLocation) { 117 this.allowXsiLocation = allowXsiLocation; 118 } 119 120 public List<ValidatedFragment> parse(InputStream inStream) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 121 122 byte[] content = TextFile.streamToBytes(inStream); 123 ValidatedFragment focusFragment = new ValidatedFragment(ValidatedFragment.FOCUS_NAME, "xml", content, false); 124 125 ByteArrayInputStream stream = new ByteArrayInputStream(content); 126 Document doc = null; 127 try { 128 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 129 // xxe protection 130 factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 131 factory.setFeature("http://xml.org/sax/features/external-general-entities", false); 132 factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 133 factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 134 factory.setXIncludeAware(false); 135 factory.setExpandEntityReferences(false); 136 137 factory.setNamespaceAware(true); 138 if (policy == ValidationPolicy.EVERYTHING) { 139 // The SAX interface appears to not work when reporting the correct version/encoding. 140 // if we can, we'll inspect the header/encoding ourselves 141 142 stream.mark(1024); 143 version = checkHeader(focusFragment.getErrors(), stream); 144 stream.reset(); 145 146 // use a slower parser that keeps location data 147 TransformerFactory transformerFactory = XMLUtil.newXXEProtectedTransformerFactory(); 148 Transformer nullTransformer = transformerFactory.newTransformer(); 149 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 150 doc = docBuilder.newDocument(); 151 DOMResult domResult = new DOMResult(doc); 152 SAXParserFactory spf = SAXParserFactory.newInstance(); 153 spf.setNamespaceAware(true); 154 spf.setValidating(false); 155 // xxe protection 156 spf.setFeature("http://xml.org/sax/features/external-general-entities", false); 157 spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 158 SAXParser saxParser = spf.newSAXParser(); 159 XMLReader xmlReader = saxParser.getXMLReader(); 160 // xxe protection 161 xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false); 162 xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 163 164 XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc); 165 InputSource inputSource = new InputSource(stream); 166 SAXSource saxSource = new SAXSource(locationAnnotator, inputSource); 167 nullTransformer.transform(saxSource, domResult); 168 } else { 169 DocumentBuilder builder = factory.newDocumentBuilder(); 170 builder.setErrorHandler(new NullErrorHandler()); 171 doc = builder.parse(stream); 172 } 173 } catch (Exception e) { 174 if (e.getMessage().contains("lineNumber:") && e.getMessage().contains("columnNumber:")) { 175 int line = Utilities.parseInt(extractVal(e.getMessage(), "lineNumber"), 0); 176 int col = Utilities.parseInt(extractVal(e.getMessage(), "columnNumber"), 0); 177 logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, line, col, "(xml)", IssueType.INVALID, e.getMessage().substring(e.getMessage().lastIndexOf(";")+1).trim(), IssueSeverity.FATAL); 178 } else { 179 logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, 0, 0, "(xml)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL); 180 } 181 doc = null; 182 } 183 if (doc != null) { 184 focusFragment.setElement(parse(focusFragment.getErrors(), doc)); 185 } 186 List<ValidatedFragment> res = new ArrayList<>(); 187 res.add(focusFragment); 188 return res; 189 } 190 191 192 private String extractVal(String src, String name) { 193 src = src.substring(src.indexOf(name)+name.length()+1); 194 src = src.substring(0, src.indexOf(";")).trim(); 195 return src; 196 } 197 private void checkForProcessingInstruction(List<ValidationMessage> errors, Document document) throws FHIRFormatError { 198 if (policy == ValidationPolicy.EVERYTHING && FormatUtilities.FHIR_NS.equals(document.getDocumentElement().getNamespaceURI())) { 199 Node node = document.getFirstChild(); 200 while (node != null) { 201 if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) 202 logError(errors, ValidationMessage.NO_RULE_DATE, line(document, false), col(document, false), "(document)", IssueType.INVALID, context.formatMessage( 203 I18nConstants.NO_PROCESSING_INSTRUCTIONS_ALLOWED_IN_RESOURCES), IssueSeverity.ERROR); 204 node = node.getNextSibling(); 205 } 206 } 207 } 208 209 210 private int line(Node node, boolean end) { 211 XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 212 return loc == null ? 0 : end ? loc.getEndLine() : loc.getStartLine(); 213 } 214 215 private int col(Node node, boolean end) { 216 XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 217 return loc == null ? 0 : end ? loc.getEndColumn() : loc.getStartColumn(); 218 } 219 220 public Element parse(List<ValidationMessage> errors, Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 221 checkForProcessingInstruction(errors, doc); 222 org.w3c.dom.Element element = doc.getDocumentElement(); 223 return parse(errors, element); 224 } 225 226 public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 227 String ns = element.getNamespaceURI(); 228 String name = element.getLocalName(); 229 String path = "/"+pathPrefix(ns)+name; 230 231 StructureDefinition sd = getDefinition(errors, line(element, false), col(element, false), (ns == null ? "noNamespace" : ns), name); 232 if (sd == null) 233 return null; 234 235 Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML); 236 result.setPath(element.getLocalName()); 237 checkElement(errors, element, result, path, result.getProperty(), false); 238 result.markLocation(line(element, false), col(element, false)); 239 result.setType(element.getLocalName()); 240 parseChildren(errors, path, element, result); 241 result.numberChildren(); 242 return result; 243 } 244 245 private String pathPrefix(String ns) { 246 if (Utilities.noString(ns)) 247 return ""; 248 if (ns.equals(FormatUtilities.FHIR_NS)) 249 return "f:"; 250 if (ns.equals(FormatUtilities.XHTML_NS)) 251 return "h:"; 252 if (ns.equals("urn:hl7-org:v3")) 253 return "v3:"; 254 if (ns.equals("urn:hl7-org:sdtc")) 255 return "sdtc:"; 256 if (ns.equals("urn:ihe:pharm")) 257 return "pharm:"; 258 if (ns.equals("http://ns.electronichealth.net.au/Ci/Cda/Extensions/3.0")) 259 return "ext:"; 260 return "?:"; 261 } 262 263 private boolean empty(org.w3c.dom.Element element) { 264 for (int i = 0; i < element.getAttributes().getLength(); i++) { 265 String n = element.getAttributes().item(i).getNodeName(); 266 if (!n.equals("xmlns") && !n.startsWith("xmlns:")) 267 return false; 268 } 269 if (!Utilities.noString(element.getTextContent().trim())) 270 return false; 271 272 Node n = element.getFirstChild(); 273 while (n != null) { 274 if (n.getNodeType() == Node.ELEMENT_NODE) 275 return false; 276 n = n.getNextSibling(); 277 } 278 return true; 279 } 280 281 private void checkElement(List<ValidationMessage> errors, org.w3c.dom.Element element, Element e, String path, Property prop, boolean xsiTypeChecked) throws FHIRFormatError { 282 if (policy == ValidationPolicy.EVERYTHING) { 283 if (empty(element) && FormatUtilities.FHIR_NS.equals(element.getNamespaceURI())) // this rule only applies to FHIR Content 284 logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.ELEMENT_MUST_HAVE_SOME_CONTENT), IssueSeverity.ERROR); 285 String ns = prop.getXmlNamespace(); 286 String elementNs = element.getNamespaceURI(); 287 if (elementNs == null) { 288 elementNs = "noNamespace"; 289 } 290 if (!elementNs.equals(ns)) { 291 logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.WRONG_NAMESPACE__EXPECTED_, ns), IssueSeverity.ERROR); 292 } 293 if (!xsiTypeChecked) { 294 String xsiType = element.getAttributeNS(FormatUtilities.NS_XSI, "type"); 295 if (!Utilities.noString(xsiType)) { 296 String actualType = prop.getXmlTypeName(); 297 if (xsiType.equals(actualType)) { 298 logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_UNNECESSARY), IssueSeverity.INFORMATION); 299 } else { 300 StructureDefinition sd = findLegalConstraint(xsiType, actualType); 301 if (sd != null) { 302 e.setType(sd.getType()); 303 e.setExplicitType(xsiType); 304 } else { 305 logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_WRONG, xsiType, actualType), IssueSeverity.ERROR); 306 } 307 } 308 } 309 } 310 } 311 } 312 313 private StructureDefinition findLegalConstraint(String xsiType, String actualType) { 314 StructureDefinition sdA = context.fetchTypeDefinition(actualType); 315 StructureDefinition sd = context.fetchTypeDefinition(xsiType); 316 while (sd != null) { 317 if (sd == sdA) { 318 return sd; 319 } 320 sd = context.fetchResource(StructureDefinition.class, sd.getBaseDefinition()); 321 } 322 return null; 323 } 324 325 public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element base, String type) throws Exception { 326 StructureDefinition sd = getDefinition(errors, 0, 0, FormatUtilities.FHIR_NS, type); 327 Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML).setNativeObject(base); 328 result.setPath(base.getLocalName()); 329 String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName(); 330 checkElement(errors, base, result, path, result.getProperty(), false); 331 result.setType(base.getLocalName()); 332 parseChildren(errors, path, base, result); 333 result.numberChildren(); 334 return result; 335 } 336 337 private void parseChildren(List<ValidationMessage> errors, String path, org.w3c.dom.Element node, Element element) throws FHIRFormatError, FHIRException, IOException, DefinitionException { 338 // this parsing routine retains the original order in a the XML file, to support validation 339 reapComments(node, element); 340 List<Property> properties = element.getProperty().getChildProperties(element.getName(), XMLUtil.getXsiType(node)); 341 Property cgProp = getChoiceGroupProp(properties); 342 Property mtProp = cgProp == null ? null : getTextProp(cgProp.getChildProperties(null, null)); 343 344 String text = mtProp == null ? XMLUtil.getDirectText(node).trim() : null; 345 int line = line(node, false); 346 int col = col(node, false); 347 if (!Utilities.noString(text)) { 348 Property property = getTextProp(properties); 349 if (property != null) { 350 if ("ED.data[x]".equals(property.getDefinition().getId()) || (property.getDefinition()!=null && property.getDefinition().getBase()!=null && "ED.data[x]".equals(property.getDefinition().getBase().getPath()))) { 351 if ("B64".equals(node.getAttribute("representation"))) { 352 Element n = new Element("dataBase64Binary", property, "base64Binary", text).markLocation(line, col).setFormat(FhirFormat.XML); 353 n.setPath(element.getPath()+"."+property.getName()); 354 element.getChildren().add(n); 355 } else { 356 Element n = new Element("dataString", property, "string", text).markLocation(line, col).setFormat(FhirFormat.XML); 357 n.setPath(element.getPath()+"."+property.getName()); 358 element.getChildren().add(n); 359 } 360 } else { 361 Element n = new Element(property.getName(), property, property.getType(), text).markLocation(line, col).setFormat(FhirFormat.XML); 362 n.setPath(element.getPath()+"."+property.getName()); 363 element.getChildren().add(n); 364 } 365 } else { 366 Node n = node.getFirstChild(); 367 while (n != null) { 368 if (n.getNodeType() == Node.TEXT_NODE && !Utilities.noString(n.getTextContent().trim())) { 369 Node nt = n; // try to find the nearest element for a line/col location 370 boolean end = false; 371 while (nt.getPreviousSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) { 372 nt = nt.getPreviousSibling(); 373 end = true; 374 } 375 while (nt.getNextSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) { 376 nt = nt.getNextSibling(); 377 end = false; 378 } 379 line = line(nt, end); 380 col = col(nt, end); 381 logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.TEXT_SHOULD_NOT_BE_PRESENT, Utilities.makeSingleLine(n.getTextContent().trim())), IssueSeverity.ERROR); 382 } 383 n = n.getNextSibling(); 384 } 385 } 386 } 387 388 for (int i = 0; i < node.getAttributes().getLength(); i++) { 389 Node attr = node.getAttributes().item(i); 390 String value = attr.getNodeValue(); 391 if (!validAttrValue(value)) { 392 logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.XML_ATTR_VALUE_INVALID, attr.getNodeName()), IssueSeverity.ERROR); 393 } 394 if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) { 395 Property property = getAttrProp(properties, attr.getLocalName(), attr.getNamespaceURI()); 396 if (property != null) { 397 String av = attr.getNodeValue(); 398 if (ToolingExtensions.hasExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT)) 399 av = convertForDateFormatFromExternal(ToolingExtensions.readStringExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av); 400 if (property.getName().equals("value") && element.isPrimitive()) 401 element.setValue(av); 402 else { 403 String[] vl = {av}; 404 if (property.isList() && av.contains(" ")) { 405 vl = av.split(" "); 406 } 407 for (String v : vl) { 408 Element n = new Element(property.getName(), property, property.getType(), v).markLocation(line, col).setFormat(FhirFormat.XML); 409 n.setPath(element.getPath()+"."+property.getName()); 410 element.getChildren().add(n); 411 } 412 } 413 } else { 414 boolean ok = false; 415 if (FormatUtilities.FHIR_NS.equals(node.getNamespaceURI())) { 416 if (attr.getLocalName().equals("schemaLocation") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())) { 417 ok = ok || allowXsiLocation; 418 } 419 } else 420 ok = ok || (attr.getLocalName().equals("schemaLocation")); // xsi:schemalocation allowed for non FHIR content 421 ok = ok || (hasTypeAttr(element) && attr.getLocalName().equals("type") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())); // xsi:type allowed if element says so 422 if (!ok) { 423 logError(errors, ValidationMessage.NO_RULE_DATE, line(node, false), col(node, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ATTRIBUTE__ON__FOR_TYPE__PROPERTIES__, attr.getNodeName(), node.getNodeName(), element.fhirType(), properties), IssueSeverity.ERROR); 424 } 425 } 426 } 427 } 428 429 String lastName = null; 430 int repeatCount = 0; 431 Node child = node.getFirstChild(); 432 while (child != null) { 433 if (child.getNodeType() == Node.ELEMENT_NODE) { 434 Property property = getElementProp(properties, child.getLocalName(), child.getNamespaceURI()); 435 436 if (property != null) { 437 if (property.getName().equals(lastName)) { 438 repeatCount++; 439 } else { 440 lastName = property.getName(); 441 repeatCount = 0; 442 } 443 if (!property.isChoice() && "xhtml".equals(property.getType())) { 444 XhtmlNode xhtml; 445 if (property.getDefinition().hasRepresentation(PropertyRepresentation.CDATEXT)) 446 xhtml = new CDANarrativeFormat().convert((org.w3c.dom.Element) child); 447 else { 448 XhtmlParser xp = new XhtmlParser(); 449 xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child); 450 if (policy == ValidationPolicy.EVERYTHING) { 451 for (StringPair s : xp.getValidationIssues()) { 452 logError(errors, "2022-11-17", line(child, false), col(child, false), path, IssueType.INVALID, context.formatMessage(s.getName(), s.getValue()), IssueSeverity.ERROR); 453 } 454 } 455 } 456 Element n = new Element(property.getName(), property, "xhtml", new XhtmlComposer(XhtmlComposer.XML, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child); 457 n.setPath(element.getPath()+"."+property.getName()); 458 element.getChildren().add(n); 459 } else { 460 String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName(); 461 String name = child.getLocalName(); 462 if (!property.isChoice() && !name.equals(property.getName())) { 463 name = property.getName(); 464 } 465 Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child); 466 if (property.isList()) { 467 n.setPath(element.getPath()+"."+property.getName()+"["+repeatCount+"]"); 468 } else { 469 n.setPath(element.getPath()+"."+property.getName()); 470 } 471 boolean xsiTypeChecked = false; 472 boolean ok = true; 473 if (property.isChoice()) { 474 if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) { 475 String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type"); 476 if (Utilities.noString(xsiType)) { 477 if (ToolingExtensions.hasExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype")) { 478 xsiType = ToolingExtensions.readStringExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype"); 479 n.setType(xsiType); 480 } else { 481 logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NO_TYPE_FOUND_ON_, child.getLocalName()), IssueSeverity.ERROR); 482 ok = false; 483 } 484 } else { 485 if (xsiType.contains(":")) 486 xsiType = xsiType.substring(xsiType.indexOf(":")+1); 487 n.setType(xsiType); 488 n.setExplicitType(xsiType); 489 } 490 xsiTypeChecked = true; 491 } else 492 n.setType(n.getType()); 493 } 494 checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), xsiTypeChecked); 495 element.getChildren().add(n); 496 if (ok) { 497 if (property.isResource()) 498 parseResource(errors, npath, (org.w3c.dom.Element) child, n, property); 499 else 500 parseChildren(errors, npath, (org.w3c.dom.Element) child, n); 501 } 502 } 503 } else { 504 if (cgProp != null) { 505 property = getElementProp(cgProp.getChildProperties(null, null), child.getLocalName(), child.getNamespaceURI()); 506 if (property != null) { 507 if (cgProp.getName().equals(lastName)) { 508 repeatCount++; 509 } else { 510 lastName = cgProp.getName(); 511 repeatCount = 0; 512 } 513 514 String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName(); 515 String name = cgProp.getName(); 516 Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML); 517 cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 518 element.getChildren().add(cgn); 519 520 npath = npath+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName(); 521 name = child.getLocalName(); 522 Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child); 523 cgn.getChildren().add(n); 524 n.setPath(element.getPath()+"."+property.getName()); 525 checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), false); 526 parseChildren(errors, npath, (org.w3c.dom.Element) child, n); 527 } 528 } 529 if (property == null) { 530 logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ELEMENT_, child.getLocalName(), path), IssueSeverity.ERROR); 531 } 532 } 533 } else if (child.getNodeType() == Node.TEXT_NODE && !Utilities.noString(child.getTextContent().trim()) && mtProp != null) { 534 if (cgProp.getName().equals(lastName)) { 535 repeatCount++; 536 } else { 537 lastName = cgProp.getName(); 538 repeatCount = 0; 539 } 540 541 String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName(); 542 String name = cgProp.getName(); 543 Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML); 544 cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 545 element.getChildren().add(cgn); 546 547 npath = npath+"/text()"; 548 name = mtProp.getName(); 549 Element n = new Element(name, mtProp, mtProp.getType(), child.getTextContent().trim()).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child); 550 cgn.getChildren().add(n); 551 n.setPath(element.getPath()+"."+mtProp.getName()); 552 553 554 } else if (child.getNodeType() == Node.CDATA_SECTION_NODE) { 555 logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.CDATA_IS_NOT_ALLOWED), IssueSeverity.ERROR); 556 } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) { 557 logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NODE_TYPE__IS_NOT_ALLOWED, Integer.toString(child.getNodeType())), IssueSeverity.ERROR); 558 } 559 child = child.getNextSibling(); 560 } 561 } 562 563 private Property getChoiceGroupProp(List<Property> properties) { 564 for (Property p : properties) { 565 if (p.getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) { 566 return p; 567 } 568 } 569 return null; 570 } 571 572 private boolean validAttrValue(String value) { 573 if (version == null) { 574 return true; 575 } 576 if (version.equals("1.0")) { 577 boolean ok = true; 578 for (char ch : value.toCharArray()) { 579 if (ch <= 0x1F && !Utilities.existsInList(ch, '\r', '\n', '\t')) { 580 ok = false; 581 } 582 } 583 return ok; 584 } else 585 return true; 586 } 587 588 589 private Property getElementProp(List<Property> properties, String nodeName, String namespace) { 590 List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties); 591 // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x] 592 // and therefore the longer property names get evaluated first 593 Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() { 594 @Override 595 public int compare(Property o1, Property o2) { 596 return o2.getName().length() - o1.getName().length(); 597 } 598 }); 599 // first scan, by namespace 600 for (Property p : propsSortedByLongestFirst) { 601 if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) { 602 if (p.getXmlName().equals(nodeName) && p.getXmlNamespace().equals(namespace)) 603 return p; 604 } 605 } 606 for (Property p : propsSortedByLongestFirst) { 607 if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) { 608 if (p.getXmlName().equals(nodeName)) 609 return p; 610 if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 611 return p; 612 } 613 } 614 615 616 return null; 617 } 618 619 private Property getAttrProp(List<Property> properties, String nodeName, String namespace) { 620 for (Property p : properties) { 621 if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && p.getXmlNamespace().equals(namespace)) { 622 return p; 623 } 624 } 625 if (namespace == null) { 626 for (Property p : properties) { 627 if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) { 628 return p; 629 } 630 } 631 } 632 return null; 633 } 634 635 private Property getTextProp(List<Property> properties) { 636 for (Property p : properties) 637 if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 638 return p; 639 return null; 640 } 641 642 private String convertForDateFormatFromExternal(String fmt, String av) throws FHIRException { 643 if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) { 644 try { 645 DateTimeType d = DateTimeType.parseV3(av); 646 return d.asStringValue(); 647 } catch (Exception e) { 648 return av; // not at all clear what to do in this case. 649 } 650 } 651 throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATA_FORMAT_, fmt)); 652 } 653 654 private String convertForDateFormatToExternal(String fmt, String av) throws FHIRException { 655 if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) { 656 DateTimeType d = new DateTimeType(av); 657 return d.getAsV3(); 658 } else 659 throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATE_FORMAT_, fmt)); 660 } 661 662 private void parseResource(List<ValidationMessage> errors, String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 663 org.w3c.dom.Element res = XMLUtil.getFirstChild(container); 664 String name = res.getLocalName(); 665 StructureDefinition sd = context.fetchResource(StructureDefinition.class, ProfileUtilities.sdNs(name, null)); 666 if (sd == null) 667 throw new FHIRFormatError(context.formatMessage(I18nConstants.CONTAINED_RESOURCE_DOES_NOT_APPEAR_TO_BE_A_FHIR_RESOURCE_UNKNOWN_NAME_, res.getLocalName())); 668 parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities()), SpecialElement.fromProperty(parent.getProperty()), elementProperty); 669 parent.setType(name); 670 parseChildren(errors, res.getLocalName(), res, parent); 671 } 672 673 private void reapComments(org.w3c.dom.Element element, Element context) { 674 Node node = element.getPreviousSibling(); 675 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 676 if (node.getNodeType() == Node.COMMENT_NODE) 677 context.getComments().add(0, node.getTextContent()); 678 node = node.getPreviousSibling(); 679 } 680 node = element.getLastChild(); 681 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 682 node = node.getPreviousSibling(); 683 } 684 while (node != null) { 685 if (node.getNodeType() == Node.COMMENT_NODE) 686 context.getComments().add(node.getTextContent()); 687 node = node.getNextSibling(); 688 } 689 } 690 691 private boolean isAttr(Property property) { 692 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 693 if (r.getValue() == PropertyRepresentation.XMLATTR) { 694 return true; 695 } 696 } 697 return false; 698 } 699 700 private boolean isCdaText(Property property) { 701 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 702 if (r.getValue() == PropertyRepresentation.CDATEXT) { 703 return true; 704 } 705 } 706 return false; 707 } 708 709 private boolean isTypeAttr(Property property) { 710 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 711 if (r.getValue() == PropertyRepresentation.TYPEATTR) { 712 return true; 713 } 714 } 715 return false; 716 } 717 718 private boolean isText(Property property) { 719 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 720 if (r.getValue() == PropertyRepresentation.XMLTEXT) { 721 return true; 722 } 723 } 724 return false; 725 } 726 727 @Override 728 public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException, FHIRException { 729 markedXhtml = false; 730 XMLWriter xml = new XMLWriter(stream, "UTF-8"); 731 xml.setSortAttributes(false); 732 xml.setPretty(style == OutputStyle.PRETTY); 733 xml.start(); 734 if (e.getPath() == null) { 735 e.populatePaths(null); 736 } 737 String ns = e.getProperty().getXmlNamespace(); 738 if (ns!=null && !"noNamespace".equals(ns)) { 739 xml.setDefaultNamespace(ns); 740 } 741 if (hasTypeAttr(e)) 742 xml.namespace("http://www.w3.org/2001/XMLSchema-instance", "xsi"); 743 addNamespaces(xml, e); 744 composeElement(xml, e, e.getType(), true); 745 xml.end(); 746 } 747 748 private void addNamespaces(IXMLWriter xml, Element e) throws IOException { 749 String ns = e.getProperty().getXmlNamespace(); 750 if (ns!=null && xml.getDefaultNamespace()!=null && !xml.getDefaultNamespace().equals(ns)){ 751 if (!xml.namespaceDefined(ns)) { 752 String prefix = pathPrefix(ns); 753 if (prefix.endsWith(":")) { 754 prefix = prefix.substring(0, prefix.length()-1); 755 } 756 if ("?".equals(prefix)) { 757 xml.namespace(ns); 758 } else { 759 xml.namespace(ns, prefix); 760 } 761 } 762 } 763 for (Element c : e.getChildren()) { 764 addNamespaces(xml, c); 765 } 766 } 767 768 private boolean hasTypeAttr(Element e) { 769 if (isTypeAttr(e.getProperty())) 770 return true; 771 for (Element c : e.getChildren()) { 772 if (hasTypeAttr(c)) 773 return true; 774 } 775 // xsi_type is always allowed on CDA elements. right now, I'm not sure where to indicate this in the model, 776 // so it's just hardcoded here 777 if (e.getType() != null && e.getType().startsWith(Constants.NS_CDA_ROOT)) { 778 return true; 779 } 780 return false; 781 } 782 783 private void setXsiTypeIfIsTypeAttr(IXMLWriter xml, Element element) throws IOException, FHIRException { 784 if (isTypeAttr(element.getProperty()) && !Utilities.noString(element.getType())) { 785 String type = element.getType(); 786 if (Utilities.isAbsoluteUrl(type)) { 787 type = type.substring(type.lastIndexOf("/")+1); 788 } 789 xml.attribute("xsi:type",type); 790 } 791 } 792 793 public void compose(Element e, IXMLWriter xml) throws Exception { 794 if (e.getPath() == null) { 795 e.populatePaths(null); 796 } 797 markedXhtml = false; 798 xml.start(); 799 xml.setDefaultNamespace(e.getProperty().getXmlNamespace()); 800 if (schemaPath != null) { 801 xml.setSchemaLocation(FormatUtilities.FHIR_NS, Utilities.pathURL(schemaPath, e.fhirType()+".xsd")); 802 } 803 composeElement(xml, e, e.getType(), true); 804 xml.end(); 805 } 806 807 private void composeElement(IXMLWriter xml, Element element, String elementName, boolean root) throws IOException, FHIRException { 808 if (showDecorations) { 809 @SuppressWarnings("unchecked") 810 List<ElementDecoration> decorations = (List<ElementDecoration>) element.getUserData("fhir.decorations"); 811 if (decorations != null) 812 for (ElementDecoration d : decorations) 813 xml.decorate(d); 814 } 815 for (String s : element.getComments()) { 816 xml.comment(s, true); 817 } 818 if (isText(element.getProperty())) { 819 if (linkResolver != null) 820 xml.link(linkResolver.resolveProperty(element.getProperty())); 821 xml.enter(element.getProperty().getXmlNamespace(),elementName); 822 if (linkResolver != null && element.getProperty().isReference()) { 823 String ref = linkResolver.resolveReference(getReferenceForElement(element)); 824 if (ref != null) { 825 xml.externalLink(ref); 826 } 827 } 828 xml.text(element.getValue()); 829 xml.exit(element.getProperty().getXmlNamespace(),elementName); 830 } else if (!element.hasChildren() && !element.hasValue()) { 831 if (element.getExplicitType() != null) 832 xml.attribute("xsi:type", element.getExplicitType()); 833 xml.element(elementName); 834 } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) { 835 if (element.getType().equals("xhtml")) { 836 String rawXhtml = element.getValue(); 837 if (isCdaText(element.getProperty())) { 838 new CDANarrativeFormat().convert(xml, new XhtmlParser().parseFragment(rawXhtml)); 839 } else { 840 xml.escapedText(rawXhtml); 841 if (!markedXhtml) { 842 xml.anchor("end-xhtml"); 843 markedXhtml = true; 844 } 845 } 846 } else if (isText(element.getProperty())) { 847 if (linkResolver != null) 848 xml.link(linkResolver.resolveProperty(element.getProperty())); 849 xml.text(element.getValue()); 850 } else { 851 setXsiTypeIfIsTypeAttr(xml, element); 852 if (element.hasValue()) { 853 if (linkResolver != null) 854 xml.link(linkResolver.resolveType(element.getType())); 855 xml.attribute("value", element.getValue()); 856 } 857 if (linkResolver != null) 858 xml.link(linkResolver.resolveProperty(element.getProperty())); 859 if (element.hasChildren()) { 860 xml.enter(element.getProperty().getXmlNamespace(), elementName); 861 if (linkResolver != null && element.getProperty().isReference()) { 862 String ref = linkResolver.resolveReference(getReferenceForElement(element)); 863 if (ref != null) { 864 xml.externalLink(ref); 865 } 866 } 867 for (Element child : element.getChildren()) 868 composeElement(xml, child, child.getName(), false); 869 xml.exit(element.getProperty().getXmlNamespace(),elementName); 870 } else 871 xml.element(elementName); 872 } 873 } else { 874 setXsiTypeIfIsTypeAttr(xml, element); 875 Set<String> handled = new HashSet<>(); 876 for (Element child : element.getChildren()) { 877 if (!handled.contains(child.getName()) && isAttr(child.getProperty()) && wantCompose(element.getPath(), child)) { 878 handled.add(child.getName()); 879 String av = child.getValue(); 880 if (child.getProperty().isList()) { 881 for (Element c2 : element.getChildren()) { 882 if (c2 != child && c2.getName().equals(child.getName())) { 883 av = av + " "+c2.getValue(); 884 } 885 } 886 } 887 if (linkResolver != null) 888 xml.link(linkResolver.resolveType(child.getType())); 889 if (ToolingExtensions.hasExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT)) 890 av = convertForDateFormatToExternal(ToolingExtensions.readStringExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av); 891 xml.attribute(child.getProperty().getXmlNamespace(),child.getProperty().getXmlName(), av); 892 } 893 } 894 if (!element.getProperty().getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) { 895 if (linkResolver != null) 896 xml.link(linkResolver.resolveProperty(element.getProperty())); 897 if (!xml.namespaceDefined(element.getProperty().getXmlNamespace())) { 898 String abbrev = makeNamespaceAbbrev(element.getProperty(), xml); 899 xml.namespace(element.getProperty().getXmlNamespace(), abbrev); 900 } 901 xml.enter(element.getProperty().getXmlNamespace(), elementName); 902 } 903 904 if (!root && element.getSpecial() != null) { 905 if (linkResolver != null) 906 xml.link(linkResolver.resolveProperty(element.getProperty())); 907 xml.enter(element.getProperty().getXmlNamespace(),element.getType()); 908 } 909 if (linkResolver != null && element.getProperty().isReference()) { 910 String ref = linkResolver.resolveReference(getReferenceForElement(element)); 911 if (ref != null) { 912 xml.externalLink(ref); 913 } 914 } 915 for (Element child : element.getChildren()) { 916 if (wantCompose(element.getPath(), child)) { 917 if (isText(child.getProperty())) { 918 if (linkResolver != null) 919 xml.link(linkResolver.resolveProperty(element.getProperty())); 920 xml.text(child.getValue()); 921 } else if (!isAttr(child.getProperty())) { 922 composeElement(xml, child, child.getName(), false); 923 } 924 } 925 } 926 if (!element.getProperty().getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) { 927 if (!root && element.getSpecial() != null) 928 xml.exit(element.getProperty().getXmlNamespace(),element.getType()); 929 xml.exit(element.getProperty().getXmlNamespace(),elementName); 930 } 931 } 932 } 933 934 private String makeNamespaceAbbrev(Property property, IXMLWriter xml) { 935 // it's a cosmetic thing, but we're going to try to come up with a nice namespace 936 937 ElementDefinition ed = property.getDefinition(); 938 String ns = property.getXmlNamespace(); 939 String n = property.getXmlName(); 940 941 String diff = property.getName().toLowerCase().replace(n.toLowerCase(), ""); 942 if (!Utilities.noString(diff) && diff.length() <= 5 && Utilities.isToken(diff) && !xml.abbreviationDefined(diff)) { 943 return diff; 944 } 945 946 int i = ns.length()-1; 947 while (i > 0) { 948 if (Character.isAlphabetic(ns.charAt(i)) || Character.isDigit(ns.charAt(i))) { 949 i--; 950 } else { 951 break; 952 } 953 } 954 String tail = ns.substring(i+1); 955 if (!Utilities.noString(tail) && tail.length() <= 5 && Utilities.isToken(tail) && !xml.abbreviationDefined(tail)) { 956 return tail; 957 } 958 959 i = 0; 960 while (xml.abbreviationDefined("ns"+i)) { 961 i++; 962 } 963 return "ns"+i; 964 } 965 private String checkHeader(List<ValidationMessage> errors, InputStream stream) throws IOException { 966 try { 967 // the stream will either start with the UTF-8 BOF or with <xml 968 int i0 = stream.read(); 969 int i1 = stream.read(); 970 int i2 = stream.read(); 971 972 StringBuilder b = new StringBuilder(); 973 if (i0 == 0xEF && i1 == 0xBB && i2 == 0xBF) { 974 // ok, it's UTF-8 975 } else if (i0 == 0x3C && i1 == 0x3F && i2 == 0x78) { // <xm 976 b.append((char) i0); 977 b.append((char) i1); 978 b.append((char) i2); 979 } else if (i0 == 60) { // just plain old XML with no header 980 return "1.0"; 981 } else { 982 throw new Exception(context.formatMessage(I18nConstants.XML_ENCODING_INVALID)); 983 } 984 int i = stream.read(); 985 do { 986 b.append((char) i); 987 i = stream.read(); 988 } while (i != 0x3E); 989 String header = b.toString(); 990 String e = null; 991 i = header.indexOf("encoding=\""); 992 if (i > -1) { 993 e = header.substring(i+10, i+15); 994 } else { 995 i = header.indexOf("encoding='"); 996 if (i > -1) { 997 e = header.substring(i+10, i+15); 998 } 999 } 1000 if (e != null && !"UTF-8".equalsIgnoreCase(e)) { 1001 logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, context.formatMessage(I18nConstants.XML_ENCODING_INVALID), IssueSeverity.ERROR); 1002 } 1003 1004 i = header.indexOf("version=\""); 1005 if (i > -1) { 1006 return header.substring(i+9, i+12); 1007 } else { 1008 i = header.indexOf("version='"); 1009 if (i > -1) { 1010 return header.substring(i+9, i+12); 1011 } 1012 } 1013 return "?xml-p1?"; 1014 } catch (Exception e) { 1015 // suppress this error 1016 logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, e.getMessage(), IssueSeverity.ERROR); 1017 } 1018 return "?xml-p2?"; 1019 } 1020 1021 class NullErrorHandler implements ErrorHandler { 1022 @Override 1023 public void fatalError(SAXParseException e) { 1024 // do nothing 1025 } 1026 1027 @Override 1028 public void error(SAXParseException e) { 1029 // do nothing 1030 } 1031 1032 @Override 1033 public void warning(SAXParseException e) { 1034 // do nothing 1035 } 1036 } 1037}