001package org.hl7.fhir.r5.elementmodel;
002
003import java.io.ByteArrayInputStream;
004
005/*
006  Copyright (c) 2011+, HL7, Inc.
007  All rights reserved.
008
009  Redistribution and use in source and binary forms, with or without modification, 
010  are permitted provided that the following conditions are met:
011
012 * Redistributions of source code must retain the above copyright notice, this 
013     list of conditions and the following disclaimer.
014 * Redistributions in binary form must reproduce the above copyright notice, 
015     this list of conditions and the following disclaimer in the documentation 
016     and/or other materials provided with the distribution.
017 * Neither the name of HL7 nor the names of its contributors may be used to 
018     endorse or promote products derived from this software without specific 
019     prior written permission.
020
021  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
022  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
023  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
024  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
025  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
026  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
027  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
028  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
029  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
030  POSSIBILITY OF SUCH DAMAGE.
031
032 */
033
034
035import java.io.IOException;
036import java.io.InputStream;
037import java.io.OutputStream;
038import java.util.ArrayList;
039import java.util.Collections;
040import java.util.Comparator;
041import java.util.HashSet;
042import java.util.List;
043import java.util.Set;
044
045import javax.xml.parsers.DocumentBuilder;
046import javax.xml.parsers.DocumentBuilderFactory;
047import javax.xml.parsers.SAXParser;
048import javax.xml.parsers.SAXParserFactory;
049import javax.xml.transform.Transformer;
050import javax.xml.transform.TransformerFactory;
051import javax.xml.transform.dom.DOMResult;
052import javax.xml.transform.sax.SAXSource;
053
054import org.hl7.fhir.exceptions.DefinitionException;
055import org.hl7.fhir.exceptions.FHIRException;
056import org.hl7.fhir.exceptions.FHIRFormatError;
057import org.hl7.fhir.r5.conformance.profile.ProfileUtilities;
058import org.hl7.fhir.r5.context.IWorkerContext;
059import org.hl7.fhir.r5.elementmodel.Element.SpecialElement;
060import org.hl7.fhir.r5.elementmodel.Manager.FhirFormat;
061import org.hl7.fhir.r5.formats.FormatUtilities;
062import org.hl7.fhir.r5.formats.IParser.OutputStyle;
063import org.hl7.fhir.r5.model.Constants;
064import org.hl7.fhir.r5.model.DateTimeType;
065import org.hl7.fhir.r5.model.ElementDefinition;
066import org.hl7.fhir.r5.model.ElementDefinition.PropertyRepresentation;
067import org.hl7.fhir.r5.model.Enumeration;
068import org.hl7.fhir.r5.model.StructureDefinition;
069import org.hl7.fhir.r5.utils.ToolingExtensions;
070import org.hl7.fhir.r5.utils.formats.XmlLocationAnnotator;
071import org.hl7.fhir.r5.utils.formats.XmlLocationData;
072import org.hl7.fhir.utilities.ElementDecoration;
073import org.hl7.fhir.utilities.StringPair;
074import org.hl7.fhir.utilities.TextFile;
075import org.hl7.fhir.utilities.Utilities;
076import org.hl7.fhir.utilities.i18n.I18nConstants;
077import org.hl7.fhir.utilities.validation.ValidationMessage;
078import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
079import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
080import org.hl7.fhir.utilities.xhtml.CDANarrativeFormat;
081import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
082import org.hl7.fhir.utilities.xhtml.XhtmlNode;
083import org.hl7.fhir.utilities.xhtml.XhtmlParser;
084import org.hl7.fhir.utilities.xml.IXMLWriter;
085import org.hl7.fhir.utilities.xml.XMLUtil;
086import org.hl7.fhir.utilities.xml.XMLWriter;
087import org.w3c.dom.Document;
088import org.w3c.dom.Node;
089import org.xml.sax.ErrorHandler;
090import org.xml.sax.InputSource;
091import org.xml.sax.SAXParseException;
092import org.xml.sax.XMLReader;
093
094public class XmlParser extends ParserBase {
  // When true, an xsi:schemaLocation attribute on an element in the FHIR namespace is
  // accepted by parseChildren() instead of being reported as an undefined attribute.
  private boolean allowXsiLocation;
  // Value returned by checkHeader(...) in parse(InputStream) (only under ValidationPolicy.EVERYTHING);
  // validAttrValue() compares it against "1.0". Remains null until such a parse has run.
  private String version;
097
  /**
   * Creates an XML parser for the given worker context; the context is handed to
   * the {@link ParserBase} constructor.
   *
   * @param context the worker context passed on to the base class
   */
  public XmlParser(IWorkerContext context) {
    super(context);
  }
101
  // Exposed through getSchemaPath()/setSchemaPath().
  // NOTE(review): no reader of this field is visible in the reviewed part of the class - confirm where it is consumed.
  private String schemaPath;
  // NOTE(review): not referenced in the reviewed part of the class - presumably used further down; confirm.
  private boolean markedXhtml;
104
  /**
   * @return the schema path previously set with {@link #setSchemaPath(String)}, or null if none was set
   */
  public String getSchemaPath() {
    return schemaPath;
  }
  /**
   * Stores the schema path on this parser.
   *
   * @param schemaPath the value later returned by {@link #getSchemaPath()}
   */
  public void setSchemaPath(String schemaPath) {
    this.schemaPath = schemaPath;
  }
111
  /**
   * @return true if xsi:schemaLocation is accepted on elements in the FHIR namespace
   */
  public boolean isAllowXsiLocation() {
    return allowXsiLocation;
  }
115
  /**
   * Controls whether xsi:schemaLocation is accepted on elements in the FHIR namespace.
   *
   * @param allowXsiLocation true to accept the attribute, false to have it reported as undefined
   */
  public void setAllowXsiLocation(boolean allowXsiLocation) {
    this.allowXsiLocation = allowXsiLocation;
  }
119
120  public List<ValidatedFragment> parse(InputStream inStream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
121
122    byte[] content = TextFile.streamToBytes(inStream);
123    ValidatedFragment focusFragment = new ValidatedFragment(ValidatedFragment.FOCUS_NAME, "xml", content, false);
124
125    ByteArrayInputStream stream = new ByteArrayInputStream(content);
126    Document doc = null;
127    try {
128      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
129      // xxe protection
130      factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
131      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
132      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
133      factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
134      factory.setXIncludeAware(false);
135      factory.setExpandEntityReferences(false);
136
137      factory.setNamespaceAware(true);
138      if (policy == ValidationPolicy.EVERYTHING) {
139        // The SAX interface appears to not work when reporting the correct version/encoding.
140        // if we can, we'll inspect the header/encoding ourselves 
141
142        stream.mark(1024);
143        version = checkHeader(focusFragment.getErrors(), stream);
144        stream.reset();
145
146        // use a slower parser that keeps location data
147        TransformerFactory transformerFactory = TransformerFactory.newInstance();
148        Transformer nullTransformer = transformerFactory.newTransformer();
149        DocumentBuilder docBuilder = factory.newDocumentBuilder();
150        doc = docBuilder.newDocument();
151        DOMResult domResult = new DOMResult(doc);
152        SAXParserFactory spf = SAXParserFactory.newInstance();
153        spf.setNamespaceAware(true);
154        spf.setValidating(false);
155        // xxe protection
156        spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
157        spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
158        SAXParser saxParser = spf.newSAXParser();
159        XMLReader xmlReader = saxParser.getXMLReader();
160        // xxe protection
161        xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
162        xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
163
164        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
165        InputSource inputSource = new InputSource(stream);
166        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
167        nullTransformer.transform(saxSource, domResult);
168      } else {
169        DocumentBuilder builder = factory.newDocumentBuilder();
170        builder.setErrorHandler(new NullErrorHandler());
171        doc = builder.parse(stream);
172      }
173    } catch (Exception e) {
174      if (e.getMessage().contains("lineNumber:") && e.getMessage().contains("columnNumber:")) {
175        int line = Utilities.parseInt(extractVal(e.getMessage(), "lineNumber"), 0); 
176        int col = Utilities.parseInt(extractVal(e.getMessage(), "columnNumber"), 0); 
177        logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, line, col, "(xml)", IssueType.INVALID, e.getMessage().substring(e.getMessage().lastIndexOf(";")+1).trim(), IssueSeverity.FATAL);
178      } else {
179        logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, 0, 0, "(xml)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
180      }
181      doc = null;
182    }
183    if (doc != null) {
184      focusFragment.setElement(parse(focusFragment.getErrors(), doc));
185    }
186    List<ValidatedFragment> res = new ArrayList<>();
187    res.add(focusFragment);
188    return res;
189  }
190
191
192  private String extractVal(String src, String name) {
193    src = src.substring(src.indexOf(name)+name.length()+1);
194    src = src.substring(0, src.indexOf(";")).trim();
195    return src;
196  }
197  private void checkForProcessingInstruction(List<ValidationMessage> errors, Document document) throws FHIRFormatError {
198    if (policy == ValidationPolicy.EVERYTHING && FormatUtilities.FHIR_NS.equals(document.getDocumentElement().getNamespaceURI())) {
199      Node node = document.getFirstChild();
200      while (node != null) {
201        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
202          logError(errors, ValidationMessage.NO_RULE_DATE, line(document, false), col(document, false), "(document)", IssueType.INVALID, context.formatMessage(
203              I18nConstants.NO_PROCESSING_INSTRUCTIONS_ALLOWED_IN_RESOURCES), IssueSeverity.ERROR);
204        node = node.getNextSibling();
205      }
206    }
207  }
208
209
210  private int line(Node node, boolean end) {
211    XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
212    return loc == null ? 0 : end ? loc.getEndLine() : loc.getStartLine();
213  }
214
215  private int col(Node node, boolean end) {
216    XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
217    return loc == null ? 0 : end ? loc.getEndColumn() : loc.getStartColumn();
218  }
219
220  public Element parse(List<ValidationMessage> errors, Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
221    checkForProcessingInstruction(errors, doc);
222    org.w3c.dom.Element element = doc.getDocumentElement();
223    return parse(errors, element);
224  }
225
226  public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
227    String ns = element.getNamespaceURI();
228    String name = element.getLocalName();
229    String path = "/"+pathPrefix(ns)+name;
230
231    StructureDefinition sd = getDefinition(errors, line(element, false), col(element, false), (ns == null ? "noNamespace" : ns), name);
232    if (sd == null)
233      return null;
234
235    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML);
236    result.setPath(element.getLocalName());
237    checkElement(errors, element, result, path, result.getProperty(), false);
238    result.markLocation(line(element, false), col(element, false));
239    result.setType(element.getLocalName());
240    parseChildren(errors, path, element, result);
241    result.numberChildren();
242    return result;
243  }
244
245  private String pathPrefix(String ns) {
246    if (Utilities.noString(ns))
247      return "";
248    if (ns.equals(FormatUtilities.FHIR_NS))
249      return "f:";
250    if (ns.equals(FormatUtilities.XHTML_NS))
251      return "h:";
252    if (ns.equals("urn:hl7-org:v3"))
253      return "v3:";
254    if (ns.equals("urn:hl7-org:sdtc")) 
255      return "sdtc:";
256    if (ns.equals("urn:ihe:pharm"))
257      return "pharm:";
258    return "?:";
259  }
260
261  private boolean empty(org.w3c.dom.Element element) {
262    for (int i = 0; i < element.getAttributes().getLength(); i++) {
263      String n = element.getAttributes().item(i).getNodeName();
264      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
265        return false;
266    }
267    if (!Utilities.noString(element.getTextContent().trim()))
268      return false;
269
270    Node n = element.getFirstChild();
271    while (n != null) {
272      if (n.getNodeType() == Node.ELEMENT_NODE)
273        return false;
274      n = n.getNextSibling();
275    }
276    return true;
277  }
278
279  private void checkElement(List<ValidationMessage> errors, org.w3c.dom.Element element, Element e, String path, Property prop, boolean xsiTypeChecked) throws FHIRFormatError {
280    if (policy == ValidationPolicy.EVERYTHING) {
281      if (empty(element) && FormatUtilities.FHIR_NS.equals(element.getNamespaceURI())) // this rule only applies to FHIR Content
282        logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.ELEMENT_MUST_HAVE_SOME_CONTENT), IssueSeverity.ERROR);
283      String ns = prop.getXmlNamespace();
284      String elementNs = element.getNamespaceURI();
285      if (elementNs == null) {
286        elementNs = "noNamespace";
287      }
288      if (!elementNs.equals(ns)) {
289        logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.WRONG_NAMESPACE__EXPECTED_, ns), IssueSeverity.ERROR);
290      }
291      if (!xsiTypeChecked) {
292        String xsiType = element.getAttributeNS(FormatUtilities.NS_XSI, "type");
293        if (!Utilities.noString(xsiType)) {
294          String actualType = prop.getXmlTypeName();
295          if (xsiType.equals(actualType)) {
296            logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_UNNECESSARY), IssueSeverity.INFORMATION);            
297          } else {
298            StructureDefinition sd = findLegalConstraint(xsiType, actualType);
299            if (sd != null) {
300              e.setType(sd.getType());
301              e.setExplicitType(xsiType);
302            } else {
303              logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_WRONG, xsiType, actualType), IssueSeverity.ERROR);           
304            }  
305          }
306        }
307      }
308    }
309  }
310
311  private StructureDefinition findLegalConstraint(String xsiType, String actualType) {
312    StructureDefinition sdA = context.fetchTypeDefinition(actualType);
313    StructureDefinition sd = context.fetchTypeDefinition(xsiType);
314    while (sd != null) {
315      if (sd == sdA) {
316        return sd;
317      }
318      sd = context.fetchResource(StructureDefinition.class, sd.getBaseDefinition());
319    }
320    return null;
321  }
322
323  public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element base, String type) throws Exception {
324    StructureDefinition sd = getDefinition(errors, 0, 0, FormatUtilities.FHIR_NS, type);
325    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML).setNativeObject(base);
326    result.setPath(base.getLocalName());
327    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
328    checkElement(errors, base, result, path, result.getProperty(), false);
329    result.setType(base.getLocalName());
330    parseChildren(errors, path, base, result);
331    result.numberChildren();
332    return result;
333  }
334
  /**
   * Populates {@code element} from the content of the DOM element {@code node}, recursing
   * into child elements.
   *
   * <p>Work is done in three passes over {@code node}: (1) its direct text, (2) its attributes,
   * (3) its child nodes in document order. Content that cannot be matched to a property of
   * the element's definition is reported through {@code logError}.
   *
   * @param errors list that validation messages are appended to
   * @param path the path of {@code node}, used in messages and as the prefix for child paths
   * @param node the DOM element whose content is read
   * @param element the element-model node that receives the children/value
   */
  private void parseChildren(List<ValidationMessage> errors, String path, org.w3c.dom.Element node, Element element) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
    // this parsing routine retains the original order in the XML file, to support validation
    reapComments(node, element);
    List<Property> properties = element.getProperty().getChildProperties(element.getName(), XMLUtil.getXsiType(node));
    // cgProp: the child property flagged with the choice-group extension (if any);
    // mtProp: the XML-text property inside that choice group (if any)
    Property cgProp = getChoiceGroupProp(properties);
    Property mtProp = cgProp == null ? null : getTextProp(cgProp.getChildProperties(null, null));

    // Pass 1: direct text. Skipped when the choice group has a text property, because
    // in that case text nodes are handled one by one in pass 3.
    String text = mtProp == null ? XMLUtil.getDirectText(node).trim() : null;
    int line = line(node, false);
    int col = col(node, false);
    if (!Utilities.noString(text)) {
      Property property = getTextProp(properties);
      if (property != null) {
        // ED.data[x] is special-cased: the child is named/typed according to the "representation" attribute
        if ("ED.data[x]".equals(property.getDefinition().getId()) || (property.getDefinition()!=null && property.getDefinition().getBase()!=null && "ED.data[x]".equals(property.getDefinition().getBase().getPath()))) {
          if ("B64".equals(node.getAttribute("representation"))) {
            Element n = new Element("dataBase64Binary", property, "base64Binary", text).markLocation(line, col).setFormat(FhirFormat.XML);
            n.setPath(element.getPath()+"."+property.getName());
            element.getChildren().add(n);
          } else {
            Element n = new Element("dataString", property, "string", text).markLocation(line, col).setFormat(FhirFormat.XML);
            n.setPath(element.getPath()+"."+property.getName());
            element.getChildren().add(n);
          }
        } else {
          Element n = new Element(property.getName(), property, property.getType(), text).markLocation(line, col).setFormat(FhirFormat.XML);
          n.setPath(element.getPath()+"."+property.getName());
          element.getChildren().add(n);
        }
      } else {
        // No text property is defined: report every non-blank text node as an error
        Node n = node.getFirstChild();
        while (n != null) {
          if (n.getNodeType() == Node.TEXT_NODE && !Utilities.noString(n.getTextContent().trim())) {
            Node nt = n; // try to find the nearest element for a line/col location
            boolean end = false;
            while (nt.getPreviousSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) {
              nt = nt.getPreviousSibling();
              end = true;
            }
            while (nt.getNextSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) {
              nt = nt.getNextSibling();
              end = false;
            }
            // note: this overwrites line/col, which pass 2 below also uses for attribute locations
            line = line(nt, end);
            col = col(nt, end);
            logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.TEXT_SHOULD_NOT_BE_PRESENT, Utilities.makeSingleLine(n.getTextContent().trim())), IssueSeverity.ERROR);
          }
          n = n.getNextSibling();
        }
      }                 
    }

    // Pass 2: attributes
    for (int i = 0; i < node.getAttributes().getLength(); i++) {
      Node attr = node.getAttributes().item(i);
      String value = attr.getNodeValue();
      if (!validAttrValue(value)) {
        logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.XML_ATTR_VALUE_INVALID, attr.getNodeName()), IssueSeverity.ERROR);
      }
      // namespace declarations are not data
      if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
        Property property = getAttrProp(properties, attr.getLocalName(), attr.getNamespaceURI());
        if (property != null) {
          String av = attr.getNodeValue();
          if (ToolingExtensions.hasExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT))
            av = convertForDateFormatFromExternal(ToolingExtensions.readStringExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av);          
          // the "value" attribute of a primitive becomes the element's own value rather than a child
          if (property.getName().equals("value") && element.isPrimitive())
            element.setValue(av);
          else {
            // a repeating attribute property holds a space-separated list: one child per item
            String[] vl = {av};
            if (property.isList() && av.contains(" ")) {
              vl = av.split(" ");
            }
            for (String v : vl) {
              Element n = new Element(property.getName(), property, property.getType(), v).markLocation(line, col).setFormat(FhirFormat.XML);
              n.setPath(element.getPath()+"."+property.getName());
              element.getChildren().add(n);
            }
          }
        } else {
          // Unknown attribute: tolerated only for xsi:schemaLocation / xsi:type in the cases below
          boolean ok = false;
          if (FormatUtilities.FHIR_NS.equals(node.getNamespaceURI())) {
            if (attr.getLocalName().equals("schemaLocation") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())) {
              ok = ok || allowXsiLocation; 
            }
          } else
            ok = ok || (attr.getLocalName().equals("schemaLocation")); // xsi:schemalocation allowed for non FHIR content
          ok = ok || (hasTypeAttr(element) && attr.getLocalName().equals("type") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())); // xsi:type allowed if element says so
          if (!ok) { 
            logError(errors, ValidationMessage.NO_RULE_DATE, line(node, false), col(node, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ATTRIBUTE__ON__FOR_TYPE__PROPERTIES__, attr.getNodeName(), node.getNodeName(), element.fhirType(), properties), IssueSeverity.ERROR);
          }
        }
      }
    }

    // Pass 3: child nodes in document order.
    // lastName/repeatCount track runs of consecutive children mapped to the same property,
    // so that list items get an index ("[n]") in their path.
    String lastName = null;
    int repeatCount = 0;
    Node child = node.getFirstChild();
    while (child != null) {
      if (child.getNodeType() == Node.ELEMENT_NODE) {
        Property property = getElementProp(properties, child.getLocalName(), child.getNamespaceURI());

        if (property != null) {
          if (property.getName().equals(lastName)) {
            repeatCount++;
          } else {
            lastName = property.getName();
            repeatCount = 0;
          }
          if (!property.isChoice() && "xhtml".equals(property.getType())) {
            // xhtml content is parsed as a whole and stored on a single child; no recursion into it
            XhtmlNode xhtml;
            if (property.getDefinition().hasRepresentation(PropertyRepresentation.CDATEXT))
              xhtml = new CDANarrativeFormat().convert((org.w3c.dom.Element) child);
            else {
              XhtmlParser xp = new XhtmlParser();
              xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child);
              if (policy == ValidationPolicy.EVERYTHING) {
                for (StringPair s : xp.getValidationIssues()) {
                  logError(errors, "2022-11-17", line(child, false), col(child, false), path, IssueType.INVALID, context.formatMessage(s.getName(), s.getValue()), IssueSeverity.ERROR);                
                }
              }
            }
            Element n = new Element(property.getName(), property, "xhtml", new XhtmlComposer(XhtmlComposer.XML, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
            n.setPath(element.getPath()+"."+property.getName());
            element.getChildren().add(n);
          } else {
            String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
            String name = child.getLocalName();
            // for non-choice properties the model uses the property name even if the XML name differs
            if (!property.isChoice() && !name.equals(property.getName())) {
              name = property.getName();
            }
            Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
            if (property.isList()) {
              n.setPath(element.getPath()+"."+property.getName()+"["+repeatCount+"]");                                    
            } else {
              n.setPath(element.getPath()+"."+property.getName());
            }
            boolean xsiTypeChecked = false;
            boolean ok = true;
            if (property.isChoice()) {
              if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
                // the concrete type comes from xsi:type, or from the default-type extension when xsi:type is absent
                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
                if (Utilities.noString(xsiType)) {
                  if (ToolingExtensions.hasExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype")) {
                    xsiType = ToolingExtensions.readStringExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype");
                    n.setType(xsiType);
                  } else {
                    logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NO_TYPE_FOUND_ON_, child.getLocalName()), IssueSeverity.ERROR);
                    ok = false;
                  }
                } else {
                  // drop any namespace prefix from the xsi:type value
                  if (xsiType.contains(":"))
                    xsiType = xsiType.substring(xsiType.indexOf(":")+1);
                  n.setType(xsiType);
                  n.setExplicitType(xsiType);
                }
                xsiTypeChecked = true;
              } else
                n.setType(n.getType());
            }
            checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), xsiTypeChecked);
            element.getChildren().add(n);
            // the child is still added when its type could not be determined, but its content is not parsed
            if (ok) {
              if (property.isResource())
                parseResource(errors, npath, (org.w3c.dom.Element) child, n, property);
              else
                parseChildren(errors, npath, (org.w3c.dom.Element) child, n);
            }
          }
        } else {
          // Not a direct property: try the members of the choice group, wrapping a match
          // in a synthetic element for the choice group itself
          if (cgProp != null) {
            property = getElementProp(cgProp.getChildProperties(null, null), child.getLocalName(), child.getNamespaceURI());
            if (property != null) {
              if (cgProp.getName().equals(lastName)) {
                repeatCount++;
              } else {
                lastName = cgProp.getName();
                repeatCount = 0;
              }

              String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName();
              String name = cgProp.getName();
              Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML);
              cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 
              element.getChildren().add(cgn);

              npath = npath+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
              name = child.getLocalName();
              Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
              cgn.getChildren().add(n);
              n.setPath(element.getPath()+"."+property.getName());
              checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), false);
              parseChildren(errors, npath, (org.w3c.dom.Element) child, n);
            }
          }
          if (property == null) {
            logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ELEMENT_, child.getLocalName(), path), IssueSeverity.ERROR);
          }
        }
      } else if (child.getNodeType() == Node.TEXT_NODE && !Utilities.noString(child.getTextContent().trim()) && mtProp != null) {
        // Non-blank text inside a choice group that has a text property (mtProp != null implies cgProp != null):
        // wrap the text in a synthetic choice-group element
        if (cgProp.getName().equals(lastName)) {
          repeatCount++;
        } else {
          lastName = cgProp.getName();
          repeatCount = 0;
        }

        String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName();
        String name = cgProp.getName();
        Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML);
        cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 
        element.getChildren().add(cgn);

        npath = npath+"/text()";
        name = mtProp.getName();
        Element n = new Element(name, mtProp, mtProp.getType(), child.getTextContent().trim()).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
        cgn.getChildren().add(n);
        n.setPath(element.getPath()+"."+mtProp.getName());


      } else if (child.getNodeType() == Node.CDATA_SECTION_NODE) {
        logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.CDATA_IS_NOT_ALLOWED), IssueSeverity.ERROR);
      } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
        // 3 = Node.TEXT_NODE, 8 = Node.COMMENT_NODE; every other node type is rejected
        logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NODE_TYPE__IS_NOT_ALLOWED, Integer.toString(child.getNodeType())), IssueSeverity.ERROR);
      }
      child = child.getNextSibling();
    }
  }
560
561  private Property getChoiceGroupProp(List<Property> properties) {
562    for (Property p : properties) {
563      if (p.getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) {
564        return p;
565      }
566    }
567    return null;
568  }
569
570  private boolean validAttrValue(String value) {
571    if (version == null) {
572      return true;
573    }
574    if (version.equals("1.0")) {
575      boolean ok = true;
576      for (char ch : value.toCharArray()) {
577        if (ch <= 0x1F && !Utilities.existsInList(ch, '\r', '\n', '\t')) {
578          ok = false;
579        }
580      }
581      return ok;
582    } else
583      return true;
584  }
585
586
587  private Property getElementProp(List<Property> properties, String nodeName, String namespace) {
588    List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
589    // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
590    // and therefore the longer property names get evaluated first
591    Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
592      @Override
593      public int compare(Property o1, Property o2) {
594        return o2.getName().length() - o1.getName().length();
595      }
596    });
597    // first scan, by namespace
598    for (Property p : propsSortedByLongestFirst) {
599      if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
600        if (p.getXmlName().equals(nodeName) && p.getXmlNamespace().equals(namespace)) 
601          return p;
602      }
603    }
604    for (Property p : propsSortedByLongestFirst) {
605      if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
606        if (p.getXmlName().equals(nodeName)) 
607          return p;
608        if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
609          return p;
610      }
611    }
612
613
614    return null;
615  }
616
617  private Property getAttrProp(List<Property> properties, String nodeName, String namespace) {
618    for (Property p : properties) {
619      if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && p.getXmlNamespace().equals(namespace)) {
620        return p;
621      }
622    }
623    if (namespace == null) {
624      for (Property p : properties) {
625        if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) {
626          return p;
627        }    
628      }
629    }
630    return null;
631  }
632
633  private Property getTextProp(List<Property> properties) {
634    for (Property p : properties)
635      if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
636        return p;
637    return null;
638  }
639
640  private String convertForDateFormatFromExternal(String fmt, String av) throws FHIRException {
641    if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) {
642      try {
643        DateTimeType d = DateTimeType.parseV3(av);
644        return d.asStringValue();
645      } catch (Exception e) {
646        return av; // not at all clear what to do in this case.
647      }
648    }
649    throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATA_FORMAT_, fmt));
650  }
651
652  private String convertForDateFormatToExternal(String fmt, String av) throws FHIRException {
653    if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) {
654      DateTimeType d = new DateTimeType(av);
655      return d.getAsV3();
656    } else
657      throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATE_FORMAT_, fmt));
658  }
659
660  private void parseResource(List<ValidationMessage> errors, String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
661    org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
662    String name = res.getLocalName();
663    StructureDefinition sd = context.fetchResource(StructureDefinition.class, ProfileUtilities.sdNs(name, null));
664    if (sd == null)
665      throw new FHIRFormatError(context.formatMessage(I18nConstants.CONTAINED_RESOURCE_DOES_NOT_APPEAR_TO_BE_A_FHIR_RESOURCE_UNKNOWN_NAME_, res.getLocalName()));
666    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities()), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
667    parent.setType(name);
668    parseChildren(errors, res.getLocalName(), res, parent);
669  }
670
671  private void reapComments(org.w3c.dom.Element element, Element context) {
672    Node node = element.getPreviousSibling();
673    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
674      if (node.getNodeType() == Node.COMMENT_NODE)
675        context.getComments().add(0, node.getTextContent());
676      node = node.getPreviousSibling();
677    }
678    node = element.getLastChild();
679    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
680      node = node.getPreviousSibling();
681    }
682    while (node != null) {
683      if (node.getNodeType() == Node.COMMENT_NODE)
684        context.getComments().add(node.getTextContent());
685      node = node.getNextSibling();
686    }
687  }
688
689  private boolean isAttr(Property property) {
690    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
691      if (r.getValue() == PropertyRepresentation.XMLATTR) {
692        return true;
693      }
694    }
695    return false;
696  }
697
698  private boolean isCdaText(Property property) {
699    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
700      if (r.getValue() == PropertyRepresentation.CDATEXT) {
701        return true;
702      }
703    }
704    return false;
705  }
706
707  private boolean isTypeAttr(Property property) {
708    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
709      if (r.getValue() == PropertyRepresentation.TYPEATTR) {
710        return true;
711      }
712    }
713    return false;
714  }
715
716  private boolean isText(Property property) {
717    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
718      if (r.getValue() == PropertyRepresentation.XMLTEXT) {
719        return true;
720      }
721    }
722    return false;
723  }
724
725  @Override
726  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException, FHIRException {
727    markedXhtml = false;
728    XMLWriter xml = new XMLWriter(stream, "UTF-8");
729    xml.setSortAttributes(false);
730    xml.setPretty(style == OutputStyle.PRETTY);
731    xml.start();
732    if (e.getPath() == null) {
733      e.populatePaths(null);
734    }
735    String ns = e.getProperty().getXmlNamespace();
736    if (ns!=null && !"noNamespace".equals(ns)) {
737      xml.setDefaultNamespace(ns);
738    }
739    if (hasTypeAttr(e))
740      xml.namespace("http://www.w3.org/2001/XMLSchema-instance", "xsi");
741    addNamespaces(xml, e);
742    composeElement(xml, e, e.getType(), true);
743    xml.end();
744  }
745
746  private void addNamespaces(IXMLWriter xml, Element e) throws IOException {
747    String ns = e.getProperty().getXmlNamespace();
748    if (ns!=null && xml.getDefaultNamespace()!=null && !xml.getDefaultNamespace().equals(ns)){
749      if (!xml.namespaceDefined(ns)) {
750        String prefix = pathPrefix(ns);
751        if (prefix.endsWith(":")) {
752          prefix = prefix.substring(0, prefix.length()-1);
753        }
754        if ("?".equals(prefix)) {
755          xml.namespace(ns);
756        } else {
757          xml.namespace(ns, prefix);
758        }
759      }
760    }
761    for (Element c : e.getChildren()) {
762      addNamespaces(xml, c);
763    }
764  }
765
766  private boolean hasTypeAttr(Element e) {
767    if (isTypeAttr(e.getProperty()))
768      return true;
769    for (Element c : e.getChildren()) {
770      if (hasTypeAttr(c))
771        return true;
772    }
773    // xsi_type is always allowed on CDA elements. right now, I'm not sure where to indicate this in the model, 
774    // so it's just hardcoded here 
775    if (e.getType() != null && e.getType().startsWith(Constants.NS_CDA_ROOT)) {
776      return true;
777    }
778    return false;
779  }
780
781  private void setXsiTypeIfIsTypeAttr(IXMLWriter xml, Element element) throws IOException, FHIRException {
782    if (isTypeAttr(element.getProperty()) && !Utilities.noString(element.getType())) {
783      String type = element.getType();
784      if (Utilities.isAbsoluteUrl(type)) {
785        type = type.substring(type.lastIndexOf("/")+1);
786      }
787      xml.attribute("xsi:type",type);    
788    }
789  }
790
791  public void compose(Element e, IXMLWriter xml) throws Exception {
792    if (e.getPath() == null) {
793      e.populatePaths(null);
794    }
795    markedXhtml = false;
796    xml.start();
797    xml.setDefaultNamespace(e.getProperty().getXmlNamespace());
798    if (schemaPath != null) {
799      xml.setSchemaLocation(FormatUtilities.FHIR_NS, Utilities.pathURL(schemaPath, e.fhirType()+".xsd"));
800    }
801    composeElement(xml, e, e.getType(), true);
802    xml.end();
803  }
804
805  private void composeElement(IXMLWriter xml, Element element, String elementName, boolean root) throws IOException, FHIRException {
806    if (showDecorations) {
807      @SuppressWarnings("unchecked")
808      List<ElementDecoration> decorations = (List<ElementDecoration>) element.getUserData("fhir.decorations");
809      if (decorations != null)
810        for (ElementDecoration d : decorations)
811          xml.decorate(d);
812    }
813    for (String s : element.getComments()) {
814      xml.comment(s, true);
815    }
816    if (isText(element.getProperty())) {
817      if (linkResolver != null)
818        xml.link(linkResolver.resolveProperty(element.getProperty()));
819      xml.enter(element.getProperty().getXmlNamespace(),elementName);
820      if (linkResolver != null && element.getProperty().isReference()) {
821        String ref = linkResolver.resolveReference(getReferenceForElement(element));
822        if (ref != null) {
823          xml.externalLink(ref);
824        }
825      }
826      xml.text(element.getValue());
827      xml.exit(element.getProperty().getXmlNamespace(),elementName);   
828    } else if (!element.hasChildren() && !element.hasValue()) {
829      if (element.getExplicitType() != null)
830        xml.attribute("xsi:type", element.getExplicitType());
831      xml.element(elementName);
832    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
833      if (element.getType().equals("xhtml")) {
834        String rawXhtml = element.getValue();
835        if (isCdaText(element.getProperty())) {
836          new CDANarrativeFormat().convert(xml, new XhtmlParser().parseFragment(rawXhtml));
837        } else {
838          xml.escapedText(rawXhtml);
839          if (!markedXhtml) {
840            xml.anchor("end-xhtml");
841            markedXhtml = true;
842          }
843        }
844      } else if (isText(element.getProperty())) {
845        if (linkResolver != null)
846          xml.link(linkResolver.resolveProperty(element.getProperty()));
847        xml.text(element.getValue());
848      } else {
849        setXsiTypeIfIsTypeAttr(xml, element);
850        if (element.hasValue()) {
851          if (linkResolver != null)
852            xml.link(linkResolver.resolveType(element.getType()));
853          xml.attribute("value", element.getValue());
854        }
855        if (linkResolver != null)
856          xml.link(linkResolver.resolveProperty(element.getProperty()));
857        if (element.hasChildren()) {
858          xml.enter(element.getProperty().getXmlNamespace(), elementName);
859          if (linkResolver != null && element.getProperty().isReference()) {
860            String ref = linkResolver.resolveReference(getReferenceForElement(element));
861            if (ref != null) {
862              xml.externalLink(ref);
863            }
864          }
865          for (Element child : element.getChildren()) 
866            composeElement(xml, child, child.getName(), false);
867          xml.exit(element.getProperty().getXmlNamespace(),elementName);
868        } else
869          xml.element(elementName);
870      }
871    } else {
872      setXsiTypeIfIsTypeAttr(xml, element);
873      Set<String> handled = new HashSet<>();
874      for (Element child : element.getChildren()) {
875        if (!handled.contains(child.getName()) && isAttr(child.getProperty()) && wantCompose(element.getPath(), child)) {
876          handled.add(child.getName());
877          String av = child.getValue();
878          if (child.getProperty().isList()) {
879            for (Element c2 : element.getChildren()) {
880              if (c2 != child && c2.getName().equals(child.getName())) {
881                av = av + " "+c2.getValue();
882              }
883            }            
884          }
885          if (linkResolver != null)
886            xml.link(linkResolver.resolveType(child.getType()));
887          if (ToolingExtensions.hasExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT))
888            av = convertForDateFormatToExternal(ToolingExtensions.readStringExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av);
889          xml.attribute(child.getProperty().getXmlNamespace(),child.getProperty().getXmlName(), av);
890        }
891      }
892      if (!element.getProperty().getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) {
893        if (linkResolver != null)
894          xml.link(linkResolver.resolveProperty(element.getProperty()));
895        if (!xml.namespaceDefined(element.getProperty().getXmlNamespace())) {
896          String abbrev = makeNamespaceAbbrev(element.getProperty(), xml);
897          xml.namespace(element.getProperty().getXmlNamespace(), abbrev);
898        }
899        xml.enter(element.getProperty().getXmlNamespace(), elementName);
900      }
901
902      if (!root && element.getSpecial() != null) {
903        if (linkResolver != null)
904          xml.link(linkResolver.resolveProperty(element.getProperty()));
905        xml.enter(element.getProperty().getXmlNamespace(),element.getType());
906      }
907      if (linkResolver != null && element.getProperty().isReference()) {
908        String ref = linkResolver.resolveReference(getReferenceForElement(element));
909        if (ref != null) {
910          xml.externalLink(ref);
911        }
912      }
913      for (Element child : element.getChildren()) {
914        if (wantCompose(element.getPath(), child)) {
915          if (isText(child.getProperty())) {
916            if (linkResolver != null)
917              xml.link(linkResolver.resolveProperty(element.getProperty()));
918            xml.text(child.getValue());
919          } else if (!isAttr(child.getProperty())) {
920            composeElement(xml, child, child.getName(), false);
921          }
922        }
923      }
924      if (!element.getProperty().getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) {
925        if (!root && element.getSpecial() != null)
926          xml.exit(element.getProperty().getXmlNamespace(),element.getType());
927        xml.exit(element.getProperty().getXmlNamespace(),elementName);
928      }
929    }
930  }
931
932  private String makeNamespaceAbbrev(Property property, IXMLWriter xml) {
933    // it's a cosmetic thing, but we're going to try to come up with a nice namespace
934
935    ElementDefinition ed = property.getDefinition();
936    String ns = property.getXmlNamespace();
937    String n = property.getXmlName();
938
939    String diff = property.getName().toLowerCase().replace(n.toLowerCase(), "");
940    if (!Utilities.noString(diff) && diff.length() <= 5 && Utilities.isToken(diff) && !xml.abbreviationDefined(diff)) {
941      return diff;
942    }
943
944    int i = ns.length()-1;
945    while (i > 0) {
946      if (Character.isAlphabetic(ns.charAt(i)) || Character.isDigit(ns.charAt(i))) {
947        i--;
948      } else {
949        break;
950      }
951    }
952    String tail = ns.substring(i+1);
953    if (!Utilities.noString(tail) && tail.length() <= 5 && Utilities.isToken(tail) && !xml.abbreviationDefined(tail)) {
954      return tail;
955    }
956
957    i = 0;
958    while (xml.abbreviationDefined("ns"+i)) {
959      i++;
960    }
961    return "ns"+i;
962  }
963  private String checkHeader(List<ValidationMessage> errors, InputStream stream) throws IOException {
964    try {
965      // the stream will either start with the UTF-8 BOF or with <xml
966      int i0 = stream.read();
967      int i1 = stream.read();
968      int i2 = stream.read();
969
970      StringBuilder b = new StringBuilder();
971      if (i0 == 0xEF && i1 == 0xBB && i2 == 0xBF) {
972        // ok, it's UTF-8
973      } else if (i0 == 0x3C && i1 == 0x3F && i2 == 0x78) { // <xm
974        b.append((char) i0);
975        b.append((char) i1);
976        b.append((char) i2);
977      } else if (i0 == 60) { // just plain old XML with no header
978        return "1.0";        
979      } else {
980        throw new Exception(context.formatMessage(I18nConstants.XML_ENCODING_INVALID));
981      }
982      int i = stream.read();
983      do {
984        b.append((char) i);
985        i = stream.read();
986      } while (i != 0x3E);
987      String header = b.toString();
988      String e = null;
989      i = header.indexOf("encoding=\"");
990      if (i > -1) {
991        e = header.substring(i+10, i+15);
992      } else {
993        i = header.indexOf("encoding='");
994        if (i > -1) {
995          e = header.substring(i+10, i+15);
996        } 
997      }
998      if (e != null && !"UTF-8".equalsIgnoreCase(e)) {
999        logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, context.formatMessage(I18nConstants.XML_ENCODING_INVALID), IssueSeverity.ERROR);
1000      }
1001
1002      i = header.indexOf("version=\"");
1003      if (i > -1) {
1004        return header.substring(i+9, i+12);
1005      } else {
1006        i = header.indexOf("version='");
1007        if (i > -1) {
1008          return header.substring(i+9, i+12);          
1009        } 
1010      }
1011      return "?xml-p1?";
1012    } catch (Exception e) {
1013      // suppress this error 
1014      logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, e.getMessage(), IssueSeverity.ERROR);
1015    }
1016    return "?xml-p2?";
1017  }
1018
1019  class NullErrorHandler implements ErrorHandler {
1020    @Override
1021    public void fatalError(SAXParseException e) {
1022      // do nothing
1023    }
1024
1025    @Override
1026    public void error(SAXParseException e) {
1027      // do nothing
1028    }
1029
1030    @Override
1031    public void warning(SAXParseException e) {
1032      // do nothing
1033    }
1034  }
1035}