001package org.hl7.fhir.r5.elementmodel;
002
003import java.io.ByteArrayInputStream;
004
005/*
006  Copyright (c) 2011+, HL7, Inc.
007  All rights reserved.
008
009  Redistribution and use in source and binary forms, with or without modification, 
010  are permitted provided that the following conditions are met:
011
012 * Redistributions of source code must retain the above copyright notice, this 
013     list of conditions and the following disclaimer.
014 * Redistributions in binary form must reproduce the above copyright notice, 
015     this list of conditions and the following disclaimer in the documentation 
016     and/or other materials provided with the distribution.
017 * Neither the name of HL7 nor the names of its contributors may be used to 
018     endorse or promote products derived from this software without specific 
019     prior written permission.
020
021  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
022  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
023  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
024  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
025  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
026  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
027  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
028  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
029  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
030  POSSIBILITY OF SUCH DAMAGE.
031
032 */
033
034
035import java.io.IOException;
036import java.io.InputStream;
037import java.io.OutputStream;
038import java.util.ArrayList;
039import java.util.Collections;
040import java.util.Comparator;
041import java.util.HashSet;
042import java.util.List;
043import java.util.Set;
044
045import javax.xml.parsers.DocumentBuilder;
046import javax.xml.parsers.DocumentBuilderFactory;
047import javax.xml.parsers.SAXParser;
048import javax.xml.parsers.SAXParserFactory;
049import javax.xml.transform.Transformer;
050import javax.xml.transform.TransformerFactory;
051import javax.xml.transform.dom.DOMResult;
052import javax.xml.transform.sax.SAXSource;
053
054import org.hl7.fhir.exceptions.DefinitionException;
055import org.hl7.fhir.exceptions.FHIRException;
056import org.hl7.fhir.exceptions.FHIRFormatError;
057import org.hl7.fhir.r5.conformance.profile.ProfileUtilities;
058import org.hl7.fhir.r5.context.IWorkerContext;
059import org.hl7.fhir.r5.elementmodel.Element.SpecialElement;
060import org.hl7.fhir.r5.elementmodel.Manager.FhirFormat;
061import org.hl7.fhir.r5.formats.FormatUtilities;
062import org.hl7.fhir.r5.formats.IParser.OutputStyle;
063import org.hl7.fhir.r5.model.Constants;
064import org.hl7.fhir.r5.model.DateTimeType;
065import org.hl7.fhir.r5.model.ElementDefinition;
066import org.hl7.fhir.r5.model.ElementDefinition.PropertyRepresentation;
067import org.hl7.fhir.r5.model.Enumeration;
068import org.hl7.fhir.r5.model.StructureDefinition;
069import org.hl7.fhir.r5.utils.ToolingExtensions;
070import org.hl7.fhir.r5.utils.formats.XmlLocationAnnotator;
071import org.hl7.fhir.r5.utils.formats.XmlLocationData;
072import org.hl7.fhir.utilities.ElementDecoration;
073import org.hl7.fhir.utilities.StringPair;
074import org.hl7.fhir.utilities.TextFile;
075import org.hl7.fhir.utilities.Utilities;
076import org.hl7.fhir.utilities.i18n.I18nConstants;
077import org.hl7.fhir.utilities.validation.ValidationMessage;
078import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
079import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
080import org.hl7.fhir.utilities.xhtml.CDANarrativeFormat;
081import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
082import org.hl7.fhir.utilities.xhtml.XhtmlNode;
083import org.hl7.fhir.utilities.xhtml.XhtmlParser;
084import org.hl7.fhir.utilities.xml.IXMLWriter;
085import org.hl7.fhir.utilities.xml.XMLUtil;
086import org.hl7.fhir.utilities.xml.XMLWriter;
087import org.w3c.dom.Document;
088import org.w3c.dom.Node;
089import org.xml.sax.ErrorHandler;
090import org.xml.sax.InputSource;
091import org.xml.sax.SAXParseException;
092import org.xml.sax.XMLReader;
093
094public class XmlParser extends ParserBase {
  // When true, an xsi:schemaLocation attribute on a FHIR-namespace element is accepted by parseChildren()
  // instead of being reported as an undefined attribute.
  private boolean allowXsiLocation;
  // XML version string returned by checkHeader() during a ValidationPolicy.EVERYTHING parse;
  // validAttrValue() consults it, and treats null as "no restriction".
  private String version;
097
098  public XmlParser(IWorkerContext context) {
099    super(context);
100  }
101
  // Exposed through getSchemaPath()/setSchemaPath(); not read anywhere else in the visible part of this class.
  private String schemaPath;
  // NOTE(review): not referenced in the visible part of this class - presumably used further down; confirm.
  private boolean markedXhtml;
104
105  public String getSchemaPath() {
106    return schemaPath;
107  }
108  public void setSchemaPath(String schemaPath) {
109    this.schemaPath = schemaPath;
110  }
111
112  public boolean isAllowXsiLocation() {
113    return allowXsiLocation;
114  }
115
116  public void setAllowXsiLocation(boolean allowXsiLocation) {
117    this.allowXsiLocation = allowXsiLocation;
118  }
119
120  public List<ValidatedFragment> parse(InputStream inStream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
121
122    byte[] content = TextFile.streamToBytes(inStream);
123    ValidatedFragment focusFragment = new ValidatedFragment(ValidatedFragment.FOCUS_NAME, "xml", content, false);
124
125    ByteArrayInputStream stream = new ByteArrayInputStream(content);
126    Document doc = null;
127    try {
128      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
129      // xxe protection
130      factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
131      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
132      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
133      factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
134      factory.setXIncludeAware(false);
135      factory.setExpandEntityReferences(false);
136
137      factory.setNamespaceAware(true);
138      if (policy == ValidationPolicy.EVERYTHING) {
139        // The SAX interface appears to not work when reporting the correct version/encoding.
140        // if we can, we'll inspect the header/encoding ourselves 
141
142        stream.mark(1024);
143        version = checkHeader(focusFragment.getErrors(), stream);
144        stream.reset();
145
146        // use a slower parser that keeps location data
147        TransformerFactory transformerFactory = XMLUtil.newXXEProtectedTransformerFactory();
148        Transformer nullTransformer = transformerFactory.newTransformer();
149        DocumentBuilder docBuilder = factory.newDocumentBuilder();
150        doc = docBuilder.newDocument();
151        DOMResult domResult = new DOMResult(doc);
152        SAXParserFactory spf = SAXParserFactory.newInstance();
153        spf.setNamespaceAware(true);
154        spf.setValidating(false);
155        // xxe protection
156        spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
157        spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
158        SAXParser saxParser = spf.newSAXParser();
159        XMLReader xmlReader = saxParser.getXMLReader();
160        // xxe protection
161        xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
162        xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
163
164        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
165        InputSource inputSource = new InputSource(stream);
166        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
167        nullTransformer.transform(saxSource, domResult);
168      } else {
169        DocumentBuilder builder = factory.newDocumentBuilder();
170        builder.setErrorHandler(new NullErrorHandler());
171        doc = builder.parse(stream);
172      }
173    } catch (Exception e) {
174      if (e.getMessage().contains("lineNumber:") && e.getMessage().contains("columnNumber:")) {
175        int line = Utilities.parseInt(extractVal(e.getMessage(), "lineNumber"), 0); 
176        int col = Utilities.parseInt(extractVal(e.getMessage(), "columnNumber"), 0); 
177        logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, line, col, "(xml)", IssueType.INVALID, e.getMessage().substring(e.getMessage().lastIndexOf(";")+1).trim(), IssueSeverity.FATAL);
178      } else {
179        logError(focusFragment.getErrors(), ValidationMessage.NO_RULE_DATE, 0, 0, "(xml)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
180      }
181      doc = null;
182    }
183    if (doc != null) {
184      focusFragment.setElement(parse(focusFragment.getErrors(), doc));
185    }
186    List<ValidatedFragment> res = new ArrayList<>();
187    res.add(focusFragment);
188    return res;
189  }
190
191
192  private String extractVal(String src, String name) {
193    src = src.substring(src.indexOf(name)+name.length()+1);
194    src = src.substring(0, src.indexOf(";")).trim();
195    return src;
196  }
197  private void checkForProcessingInstruction(List<ValidationMessage> errors, Document document) throws FHIRFormatError {
198    if (policy == ValidationPolicy.EVERYTHING && FormatUtilities.FHIR_NS.equals(document.getDocumentElement().getNamespaceURI())) {
199      Node node = document.getFirstChild();
200      while (node != null) {
201        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
202          logError(errors, ValidationMessage.NO_RULE_DATE, line(document, false), col(document, false), "(document)", IssueType.INVALID, context.formatMessage(
203              I18nConstants.NO_PROCESSING_INSTRUCTIONS_ALLOWED_IN_RESOURCES), IssueSeverity.ERROR);
204        node = node.getNextSibling();
205      }
206    }
207  }
208
209
210  private int line(Node node, boolean end) {
211    XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
212    return loc == null ? 0 : end ? loc.getEndLine() : loc.getStartLine();
213  }
214
215  private int col(Node node, boolean end) {
216    XmlLocationData loc = node == null ? null : (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
217    return loc == null ? 0 : end ? loc.getEndColumn() : loc.getStartColumn();
218  }
219
220  public Element parse(List<ValidationMessage> errors, Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
221    checkForProcessingInstruction(errors, doc);
222    org.w3c.dom.Element element = doc.getDocumentElement();
223    return parse(errors, element);
224  }
225
226  public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
227    String ns = element.getNamespaceURI();
228    String name = element.getLocalName();
229    String path = "/"+pathPrefix(ns)+name;
230
231    StructureDefinition sd = getDefinition(errors, line(element, false), col(element, false), (ns == null ? "noNamespace" : ns), name);
232    if (sd == null)
233      return null;
234
235    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML);
236    result.setPath(element.getLocalName());
237    checkElement(errors, element, result, path, result.getProperty(), false);
238    result.markLocation(line(element, false), col(element, false));
239    result.setType(element.getLocalName());
240    parseChildren(errors, path, element, result);
241    result.numberChildren();
242    return result;
243  }
244
245  private String pathPrefix(String ns) {
246    if (Utilities.noString(ns))
247      return "";
248    if (ns.equals(FormatUtilities.FHIR_NS))
249      return "f:";
250    if (ns.equals(FormatUtilities.XHTML_NS))
251      return "h:";
252    if (ns.equals("urn:hl7-org:v3"))
253      return "v3:";
254    if (ns.equals("urn:hl7-org:sdtc")) 
255      return "sdtc:";
256    if (ns.equals("urn:ihe:pharm"))
257      return "pharm:";
258    if (ns.equals("http://ns.electronichealth.net.au/Ci/Cda/Extensions/3.0"))
259      return "ext:";
260    return "?:";
261  }
262
263  private boolean empty(org.w3c.dom.Element element) {
264    for (int i = 0; i < element.getAttributes().getLength(); i++) {
265      String n = element.getAttributes().item(i).getNodeName();
266      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
267        return false;
268    }
269    if (!Utilities.noString(element.getTextContent().trim()))
270      return false;
271
272    Node n = element.getFirstChild();
273    while (n != null) {
274      if (n.getNodeType() == Node.ELEMENT_NODE)
275        return false;
276      n = n.getNextSibling();
277    }
278    return true;
279  }
280
281  private void checkElement(List<ValidationMessage> errors, org.w3c.dom.Element element, Element e, String path, Property prop, boolean xsiTypeChecked) throws FHIRFormatError {
282    if (policy == ValidationPolicy.EVERYTHING) {
283      if (empty(element) && FormatUtilities.FHIR_NS.equals(element.getNamespaceURI())) // this rule only applies to FHIR Content
284        logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.ELEMENT_MUST_HAVE_SOME_CONTENT), IssueSeverity.ERROR);
285      String ns = prop.getXmlNamespace();
286      String elementNs = element.getNamespaceURI();
287      if (elementNs == null) {
288        elementNs = "noNamespace";
289      }
290      if (!elementNs.equals(ns)) {
291        logError(errors, ValidationMessage.NO_RULE_DATE, line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.WRONG_NAMESPACE__EXPECTED_, ns), IssueSeverity.ERROR);
292      }
293      if (!xsiTypeChecked) {
294        String xsiType = element.getAttributeNS(FormatUtilities.NS_XSI, "type");
295        if (!Utilities.noString(xsiType)) {
296          String actualType = prop.getXmlTypeName();
297          if (xsiType.equals(actualType)) {
298            logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_UNNECESSARY), IssueSeverity.INFORMATION);            
299          } else {
300            StructureDefinition sd = findLegalConstraint(xsiType, actualType);
301            if (sd != null) {
302              e.setType(sd.getType());
303              e.setExplicitType(xsiType);
304            } else {
305              logError(errors, "2023-10-12", line(element, false), col(element, false), path, IssueType.INVALID, context.formatMessage(I18nConstants.XSI_TYPE_WRONG, xsiType, actualType), IssueSeverity.ERROR);           
306            }  
307          }
308        }
309      }
310    }
311  }
312
313  private StructureDefinition findLegalConstraint(String xsiType, String actualType) {
314    StructureDefinition sdA = context.fetchTypeDefinition(actualType);
315    StructureDefinition sd = context.fetchTypeDefinition(xsiType);
316    while (sd != null) {
317      if (sd == sdA) {
318        return sd;
319      }
320      sd = context.fetchResource(StructureDefinition.class, sd.getBaseDefinition());
321    }
322    return null;
323  }
324
325  public Element parse(List<ValidationMessage> errors, org.w3c.dom.Element base, String type) throws Exception {
326    StructureDefinition sd = getDefinition(errors, 0, 0, FormatUtilities.FHIR_NS, type);
327    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities())).setFormat(FhirFormat.XML).setNativeObject(base);
328    result.setPath(base.getLocalName());
329    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
330    checkElement(errors, base, result, path, result.getProperty(), false);
331    result.setType(base.getLocalName());
332    parseChildren(errors, path, base, result);
333    result.numberChildren();
334    return result;
335  }
336
337  private void parseChildren(List<ValidationMessage> errors, String path, org.w3c.dom.Element node, Element element) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
338    // this parsing routine retains the original order in a the XML file, to support validation
339    reapComments(node, element);
340    List<Property> properties = element.getProperty().getChildProperties(element.getName(), XMLUtil.getXsiType(node));
341    Property cgProp = getChoiceGroupProp(properties);
342    Property mtProp = cgProp == null ? null : getTextProp(cgProp.getChildProperties(null, null));
343
344    String text = mtProp == null ? XMLUtil.getDirectText(node).trim() : null;
345    int line = line(node, false);
346    int col = col(node, false);
347    if (!Utilities.noString(text)) {
348      Property property = getTextProp(properties);
349      if (property != null) {
350        if ("ED.data[x]".equals(property.getDefinition().getId()) || (property.getDefinition()!=null && property.getDefinition().getBase()!=null && "ED.data[x]".equals(property.getDefinition().getBase().getPath()))) {
351          if ("B64".equals(node.getAttribute("representation"))) {
352            Element n = new Element("dataBase64Binary", property, "base64Binary", text).markLocation(line, col).setFormat(FhirFormat.XML);
353            n.setPath(element.getPath()+"."+property.getName());
354            element.getChildren().add(n);
355          } else {
356            Element n = new Element("dataString", property, "string", text).markLocation(line, col).setFormat(FhirFormat.XML);
357            n.setPath(element.getPath()+"."+property.getName());
358            element.getChildren().add(n);
359          }
360        } else {
361          Element n = new Element(property.getName(), property, property.getType(), text).markLocation(line, col).setFormat(FhirFormat.XML);
362          n.setPath(element.getPath()+"."+property.getName());
363          element.getChildren().add(n);
364        }
365      } else {
366        Node n = node.getFirstChild();
367        while (n != null) {
368          if (n.getNodeType() == Node.TEXT_NODE && !Utilities.noString(n.getTextContent().trim())) {
369            Node nt = n; // try to find the nearest element for a line/col location
370            boolean end = false;
371            while (nt.getPreviousSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) {
372              nt = nt.getPreviousSibling();
373              end = true;
374            }
375            while (nt.getNextSibling() != null && nt.getNodeType() != Node.ELEMENT_NODE) {
376              nt = nt.getNextSibling();
377              end = false;
378            }
379            line = line(nt, end);
380            col = col(nt, end);
381            logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.TEXT_SHOULD_NOT_BE_PRESENT, Utilities.makeSingleLine(n.getTextContent().trim())), IssueSeverity.ERROR);
382          }
383          n = n.getNextSibling();
384        }
385      }                 
386    }
387
388    for (int i = 0; i < node.getAttributes().getLength(); i++) {
389      Node attr = node.getAttributes().item(i);
390      String value = attr.getNodeValue();
391      if (!validAttrValue(value)) {
392        logError(errors, ValidationMessage.NO_RULE_DATE, line, col, path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.XML_ATTR_VALUE_INVALID, attr.getNodeName()), IssueSeverity.ERROR);
393      }
394      if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
395        Property property = getAttrProp(properties, attr.getLocalName(), attr.getNamespaceURI());
396        if (property != null) {
397          String av = attr.getNodeValue();
398          if (ToolingExtensions.hasExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT))
399            av = convertForDateFormatFromExternal(ToolingExtensions.readStringExtension(property.getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av);          
400          if (property.getName().equals("value") && element.isPrimitive())
401            element.setValue(av);
402          else {
403            String[] vl = {av};
404            if (property.isList() && av.contains(" ")) {
405              vl = av.split(" ");
406            }
407            for (String v : vl) {
408              Element n = new Element(property.getName(), property, property.getType(), v).markLocation(line, col).setFormat(FhirFormat.XML);
409              n.setPath(element.getPath()+"."+property.getName());
410              element.getChildren().add(n);
411            }
412          }
413        } else {
414          boolean ok = false;
415          if (FormatUtilities.FHIR_NS.equals(node.getNamespaceURI())) {
416            if (attr.getLocalName().equals("schemaLocation") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())) {
417              ok = ok || allowXsiLocation; 
418            }
419          } else
420            ok = ok || (attr.getLocalName().equals("schemaLocation")); // xsi:schemalocation allowed for non FHIR content
421          ok = ok || (hasTypeAttr(element) && attr.getLocalName().equals("type") && FormatUtilities.NS_XSI.equals(attr.getNamespaceURI())); // xsi:type allowed if element says so
422          if (!ok) { 
423            logError(errors, ValidationMessage.NO_RULE_DATE, line(node, false), col(node, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ATTRIBUTE__ON__FOR_TYPE__PROPERTIES__, attr.getNodeName(), node.getNodeName(), element.fhirType(), properties), IssueSeverity.ERROR);
424          }
425        }
426      }
427    }
428
429    String lastName = null;
430    int repeatCount = 0;
431    Node child = node.getFirstChild();
432    while (child != null) {
433      if (child.getNodeType() == Node.ELEMENT_NODE) {
434        Property property = getElementProp(properties, child.getLocalName(), child.getNamespaceURI());
435
436        if (property != null) {
437          if (property.getName().equals(lastName)) {
438            repeatCount++;
439          } else {
440            lastName = property.getName();
441            repeatCount = 0;
442          }
443          if (!property.isChoice() && "xhtml".equals(property.getType())) {
444            XhtmlNode xhtml;
445            if (property.getDefinition().hasRepresentation(PropertyRepresentation.CDATEXT))
446              xhtml = new CDANarrativeFormat().convert((org.w3c.dom.Element) child);
447            else {
448              XhtmlParser xp = new XhtmlParser();
449              xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child);
450              if (policy == ValidationPolicy.EVERYTHING) {
451                for (StringPair s : xp.getValidationIssues()) {
452                  logError(errors, "2022-11-17", line(child, false), col(child, false), path, IssueType.INVALID, context.formatMessage(s.getName(), s.getValue()), IssueSeverity.ERROR);                
453                }
454              }
455            }
456            Element n = new Element(property.getName(), property, "xhtml", new XhtmlComposer(XhtmlComposer.XML, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
457            n.setPath(element.getPath()+"."+property.getName());
458            element.getChildren().add(n);
459          } else {
460            String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
461            String name = child.getLocalName();
462            if (!property.isChoice() && !name.equals(property.getName())) {
463              name = property.getName();
464            }
465            Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
466            if (property.isList()) {
467              n.setPath(element.getPath()+"."+property.getName()+"["+repeatCount+"]");                                    
468            } else {
469              n.setPath(element.getPath()+"."+property.getName());
470            }
471            boolean xsiTypeChecked = false;
472            boolean ok = true;
473            if (property.isChoice()) {
474              if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
475                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
476                if (Utilities.noString(xsiType)) {
477                  if (ToolingExtensions.hasExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype")) {
478                    xsiType = ToolingExtensions.readStringExtension(property.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-defaulttype");
479                    n.setType(xsiType);
480                  } else {
481                    logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NO_TYPE_FOUND_ON_, child.getLocalName()), IssueSeverity.ERROR);
482                    ok = false;
483                  }
484                } else {
485                  if (xsiType.contains(":"))
486                    xsiType = xsiType.substring(xsiType.indexOf(":")+1);
487                  n.setType(xsiType);
488                  n.setExplicitType(xsiType);
489                }
490                xsiTypeChecked = true;
491              } else
492                n.setType(n.getType());
493            }
494            checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), xsiTypeChecked);
495            element.getChildren().add(n);
496            if (ok) {
497              if (property.isResource())
498                parseResource(errors, npath, (org.w3c.dom.Element) child, n, property);
499              else
500                parseChildren(errors, npath, (org.w3c.dom.Element) child, n);
501            }
502          }
503        } else {
504          if (cgProp != null) {
505            property = getElementProp(cgProp.getChildProperties(null, null), child.getLocalName(), child.getNamespaceURI());
506            if (property != null) {
507              if (cgProp.getName().equals(lastName)) {
508                repeatCount++;
509              } else {
510                lastName = cgProp.getName();
511                repeatCount = 0;
512              }
513
514              String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName();
515              String name = cgProp.getName();
516              Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML);
517              cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 
518              element.getChildren().add(cgn);
519
520              npath = npath+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
521              name = child.getLocalName();
522              Element n = new Element(name, property).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
523              cgn.getChildren().add(n);
524              n.setPath(element.getPath()+"."+property.getName());
525              checkElement(errors, (org.w3c.dom.Element) child, n, npath, n.getProperty(), false);
526              parseChildren(errors, npath, (org.w3c.dom.Element) child, n);
527            }
528          }
529          if (property == null) {
530            logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.UNDEFINED_ELEMENT_, child.getLocalName(), path), IssueSeverity.ERROR);
531          }
532        }
533      } else if (child.getNodeType() == Node.TEXT_NODE && !Utilities.noString(child.getTextContent().trim()) && mtProp != null) {
534        if (cgProp.getName().equals(lastName)) {
535          repeatCount++;
536        } else {
537          lastName = cgProp.getName();
538          repeatCount = 0;
539        }
540
541        String npath = path+"/"+pathPrefix(cgProp.getXmlNamespace())+cgProp.getName();
542        String name = cgProp.getName();
543        Element cgn = new Element(cgProp.getName(), cgProp).setFormat(FhirFormat.XML);
544        cgn.setPath(element.getPath()+"."+cgProp.getName()+"["+repeatCount+"]"); 
545        element.getChildren().add(cgn);
546
547        npath = npath+"/text()";
548        name = mtProp.getName();
549        Element n = new Element(name, mtProp, mtProp.getType(), child.getTextContent().trim()).markLocation(line(child, false), col(child, false)).setFormat(FhirFormat.XML).setNativeObject(child);
550        cgn.getChildren().add(n);
551        n.setPath(element.getPath()+"."+mtProp.getName());
552
553
554      } else if (child.getNodeType() == Node.CDATA_SECTION_NODE) {
555        logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.CDATA_IS_NOT_ALLOWED), IssueSeverity.ERROR);
556      } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
557        logError(errors, ValidationMessage.NO_RULE_DATE, line(child, false), col(child, false), path, IssueType.STRUCTURE, context.formatMessage(I18nConstants.NODE_TYPE__IS_NOT_ALLOWED, Integer.toString(child.getNodeType())), IssueSeverity.ERROR);
558      }
559      child = child.getNextSibling();
560    }
561  }
562
563  private Property getChoiceGroupProp(List<Property> properties) {
564    for (Property p : properties) {
565      if (p.getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) {
566        return p;
567      }
568    }
569    return null;
570  }
571
572  private boolean validAttrValue(String value) {
573    if (version == null) {
574      return true;
575    }
576    if (version.equals("1.0")) {
577      boolean ok = true;
578      for (char ch : value.toCharArray()) {
579        if (ch <= 0x1F && !Utilities.existsInList(ch, '\r', '\n', '\t')) {
580          ok = false;
581        }
582      }
583      return ok;
584    } else
585      return true;
586  }
587
588
589  private Property getElementProp(List<Property> properties, String nodeName, String namespace) {
590    List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
591    // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
592    // and therefore the longer property names get evaluated first
593    Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
594      @Override
595      public int compare(Property o1, Property o2) {
596        return o2.getName().length() - o1.getName().length();
597      }
598    });
599    // first scan, by namespace
600    for (Property p : propsSortedByLongestFirst) {
601      if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
602        if (p.getXmlName().equals(nodeName) && p.getXmlNamespace().equals(namespace)) 
603          return p;
604      }
605    }
606    for (Property p : propsSortedByLongestFirst) {
607      if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
608        if (p.getXmlName().equals(nodeName)) 
609          return p;
610        if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
611          return p;
612      }
613    }
614
615
616    return null;
617  }
618
619  private Property getAttrProp(List<Property> properties, String nodeName, String namespace) {
620    for (Property p : properties) {
621      if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && p.getXmlNamespace().equals(namespace)) {
622        return p;
623      }
624    }
625    if (namespace == null) {
626      for (Property p : properties) {
627        if (p.getXmlName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) {
628          return p;
629        }    
630      }
631    }
632    return null;
633  }
634
635  private Property getTextProp(List<Property> properties) {
636    for (Property p : properties)
637      if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
638        return p;
639    return null;
640  }
641
642  private String convertForDateFormatFromExternal(String fmt, String av) throws FHIRException {
643    if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) {
644      try {
645        DateTimeType d = DateTimeType.parseV3(av);
646        return d.asStringValue();
647      } catch (Exception e) {
648        return av; // not at all clear what to do in this case.
649      }
650    }
651    throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATA_FORMAT_, fmt));
652  }
653
654  private String convertForDateFormatToExternal(String fmt, String av) throws FHIRException {
655    if ("v3".equals(fmt) || "YYYYMMDDHHMMSS.UUUU[+|-ZZzz]".equals(fmt)) {
656      DateTimeType d = new DateTimeType(av);
657      return d.getAsV3();
658    } else
659      throw new FHIRException(context.formatMessage(I18nConstants.UNKNOWN_DATE_FORMAT_, fmt));
660  }
661
662  private void parseResource(List<ValidationMessage> errors, String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
663    org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
664    String name = res.getLocalName();
665    StructureDefinition sd = context.fetchResource(StructureDefinition.class, ProfileUtilities.sdNs(name, null));
666    if (sd == null)
667      throw new FHIRFormatError(context.formatMessage(I18nConstants.CONTAINED_RESOURCE_DOES_NOT_APPEAR_TO_BE_A_FHIR_RESOURCE_UNKNOWN_NAME_, res.getLocalName()));
668    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd, getProfileUtilities(), getContextUtilities()), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
669    parent.setType(name);
670    parseChildren(errors, res.getLocalName(), res, parent);
671  }
672
673  private void reapComments(org.w3c.dom.Element element, Element context) {
674    Node node = element.getPreviousSibling();
675    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
676      if (node.getNodeType() == Node.COMMENT_NODE)
677        context.getComments().add(0, node.getTextContent());
678      node = node.getPreviousSibling();
679    }
680    node = element.getLastChild();
681    while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
682      node = node.getPreviousSibling();
683    }
684    while (node != null) {
685      if (node.getNodeType() == Node.COMMENT_NODE)
686        context.getComments().add(node.getTextContent());
687      node = node.getNextSibling();
688    }
689  }
690
691  private boolean isAttr(Property property) {
692    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
693      if (r.getValue() == PropertyRepresentation.XMLATTR) {
694        return true;
695      }
696    }
697    return false;
698  }
699
700  private boolean isCdaText(Property property) {
701    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
702      if (r.getValue() == PropertyRepresentation.CDATEXT) {
703        return true;
704      }
705    }
706    return false;
707  }
708
709  private boolean isTypeAttr(Property property) {
710    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
711      if (r.getValue() == PropertyRepresentation.TYPEATTR) {
712        return true;
713      }
714    }
715    return false;
716  }
717
718  private boolean isText(Property property) {
719    for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
720      if (r.getValue() == PropertyRepresentation.XMLTEXT) {
721        return true;
722      }
723    }
724    return false;
725  }
726
727  @Override
728  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException, FHIRException {
729    markedXhtml = false;
730    XMLWriter xml = new XMLWriter(stream, "UTF-8");
731    xml.setSortAttributes(false);
732    xml.setPretty(style == OutputStyle.PRETTY);
733    xml.start();
734    if (e.getPath() == null) {
735      e.populatePaths(null);
736    }
737    String ns = e.getProperty().getXmlNamespace();
738    if (ns!=null && !"noNamespace".equals(ns)) {
739      xml.setDefaultNamespace(ns);
740    }
741    if (hasTypeAttr(e))
742      xml.namespace("http://www.w3.org/2001/XMLSchema-instance", "xsi");
743    addNamespaces(xml, e);
744    composeElement(xml, e, e.getType(), true);
745    xml.end();
746  }
747
748  private void addNamespaces(IXMLWriter xml, Element e) throws IOException {
749    String ns = e.getProperty().getXmlNamespace();
750    if (ns!=null && xml.getDefaultNamespace()!=null && !xml.getDefaultNamespace().equals(ns)){
751      if (!xml.namespaceDefined(ns)) {
752        String prefix = pathPrefix(ns);
753        if (prefix.endsWith(":")) {
754          prefix = prefix.substring(0, prefix.length()-1);
755        }
756        if ("?".equals(prefix)) {
757          xml.namespace(ns);
758        } else {
759          xml.namespace(ns, prefix);
760        }
761      }
762    }
763    for (Element c : e.getChildren()) {
764      addNamespaces(xml, c);
765    }
766  }
767
768  private boolean hasTypeAttr(Element e) {
769    if (isTypeAttr(e.getProperty()))
770      return true;
771    for (Element c : e.getChildren()) {
772      if (hasTypeAttr(c))
773        return true;
774    }
775    // xsi_type is always allowed on CDA elements. right now, I'm not sure where to indicate this in the model, 
776    // so it's just hardcoded here 
777    if (e.getType() != null && e.getType().startsWith(Constants.NS_CDA_ROOT)) {
778      return true;
779    }
780    return false;
781  }
782
783  private void setXsiTypeIfIsTypeAttr(IXMLWriter xml, Element element) throws IOException, FHIRException {
784    if (isTypeAttr(element.getProperty()) && !Utilities.noString(element.getType())) {
785      String type = element.getType();
786      if (Utilities.isAbsoluteUrl(type)) {
787        type = type.substring(type.lastIndexOf("/")+1);
788      }
789      xml.attribute("xsi:type",type);    
790    }
791  }
792
793  public void compose(Element e, IXMLWriter xml) throws Exception {
794    if (e.getPath() == null) {
795      e.populatePaths(null);
796    }
797    markedXhtml = false;
798    xml.start();
799    xml.setDefaultNamespace(e.getProperty().getXmlNamespace());
800    if (schemaPath != null) {
801      xml.setSchemaLocation(FormatUtilities.FHIR_NS, Utilities.pathURL(schemaPath, e.fhirType()+".xsd"));
802    }
803    composeElement(xml, e, e.getType(), true);
804    xml.end();
805  }
806
  /**
   * Writes one element, and recursively its children, to the XML writer.
   * <p>
   * Decorations (when enabled) and comments are emitted first. The element itself is then written
   * in one of four shapes, tested in this order:
   * <ol>
   * <li>property has the XMLTEXT representation: an element wrapping the value as text;</li>
   * <li>no children and no value: an empty element;</li>
   * <li>primitive: xhtml content, plain text, or an element with a "value" attribute;</li>
   * <li>anything else: attributes first, then the child elements inside the element.</li>
   * </ol>
   *
   * @param xml the writer to write to
   * @param element the element to write
   * @param elementName the XML name to use for the element
   * @param root true for the top-level call made by compose; recursive calls pass false
   */
  private void composeElement(IXMLWriter xml, Element element, String elementName, boolean root) throws IOException, FHIRException {
    if (showDecorations) {
      // decorations are carried as user data on the element under the key "fhir.decorations"
      @SuppressWarnings("unchecked")
      List<ElementDecoration> decorations = (List<ElementDecoration>) element.getUserData("fhir.decorations");
      if (decorations != null)
        for (ElementDecoration d : decorations)
          xml.decorate(d);
    }
    for (String s : element.getComments()) {
      xml.comment(s, true);
    }
    if (isText(element.getProperty())) {
      // shape 1: the value is written as the text content of the element
      if (linkResolver != null)
        xml.link(linkResolver.resolveProperty(element.getProperty()));
      xml.enter(element.getProperty().getXmlNamespace(),elementName);
      if (linkResolver != null && element.getProperty().isReference()) {
        String ref = linkResolver.resolveReference(getReferenceForElement(element));
        if (ref != null) {
          xml.externalLink(ref);
        }
      }
      xml.text(element.getValue());
      xml.exit(element.getProperty().getXmlNamespace(),elementName);   
    } else if (!element.hasChildren() && !element.hasValue()) {
      // shape 2: empty element; only an explicitly set type is written as xsi:type
      // NOTE(review): unlike the other shapes, no namespace is passed to the writer here - confirm intended
      if (element.getExplicitType() != null)
        xml.attribute("xsi:type", element.getExplicitType());
      xml.element(elementName);
    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
      // shape 3: primitive
      if (element.getType().equals("xhtml")) {
        String rawXhtml = element.getValue();
        if (isCdaText(element.getProperty())) {
          // CDATEXT properties have their xhtml parsed and converted by CDANarrativeFormat
          new CDANarrativeFormat().convert(xml, new XhtmlParser().parseFragment(rawXhtml));
        } else {
          xml.escapedText(rawXhtml);
          // the "end-xhtml" anchor is written once only; markedXhtml is reset by the compose methods
          if (!markedXhtml) {
            xml.anchor("end-xhtml");
            markedXhtml = true;
          }
        }
      } else if (isText(element.getProperty())) {
        // NOTE(review): shape 1 tests the same condition first, so this branch looks unreachable - confirm
        if (linkResolver != null)
          xml.link(linkResolver.resolveProperty(element.getProperty()));
        xml.text(element.getValue());
      } else {
        // attributes (xsi:type, value) are queued before the element itself is opened or written
        setXsiTypeIfIsTypeAttr(xml, element);
        if (element.hasValue()) {
          if (linkResolver != null)
            xml.link(linkResolver.resolveType(element.getType()));
          xml.attribute("value", element.getValue());
        }
        if (linkResolver != null)
          xml.link(linkResolver.resolveProperty(element.getProperty()));
        if (element.hasChildren()) {
          xml.enter(element.getProperty().getXmlNamespace(), elementName);
          if (linkResolver != null && element.getProperty().isReference()) {
            String ref = linkResolver.resolveReference(getReferenceForElement(element));
            if (ref != null) {
              xml.externalLink(ref);
            }
          }
          for (Element child : element.getChildren()) 
            composeElement(xml, child, child.getName(), false);
          xml.exit(element.getProperty().getXmlNamespace(),elementName);
        } else
          xml.element(elementName);
      }
    } else {
      // shape 4: complex element - attributes first, then content
      setXsiTypeIfIsTypeAttr(xml, element);
      // names of attribute children already written, so a repeating attribute is emitted once
      Set<String> handled = new HashSet<>();
      for (Element child : element.getChildren()) {
        if (!handled.contains(child.getName()) && isAttr(child.getProperty()) && wantCompose(element.getPath(), child)) {
          handled.add(child.getName());
          String av = child.getValue();
          if (child.getProperty().isList()) {
            // list-valued attribute: the values of all same-named siblings are joined with a single space
            for (Element c2 : element.getChildren()) {
              if (c2 != child && c2.getName().equals(child.getName())) {
                av = av + " "+c2.getValue();
              }
            }            
          }
          if (linkResolver != null)
            xml.link(linkResolver.resolveType(child.getType()));
          // a declared date format means the joined value is converted to its external form before writing
          if (ToolingExtensions.hasExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT))
            av = convertForDateFormatToExternal(ToolingExtensions.readStringExtension(child.getProperty().getDefinition(), ToolingExtensions.EXT_DATE_FORMAT), av);
          xml.attribute(child.getProperty().getXmlNamespace(),child.getProperty().getXmlName(), av);
        }
      }
      // elements whose definition carries the choice-group extension get no wrapper element of their own
      if (!element.getProperty().getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) {
        if (linkResolver != null)
          xml.link(linkResolver.resolveProperty(element.getProperty()));
        // declare the element's namespace on the fly if the writer does not know it yet
        if (!xml.namespaceDefined(element.getProperty().getXmlNamespace())) {
          String abbrev = makeNamespaceAbbrev(element.getProperty(), xml);
          xml.namespace(element.getProperty().getXmlNamespace(), abbrev);
        }
        xml.enter(element.getProperty().getXmlNamespace(), elementName);
      }

      // non-root elements flagged as special get an extra nested element named after their type
      // (presumably nested resources such as contained ones - TODO confirm)
      if (!root && element.getSpecial() != null) {
        if (linkResolver != null)
          xml.link(linkResolver.resolveProperty(element.getProperty()));
        xml.enter(element.getProperty().getXmlNamespace(),element.getType());
      }
      if (linkResolver != null && element.getProperty().isReference()) {
        String ref = linkResolver.resolveReference(getReferenceForElement(element));
        if (ref != null) {
          xml.externalLink(ref);
        }
      }
      for (Element child : element.getChildren()) {
        if (wantCompose(element.getPath(), child)) {
          if (isText(child.getProperty())) {
            // NOTE(review): the link is resolved from the parent's property, not the child's - confirm intended
            if (linkResolver != null)
              xml.link(linkResolver.resolveProperty(element.getProperty()));
            xml.text(child.getValue());
          } else if (!isAttr(child.getProperty())) {
            // attribute children were already written above
            composeElement(xml, child, child.getName(), false);
          }
        }
      }
      // NOTE(review): the exit for the special (type-named) element sits inside the choice-group check,
      // while the matching enter above does not - confirm the two conditions never combine
      if (!element.getProperty().getDefinition().hasExtension(ToolingExtensions.EXT_ID_CHOICE_GROUP)) {
        if (!root && element.getSpecial() != null)
          xml.exit(element.getProperty().getXmlNamespace(),element.getType());
        xml.exit(element.getProperty().getXmlNamespace(),elementName);
      }
    }
  }
933
934  private String makeNamespaceAbbrev(Property property, IXMLWriter xml) {
935    // it's a cosmetic thing, but we're going to try to come up with a nice namespace
936
937    ElementDefinition ed = property.getDefinition();
938    String ns = property.getXmlNamespace();
939    String n = property.getXmlName();
940
941    String diff = property.getName().toLowerCase().replace(n.toLowerCase(), "");
942    if (!Utilities.noString(diff) && diff.length() <= 5 && Utilities.isToken(diff) && !xml.abbreviationDefined(diff)) {
943      return diff;
944    }
945
946    int i = ns.length()-1;
947    while (i > 0) {
948      if (Character.isAlphabetic(ns.charAt(i)) || Character.isDigit(ns.charAt(i))) {
949        i--;
950      } else {
951        break;
952      }
953    }
954    String tail = ns.substring(i+1);
955    if (!Utilities.noString(tail) && tail.length() <= 5 && Utilities.isToken(tail) && !xml.abbreviationDefined(tail)) {
956      return tail;
957    }
958
959    i = 0;
960    while (xml.abbreviationDefined("ns"+i)) {
961      i++;
962    }
963    return "ns"+i;
964  }
965  private String checkHeader(List<ValidationMessage> errors, InputStream stream) throws IOException {
966    try {
967      // the stream will either start with the UTF-8 BOF or with <xml
968      int i0 = stream.read();
969      int i1 = stream.read();
970      int i2 = stream.read();
971
972      StringBuilder b = new StringBuilder();
973      if (i0 == 0xEF && i1 == 0xBB && i2 == 0xBF) {
974        // ok, it's UTF-8
975      } else if (i0 == 0x3C && i1 == 0x3F && i2 == 0x78) { // <xm
976        b.append((char) i0);
977        b.append((char) i1);
978        b.append((char) i2);
979      } else if (i0 == 60) { // just plain old XML with no header
980        return "1.0";        
981      } else {
982        throw new Exception(context.formatMessage(I18nConstants.XML_ENCODING_INVALID));
983      }
984      int i = stream.read();
985      do {
986        b.append((char) i);
987        i = stream.read();
988      } while (i != 0x3E);
989      String header = b.toString();
990      String e = null;
991      i = header.indexOf("encoding=\"");
992      if (i > -1) {
993        e = header.substring(i+10, i+15);
994      } else {
995        i = header.indexOf("encoding='");
996        if (i > -1) {
997          e = header.substring(i+10, i+15);
998        } 
999      }
1000      if (e != null && !"UTF-8".equalsIgnoreCase(e)) {
1001        logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, context.formatMessage(I18nConstants.XML_ENCODING_INVALID), IssueSeverity.ERROR);
1002      }
1003
1004      i = header.indexOf("version=\"");
1005      if (i > -1) {
1006        return header.substring(i+9, i+12);
1007      } else {
1008        i = header.indexOf("version='");
1009        if (i > -1) {
1010          return header.substring(i+9, i+12);          
1011        } 
1012      }
1013      return "?xml-p1?";
1014    } catch (Exception e) {
1015      // suppress this error 
1016      logError(errors, ValidationMessage.NO_RULE_DATE, 0, 0, "XML", IssueType.INVALID, e.getMessage(), IssueSeverity.ERROR);
1017    }
1018    return "?xml-p2?";
1019  }
1020
1021  class NullErrorHandler implements ErrorHandler {
1022    @Override
1023    public void fatalError(SAXParseException e) {
1024      // do nothing
1025    }
1026
1027    @Override
1028    public void error(SAXParseException e) {
1029      // do nothing
1030    }
1031
1032    @Override
1033    public void warning(SAXParseException e) {
1034      // do nothing
1035    }
1036  }
1037}