001package org.hl7.fhir.dstu3.elementmodel;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.OutputStream;
037import java.util.ArrayList;
038import java.util.Collections;
039import java.util.Comparator;
040import java.util.List;
041
042import javax.xml.parsers.DocumentBuilder;
043import javax.xml.parsers.DocumentBuilderFactory;
044import javax.xml.parsers.SAXParser;
045import javax.xml.parsers.SAXParserFactory;
046import javax.xml.transform.Transformer;
047import javax.xml.transform.TransformerFactory;
048import javax.xml.transform.dom.DOMResult;
049import javax.xml.transform.sax.SAXSource;
050
051import org.hl7.fhir.dstu3.context.IWorkerContext;
052import org.hl7.fhir.dstu3.elementmodel.Element.SpecialElement;
053import org.hl7.fhir.dstu3.formats.FormatUtilities;
054import org.hl7.fhir.dstu3.formats.IParser.OutputStyle;
055import org.hl7.fhir.dstu3.model.DateTimeType;
056import org.hl7.fhir.dstu3.model.ElementDefinition.PropertyRepresentation;
057import org.hl7.fhir.dstu3.model.Enumeration;
058import org.hl7.fhir.dstu3.model.StructureDefinition;
059import org.hl7.fhir.dstu3.utils.ToolingExtensions;
060import org.hl7.fhir.dstu3.utils.formats.XmlLocationAnnotator;
061import org.hl7.fhir.dstu3.utils.formats.XmlLocationData;
062import org.hl7.fhir.exceptions.DefinitionException;
063import org.hl7.fhir.exceptions.FHIRException;
064import org.hl7.fhir.exceptions.FHIRFormatError;
065import org.hl7.fhir.utilities.StringPair;
066import org.hl7.fhir.utilities.Utilities;
067import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
068import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
069import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
070import org.hl7.fhir.utilities.xhtml.XhtmlNode;
071import org.hl7.fhir.utilities.xhtml.XhtmlParser;
072import org.hl7.fhir.utilities.xml.IXMLWriter;
073import org.hl7.fhir.utilities.xml.XMLUtil;
074import org.hl7.fhir.utilities.xml.XMLWriter;
075import org.w3c.dom.Document;
076import org.w3c.dom.Node;
077import org.xml.sax.InputSource;
078import org.xml.sax.XMLReader;
079
080public class XmlParser extends ParserBase {
081  private boolean allowXsiLocation;
082
083  public XmlParser(IWorkerContext context) {
084    super(context);
085  }
086
087  
088  public boolean isAllowXsiLocation() {
089    return allowXsiLocation;
090  }
091
092  public void setAllowXsiLocation(boolean allowXsiLocation) {
093    this.allowXsiLocation = allowXsiLocation;
094  }
095
096
097  public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
098                Document doc = null;
099        try {
100                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
101                // xxe protection
102                factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
103                factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
104                factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
105                factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
106                factory.setXIncludeAware(false);
107                factory.setExpandEntityReferences(false);
108                        
109                factory.setNamespaceAware(true);
110                if (policy == ValidationPolicy.EVERYTHING) {
111                        // use a slower parser that keeps location data
112                        TransformerFactory transformerFactory = TransformerFactory.newInstance();
113                        Transformer nullTransformer = transformerFactory.newTransformer();
114                        DocumentBuilder docBuilder = factory.newDocumentBuilder();
115                        doc = docBuilder.newDocument();
116                        DOMResult domResult = new DOMResult(doc);
117                        SAXParserFactory spf = SAXParserFactory.newInstance();
118                        spf.setNamespaceAware(true);
119                        spf.setValidating(false);
120                // xxe protection
121                  spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
122        spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
123                        SAXParser saxParser = spf.newSAXParser();
124                        XMLReader xmlReader = saxParser.getXMLReader();
125                // xxe protection
126                  xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
127                  xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
128                                
129                        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
130                        InputSource inputSource = new InputSource(stream);
131                        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
132                        nullTransformer.transform(saxSource, domResult);
133                } else {
134                        DocumentBuilder builder = factory.newDocumentBuilder();
135                        doc = builder.parse(stream);
136                }
137        } catch (Exception e) {
138      logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
139      doc = null;
140        }
141        if (doc == null)
142                return null;
143        else
144      return parse(doc);
145  }
146
147  private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
148    if (policy == ValidationPolicy.EVERYTHING) {
149      Node node = document.getFirstChild();
150      while (node != null) {
151        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
152          logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
153        node = node.getNextSibling();
154      }
155    }
156  }
157
158  
159  private int line(Node node) {
160                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
161                return loc == null ? 0 : loc.getStartLine();
162  }
163
164  private int col(Node node) {
165                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
166                return loc == null ? 0 : loc.getStartColumn();
167  }
168
169  public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
170    checkForProcessingInstruction(doc);
171    org.w3c.dom.Element element = doc.getDocumentElement();
172    return parse(element);
173  }
174  
175  public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
176    String ns = element.getNamespaceURI();
177    String name = element.getLocalName();
178    String path = "/"+pathPrefix(ns)+name;
179    
180    StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
181    if (sd == null)
182      return null;
183
184    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
185    checkElement(element, path, result.getProperty());
186    result.markLocation(line(element), col(element));
187    result.setType(element.getLocalName());
188    parseChildren(path, element, result);
189    result.numberChildren();
190    return result;
191  }
192
193  private String pathPrefix(String ns) {
194    if (Utilities.noString(ns))
195      return "";
196    if (ns.equals(FormatUtilities.FHIR_NS))
197      return "f:";
198    if (ns.equals(FormatUtilities.XHTML_NS))
199      return "h:";
200    if (ns.equals("urn:hl7-org:v3"))
201      return "v3:";
202    return "?:";
203  }
204
205  private boolean empty(org.w3c.dom.Element element) {
206    for (int i = 0; i < element.getAttributes().getLength(); i++) {
207      String n = element.getAttributes().item(i).getNodeName();
208      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
209        return false;
210    }
211    if (!Utilities.noString(element.getTextContent().trim()))
212      return false;
213    
214    Node n = element.getFirstChild();
215    while (n != null) {
216      if (n.getNodeType() == Node.ELEMENT_NODE)
217        return false;
218      n = n.getNextSibling();
219    }
220    return true;
221  }
222  
223  private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
224    if (policy == ValidationPolicy.EVERYTHING) {
225      if (empty(element))
226        logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
227      String ns = FormatUtilities.FHIR_NS;
228      if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
229        ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
230      else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
231        ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
232      if (!element.getNamespaceURI().equals(ns))
233        logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
234    }
235  }
236
237  public Element parse(org.w3c.dom.Element base, String type) throws Exception {
238    StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
239    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
240    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
241    checkElement(base, path, result.getProperty());
242    result.setType(base.getLocalName());
243    parseChildren(path, base, result);
244    result.numberChildren();
245    return result;
246  }
247
248  private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
249        // this parsing routine retains the original order in a the XML file, to support validation
250        reapComments(node, context);
251    List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node));
252
253        String text = XMLUtil.getDirectText(node).trim();
254    if (!Utilities.noString(text)) {
255        Property property = getTextProp(properties);
256        if (property != null) {
257            context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
258        } else {
259        logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
260        }               
261    }
262    
263    for (int i = 0; i < node.getAttributes().getLength(); i++) {
264        Node attr = node.getAttributes().item(i);
265        if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
266        Property property = getAttrProp(properties, attr.getNodeName());
267        if (property != null) {
268                  String av = attr.getNodeValue();
269                  if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
270                        av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
271                        if (property.getName().equals("value") && context.isPrimitive())
272                                context.setValue(av);
273                        else
274                    context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
275        } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) {
276          logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName(), IssueSeverity.ERROR);                      
277        }
278        }
279    }
280    
281    Node child = node.getFirstChild();
282    while (child != null) {
283        if (child.getNodeType() == Node.ELEMENT_NODE) {
284                Property property = getElementProp(properties, child.getLocalName());
285                if (property != null) {
286                        if (!property.isChoice() && "xhtml".equals(property.getType())) {
287                XhtmlParser xp = new XhtmlParser();
288            XhtmlNode xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child);
289            if (policy == ValidationPolicy.EVERYTHING) {
290              for (StringPair s : xp.getValidationIssues()) {
291                logError(line(child), col(child), path, IssueType.INVALID, s.getName() + " "+s.getValue(), IssueSeverity.ERROR);                
292              }
293            }
294                                                context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
295                        } else {
296                          String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
297                                Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
298                                checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
299                                boolean ok = true;
300                                if (property.isChoice()) {
301                                        if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
302                                                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
303                                                if (xsiType == null) {
304                          logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR);
305                          ok = false;
306                                                } else {
307                                                        if (xsiType.contains(":"))
308                                                                xsiType = xsiType.substring(xsiType.indexOf(":")+1);
309                                                        n.setType(xsiType);
310                                                }
311                                        } else
312                                          n.setType(n.getType());
313                                }
314                                context.getChildren().add(n);
315                                if (ok) {
316                                        if (property.isResource())
317                parseResource(npath, (org.w3c.dom.Element) child, n, property);
318                                        else
319                                                parseChildren(npath, (org.w3c.dom.Element) child, n);
320                                }
321                        }
322        } else
323          logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR);                    
324        } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
325        logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);                      
326        } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
327        logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
328        }
329        child = child.getNextSibling();
330    }
331  }
332
333  private Property getElementProp(List<Property> properties, String nodeName) {
334                List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
335                // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
336                // and therefore the longer property names get evaluated first
337                Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
338                        @Override
339                        public int compare(Property o1, Property o2) {
340                                return o2.getName().length() - o1.getName().length();
341                        }
342                });
343        for (Property p : propsSortedByLongestFirst)
344                if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
345                  if (p.getName().equals(nodeName)) 
346                                  return p;
347                  if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
348                                  return p;
349                }
350        return null;
351        }
352
353        private Property getAttrProp(List<Property> properties, String nodeName) {
354        for (Property p : properties)
355                if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 
356                                return p;
357        return null;
358  }
359
360        private Property getTextProp(List<Property> properties) {
361        for (Property p : properties)
362                if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
363                                return p;
364        return null;
365        }
366
367        private String convertForDateFormat(String fmt, String av) throws FHIRException {
368        if ("v3".equals(fmt)) {
369                DateTimeType d = DateTimeType.parseV3(av);
370                return d.asStringValue();
371        } else
372                throw new FHIRException("Unknown Data format '"+fmt+"'");
373        }
374
375  private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
376        org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
377    String name = res.getLocalName();
378    StructureDefinition sd = context.fetchTypeDefinition(name);
379    if (sd == null)
380      throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
381    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
382    parent.setType(name);
383    parseChildren(res.getLocalName(), res, parent);
384        }
385
386        private void reapComments(org.w3c.dom.Element element, Element context) {
387          Node node = element.getPreviousSibling();
388          while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
389                if (node.getNodeType() == Node.COMMENT_NODE)
390                        context.getComments().add(0, node.getTextContent());
391                node = node.getPreviousSibling();
392          }
393                node = element.getLastChild();
394                while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
395                        node = node.getPreviousSibling();
396                }
397                while (node != null) {
398                        if (node.getNodeType() == Node.COMMENT_NODE)
399                                context.getComments().add(node.getTextContent());
400                        node = node.getNextSibling();
401                }
402        }
403
404        private boolean isAttr(Property property) {
405                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
406                        if (r.getValue() == PropertyRepresentation.XMLATTR) {
407                                return true;
408                        }
409                }
410                return false;
411        }
412
413  private boolean isText(Property property) {
414                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
415                        if (r.getValue() == PropertyRepresentation.XMLTEXT) {
416                                return true;
417                        }
418                }
419                return false;
420  }
421
422        @Override
423  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException {
424    XMLWriter xml = new XMLWriter(stream, "UTF-8");
425    xml.setPretty(style == OutputStyle.PRETTY);
426    xml.start();
427    xml.setDefaultNamespace(e.getProperty().getNamespace());
428    composeElement(xml, e, e.getType());
429    xml.end();
430
431  }
432
433  public void compose(Element e, IXMLWriter xml) throws Exception {
434    xml.start();
435    xml.setDefaultNamespace(e.getProperty().getNamespace());
436    composeElement(xml, e, e.getType());
437    xml.end();
438  }
439
440  private void composeElement(IXMLWriter xml, Element element, String elementName) throws IOException {
441    for (String s : element.getComments()) {
442      xml.comment(s, true);
443    }
444    if (isText(element.getProperty())) {
445      if (linkResolver != null)
446        xml.link(linkResolver.resolveProperty(element.getProperty()));
447      xml.enter(elementName);
448      xml.text(element.getValue());
449      xml.exit(elementName);      
450    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
451      if (element.getType().equals("xhtml")) {
452        xml.escapedText(element.getValue());
453      } else if (isText(element.getProperty())) {
454        if (linkResolver != null)
455          xml.link(linkResolver.resolveProperty(element.getProperty()));
456        xml.text(element.getValue());
457      } else {
458        if (element.hasValue()) {
459          if (linkResolver != null)
460            xml.link(linkResolver.resolveType(element.getType()));
461        xml.attribute("value", element.getValue());
462        }
463        if (linkResolver != null)
464          xml.link(linkResolver.resolveProperty(element.getProperty()));
465                                if (element.hasChildren()) {
466                                        xml.enter(elementName);
467                                        for (Element child : element.getChildren()) 
468                                                composeElement(xml, child, child.getName());
469                                        xml.exit(elementName);
470                                } else
471        xml.element(elementName);
472      }
473    } else {
474      for (Element child : element.getChildren()) {
475        if (isAttr(child.getProperty())) {
476          if (linkResolver != null)
477            xml.link(linkResolver.resolveType(child.getType()));
478          xml.attribute(child.getName(), child.getValue());
479      }
480      }
481      if (linkResolver != null)
482        xml.link(linkResolver.resolveProperty(element.getProperty()));
483      xml.enter(elementName);
484      if (element.getSpecial() != null) {
485        if (linkResolver != null)
486          xml.link(linkResolver.resolveProperty(element.getProperty()));
487        xml.enter(element.getType());
488      }
489      for (Element child : element.getChildren()) {
490        if (isText(child.getProperty())) {
491          if (linkResolver != null)
492            xml.link(linkResolver.resolveProperty(element.getProperty()));
493          xml.text(child.getValue());
494        } else if (!isAttr(child.getProperty()))
495          composeElement(xml, child, child.getName());
496      }
497            if (element.getSpecial() != null)
498        xml.exit(element.getType());
499      xml.exit(elementName);
500    }
501  }
502
503}