001package org.hl7.fhir.dstu3.elementmodel;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.OutputStream;
037import java.util.ArrayList;
038import java.util.Collections;
039import java.util.Comparator;
040import java.util.List;
041
042import javax.xml.parsers.DocumentBuilder;
043import javax.xml.parsers.DocumentBuilderFactory;
044import javax.xml.parsers.SAXParserFactory;
045import javax.xml.transform.Transformer;
046import javax.xml.transform.TransformerFactory;
047import javax.xml.transform.dom.DOMResult;
048import javax.xml.transform.sax.SAXSource;
049
050import org.hl7.fhir.dstu3.context.IWorkerContext;
051import org.hl7.fhir.dstu3.elementmodel.Element.SpecialElement;
052import org.hl7.fhir.dstu3.formats.FormatUtilities;
053import org.hl7.fhir.dstu3.formats.IParser.OutputStyle;
054import org.hl7.fhir.dstu3.model.DateTimeType;
055import org.hl7.fhir.dstu3.model.ElementDefinition.PropertyRepresentation;
056import org.hl7.fhir.dstu3.model.Enumeration;
057import org.hl7.fhir.dstu3.model.StructureDefinition;
058import org.hl7.fhir.dstu3.utils.ToolingExtensions;
059import org.hl7.fhir.dstu3.utils.formats.XmlLocationAnnotator;
060import org.hl7.fhir.dstu3.utils.formats.XmlLocationData;
061import org.hl7.fhir.exceptions.DefinitionException;
062import org.hl7.fhir.exceptions.FHIRException;
063import org.hl7.fhir.exceptions.FHIRFormatError;
064import org.hl7.fhir.utilities.StringPair;
065import org.hl7.fhir.utilities.Utilities;
066import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
067import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
068import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
069import org.hl7.fhir.utilities.xhtml.XhtmlNode;
070import org.hl7.fhir.utilities.xhtml.XhtmlParser;
071import org.hl7.fhir.utilities.xml.IXMLWriter;
072import org.hl7.fhir.utilities.xml.XMLUtil;
073import org.hl7.fhir.utilities.xml.XMLWriter;
074import org.w3c.dom.Document;
075import org.w3c.dom.Node;
076import org.xml.sax.InputSource;
077import org.xml.sax.XMLReader;
078
079public class XmlParser extends ParserBase {
080  private boolean allowXsiLocation;
081
/**
 * Creates an XML parser/composer for the element model.
 *
 * @param context worker context, handed straight to the {@code ParserBase} constructor
 */
public XmlParser(IWorkerContext context) {
  super(context);
}
085
086  
087  public boolean isAllowXsiLocation() {
088    return allowXsiLocation;
089  }
090
091  public void setAllowXsiLocation(boolean allowXsiLocation) {
092    this.allowXsiLocation = allowXsiLocation;
093  }
094
095
096  public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
097                Document doc = null;
098        try {
099                DocumentBuilderFactory factory = XMLUtil.newXXEProtectedDocumentBuilderFactory();
100                // xxe protection
101                factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
102                factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
103                factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
104                factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
105                factory.setXIncludeAware(false);
106                factory.setExpandEntityReferences(false);
107                        
108                factory.setNamespaceAware(true);
109                if (policy == ValidationPolicy.EVERYTHING) {
110                        // use a slower parser that keeps location data
111                        TransformerFactory transformerFactory = XMLUtil.newXXEProtectedTransformerFactory();
112                        Transformer nullTransformer = transformerFactory.newTransformer();
113                        DocumentBuilder docBuilder = factory.newDocumentBuilder();
114                        doc = docBuilder.newDocument();
115                        DOMResult domResult = new DOMResult(doc);
116                        SAXParserFactory spf = XMLUtil.newXXEProtectedSaxParserFactory();
117                        spf.setNamespaceAware(true);
118                        spf.setValidating(false);
119                        XMLReader xmlReader = XMLUtil.getXXEProtectedXMLReader(spf);
120
121                        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
122                        InputSource inputSource = new InputSource(stream);
123                        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
124                        nullTransformer.transform(saxSource, domResult);
125                } else {
126                        DocumentBuilder builder = factory.newDocumentBuilder();
127                        doc = builder.parse(stream);
128                }
129        } catch (Exception e) {
130      logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
131      doc = null;
132        }
133        if (doc == null)
134                return null;
135        else
136      return parse(doc);
137  }
138
139  private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
140    if (policy == ValidationPolicy.EVERYTHING) {
141      Node node = document.getFirstChild();
142      while (node != null) {
143        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
144          logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
145        node = node.getNextSibling();
146      }
147    }
148  }
149
150  
151  private int line(Node node) {
152                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
153                return loc == null ? 0 : loc.getStartLine();
154  }
155
156  private int col(Node node) {
157                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
158                return loc == null ? 0 : loc.getStartColumn();
159  }
160
161  public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
162    checkForProcessingInstruction(doc);
163    org.w3c.dom.Element element = doc.getDocumentElement();
164    return parse(element);
165  }
166  
167  public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
168    String ns = element.getNamespaceURI();
169    String name = element.getLocalName();
170    String path = "/"+pathPrefix(ns)+name;
171    
172    StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
173    if (sd == null)
174      return null;
175
176    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
177    checkElement(element, path, result.getProperty());
178    result.markLocation(line(element), col(element));
179    result.setType(element.getLocalName());
180    parseChildren(path, element, result);
181    result.numberChildren();
182    return result;
183  }
184
185  private String pathPrefix(String ns) {
186    if (Utilities.noString(ns))
187      return "";
188    if (ns.equals(FormatUtilities.FHIR_NS))
189      return "f:";
190    if (ns.equals(FormatUtilities.XHTML_NS))
191      return "h:";
192    if (ns.equals("urn:hl7-org:v3"))
193      return "v3:";
194    return "?:";
195  }
196
197  private boolean empty(org.w3c.dom.Element element) {
198    for (int i = 0; i < element.getAttributes().getLength(); i++) {
199      String n = element.getAttributes().item(i).getNodeName();
200      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
201        return false;
202    }
203    if (!Utilities.noString(element.getTextContent().trim()))
204      return false;
205    
206    Node n = element.getFirstChild();
207    while (n != null) {
208      if (n.getNodeType() == Node.ELEMENT_NODE)
209        return false;
210      n = n.getNextSibling();
211    }
212    return true;
213  }
214  
215  private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
216    if (policy == ValidationPolicy.EVERYTHING) {
217      if (empty(element))
218        logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
219      String ns = FormatUtilities.FHIR_NS;
220      if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
221        ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
222      else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
223        ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
224      if (!element.getNamespaceURI().equals(ns))
225        logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
226    }
227  }
228
229  public Element parse(org.w3c.dom.Element base, String type) throws Exception {
230    StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
231    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
232    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
233    checkElement(base, path, result.getProperty());
234    result.setType(base.getLocalName());
235    parseChildren(path, base, result);
236    result.numberChildren();
237    return result;
238  }
239
240  private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
241        // this parsing routine retains the original order in a the XML file, to support validation
242        reapComments(node, context);
243    List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node));
244
245        String text = XMLUtil.getDirectText(node).trim();
246    if (!Utilities.noString(text)) {
247        Property property = getTextProp(properties);
248        if (property != null) {
249            context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
250        } else {
251        logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
252        }               
253    }
254    
255    for (int i = 0; i < node.getAttributes().getLength(); i++) {
256        Node attr = node.getAttributes().item(i);
257        if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
258        Property property = getAttrProp(properties, attr.getNodeName());
259        if (property != null) {
260                  String av = attr.getNodeValue();
261                  if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
262                        av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
263                        if (property.getName().equals("value") && context.isPrimitive())
264                                context.setValue(av);
265                        else
266                    context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
267        } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) {
268          logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName(), IssueSeverity.ERROR);                      
269        }
270        }
271    }
272    
273    Node child = node.getFirstChild();
274    while (child != null) {
275        if (child.getNodeType() == Node.ELEMENT_NODE) {
276                Property property = getElementProp(properties, child.getLocalName());
277                if (property != null) {
278                        if (!property.isChoice() && "xhtml".equals(property.getType())) {
279                XhtmlParser xp = new XhtmlParser();
280            XhtmlNode xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child);
281            if (policy == ValidationPolicy.EVERYTHING) {
282              for (StringPair s : xp.getValidationIssues()) {
283                logError(line(child), col(child), path, IssueType.INVALID, s.getName() + " "+s.getValue(), IssueSeverity.ERROR);                
284              }
285            }
286                                                context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
287                        } else {
288                          String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
289                                Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
290                                checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
291                                boolean ok = true;
292                                if (property.isChoice()) {
293                                        if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
294                                                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
295                                                if (xsiType == null) {
296                          logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR);
297                          ok = false;
298                                                } else {
299                                                        if (xsiType.contains(":"))
300                                                                xsiType = xsiType.substring(xsiType.indexOf(":")+1);
301                                                        n.setType(xsiType);
302                                                }
303                                        } else
304                                          n.setType(n.getType());
305                                }
306                                context.getChildren().add(n);
307                                if (ok) {
308                                        if (property.isResource())
309                parseResource(npath, (org.w3c.dom.Element) child, n, property);
310                                        else
311                                                parseChildren(npath, (org.w3c.dom.Element) child, n);
312                                }
313                        }
314        } else
315          logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR);                    
316        } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
317        logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);                      
318        } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
319        logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
320        }
321        child = child.getNextSibling();
322    }
323  }
324
325  private Property getElementProp(List<Property> properties, String nodeName) {
326                List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
327                // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
328                // and therefore the longer property names get evaluated first
329                Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
330                        @Override
331                        public int compare(Property o1, Property o2) {
332                                return o2.getName().length() - o1.getName().length();
333                        }
334                });
335        for (Property p : propsSortedByLongestFirst)
336                if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
337                  if (p.getName().equals(nodeName)) 
338                                  return p;
339                  if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
340                                  return p;
341                }
342        return null;
343        }
344
345        private Property getAttrProp(List<Property> properties, String nodeName) {
346        for (Property p : properties)
347                if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 
348                                return p;
349        return null;
350  }
351
352        private Property getTextProp(List<Property> properties) {
353        for (Property p : properties)
354                if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
355                                return p;
356        return null;
357        }
358
359        private String convertForDateFormat(String fmt, String av) throws FHIRException {
360        if ("v3".equals(fmt)) {
361                DateTimeType d = DateTimeType.parseV3(av);
362                return d.asStringValue();
363        } else
364                throw new FHIRException("Unknown Data format '"+fmt+"'");
365        }
366
367  private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
368        org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
369    String name = res.getLocalName();
370    StructureDefinition sd = context.fetchTypeDefinition(name);
371    if (sd == null)
372      throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
373    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
374    parent.setType(name);
375    parseChildren(res.getLocalName(), res, parent);
376        }
377
378        private void reapComments(org.w3c.dom.Element element, Element context) {
379          Node node = element.getPreviousSibling();
380          while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
381                if (node.getNodeType() == Node.COMMENT_NODE)
382                        context.getComments().add(0, node.getTextContent());
383                node = node.getPreviousSibling();
384          }
385                node = element.getLastChild();
386                while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
387                        node = node.getPreviousSibling();
388                }
389                while (node != null) {
390                        if (node.getNodeType() == Node.COMMENT_NODE)
391                                context.getComments().add(node.getTextContent());
392                        node = node.getNextSibling();
393                }
394        }
395
396        private boolean isAttr(Property property) {
397                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
398                        if (r.getValue() == PropertyRepresentation.XMLATTR) {
399                                return true;
400                        }
401                }
402                return false;
403        }
404
405  private boolean isText(Property property) {
406                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
407                        if (r.getValue() == PropertyRepresentation.XMLTEXT) {
408                                return true;
409                        }
410                }
411                return false;
412  }
413
414        @Override
415  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException {
416    XMLWriter xml = new XMLWriter(stream, "UTF-8");
417    xml.setPretty(style == OutputStyle.PRETTY);
418    xml.start();
419    xml.setDefaultNamespace(e.getProperty().getNamespace());
420    composeElement(xml, e, e.getType());
421    xml.end();
422
423  }
424
425  public void compose(Element e, IXMLWriter xml) throws Exception {
426    xml.start();
427    xml.setDefaultNamespace(e.getProperty().getNamespace());
428    composeElement(xml, e, e.getType());
429    xml.end();
430  }
431
432  private void composeElement(IXMLWriter xml, Element element, String elementName) throws IOException {
433    for (String s : element.getComments()) {
434      xml.comment(s, true);
435    }
436    if (isText(element.getProperty())) {
437      if (linkResolver != null)
438        xml.link(linkResolver.resolveProperty(element.getProperty()));
439      xml.enter(elementName);
440      xml.text(element.getValue());
441      xml.exit(elementName);      
442    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
443      if (element.getType().equals("xhtml")) {
444        xml.escapedText(element.getValue());
445      } else if (isText(element.getProperty())) {
446        if (linkResolver != null)
447          xml.link(linkResolver.resolveProperty(element.getProperty()));
448        xml.text(element.getValue());
449      } else {
450        if (element.hasValue()) {
451          if (linkResolver != null)
452            xml.link(linkResolver.resolveType(element.getType()));
453        xml.attribute("value", element.getValue());
454        }
455        if (linkResolver != null)
456          xml.link(linkResolver.resolveProperty(element.getProperty()));
457                                if (element.hasChildren()) {
458                                        xml.enter(elementName);
459                                        for (Element child : element.getChildren()) 
460                                                composeElement(xml, child, child.getName());
461                                        xml.exit(elementName);
462                                } else
463        xml.element(elementName);
464      }
465    } else {
466      for (Element child : element.getChildren()) {
467        if (isAttr(child.getProperty())) {
468          if (linkResolver != null)
469            xml.link(linkResolver.resolveType(child.getType()));
470          xml.attribute(child.getName(), child.getValue());
471      }
472      }
473      if (linkResolver != null)
474        xml.link(linkResolver.resolveProperty(element.getProperty()));
475      xml.enter(elementName);
476      if (element.getSpecial() != null) {
477        if (linkResolver != null)
478          xml.link(linkResolver.resolveProperty(element.getProperty()));
479        xml.enter(element.getType());
480      }
481      for (Element child : element.getChildren()) {
482        if (isText(child.getProperty())) {
483          if (linkResolver != null)
484            xml.link(linkResolver.resolveProperty(element.getProperty()));
485          xml.text(child.getValue());
486        } else if (!isAttr(child.getProperty()))
487          composeElement(xml, child, child.getName());
488      }
489            if (element.getSpecial() != null)
490        xml.exit(element.getType());
491      xml.exit(elementName);
492    }
493  }
494
495}