001package org.hl7.fhir.dstu3.elementmodel;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.OutputStream;
037import java.util.ArrayList;
038import java.util.Collections;
039import java.util.Comparator;
040import java.util.List;
041
042import javax.xml.parsers.DocumentBuilder;
043import javax.xml.parsers.DocumentBuilderFactory;
044import javax.xml.parsers.SAXParserFactory;
045import javax.xml.transform.Transformer;
046import javax.xml.transform.TransformerFactory;
047import javax.xml.transform.dom.DOMResult;
048import javax.xml.transform.sax.SAXSource;
049
050import org.hl7.fhir.dstu3.context.IWorkerContext;
051import org.hl7.fhir.dstu3.elementmodel.Element.SpecialElement;
052import org.hl7.fhir.dstu3.formats.FormatUtilities;
053import org.hl7.fhir.dstu3.formats.IParser.OutputStyle;
054import org.hl7.fhir.dstu3.model.DateTimeType;
055import org.hl7.fhir.dstu3.model.ElementDefinition.PropertyRepresentation;
056import org.hl7.fhir.dstu3.model.Enumeration;
057import org.hl7.fhir.dstu3.model.StructureDefinition;
058import org.hl7.fhir.dstu3.utils.ToolingExtensions;
059import org.hl7.fhir.dstu3.utils.formats.XmlLocationAnnotator;
060import org.hl7.fhir.dstu3.utils.formats.XmlLocationData;
061import org.hl7.fhir.exceptions.DefinitionException;
062import org.hl7.fhir.exceptions.FHIRException;
063import org.hl7.fhir.exceptions.FHIRFormatError;
064import org.hl7.fhir.utilities.StringPair;
065import org.hl7.fhir.utilities.Utilities;
066import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
067import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
068import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
069import org.hl7.fhir.utilities.xhtml.XhtmlNode;
070import org.hl7.fhir.utilities.xhtml.XhtmlParser;
071import org.hl7.fhir.utilities.xml.IXMLWriter;
072import org.hl7.fhir.utilities.xml.XMLUtil;
073import org.hl7.fhir.utilities.xml.XMLWriter;
074import org.w3c.dom.Document;
075import org.w3c.dom.Node;
076import org.xml.sax.InputSource;
077import org.xml.sax.XMLReader;
078
079@Deprecated
080public class XmlParser extends ParserBase {
081  private boolean allowXsiLocation;
082
083  public XmlParser(IWorkerContext context) {
084    super(context);
085  }
086
087  
088  public boolean isAllowXsiLocation() {
089    return allowXsiLocation;
090  }
091
092  public void setAllowXsiLocation(boolean allowXsiLocation) {
093    this.allowXsiLocation = allowXsiLocation;
094  }
095
096
097  public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
098                Document doc = null;
099        try {
100                DocumentBuilderFactory factory = XMLUtil.newXXEProtectedDocumentBuilderFactory();
101                // xxe protection
102                factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
103                factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
104                factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
105                factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
106                factory.setXIncludeAware(false);
107                factory.setExpandEntityReferences(false);
108                        
109                factory.setNamespaceAware(true);
110                if (policy == ValidationPolicy.EVERYTHING) {
111                        // use a slower parser that keeps location data
112                        TransformerFactory transformerFactory = XMLUtil.newXXEProtectedTransformerFactory();
113                        Transformer nullTransformer = transformerFactory.newTransformer();
114                        DocumentBuilder docBuilder = factory.newDocumentBuilder();
115                        doc = docBuilder.newDocument();
116                        DOMResult domResult = new DOMResult(doc);
117                        SAXParserFactory spf = XMLUtil.newXXEProtectedSaxParserFactory();
118                        spf.setNamespaceAware(true);
119                        spf.setValidating(false);
120                        XMLReader xmlReader = XMLUtil.getXXEProtectedXMLReader(spf);
121
122                        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
123                        InputSource inputSource = new InputSource(stream);
124                        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
125                        nullTransformer.transform(saxSource, domResult);
126                } else {
127                        DocumentBuilder builder = factory.newDocumentBuilder();
128                        doc = builder.parse(stream);
129                }
130        } catch (Exception e) {
131      logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
132      doc = null;
133        }
134        if (doc == null)
135                return null;
136        else
137      return parse(doc);
138  }
139
140  private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
141    if (policy == ValidationPolicy.EVERYTHING) {
142      Node node = document.getFirstChild();
143      while (node != null) {
144        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
145          logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
146        node = node.getNextSibling();
147      }
148    }
149  }
150
151  
152  private int line(Node node) {
153                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
154                return loc == null ? 0 : loc.getStartLine();
155  }
156
157  private int col(Node node) {
158                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
159                return loc == null ? 0 : loc.getStartColumn();
160  }
161
162  public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
163    checkForProcessingInstruction(doc);
164    org.w3c.dom.Element element = doc.getDocumentElement();
165    return parse(element);
166  }
167  
168  public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
169    String ns = element.getNamespaceURI();
170    String name = element.getLocalName();
171    String path = "/"+pathPrefix(ns)+name;
172    
173    StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
174    if (sd == null)
175      return null;
176
177    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
178    checkElement(element, path, result.getProperty());
179    result.markLocation(line(element), col(element));
180    result.setType(element.getLocalName());
181    parseChildren(path, element, result);
182    result.numberChildren();
183    return result;
184  }
185
186  private String pathPrefix(String ns) {
187    if (Utilities.noString(ns))
188      return "";
189    if (ns.equals(FormatUtilities.FHIR_NS))
190      return "f:";
191    if (ns.equals(FormatUtilities.XHTML_NS))
192      return "h:";
193    if (ns.equals("urn:hl7-org:v3"))
194      return "v3:";
195    return "?:";
196  }
197
198  private boolean empty(org.w3c.dom.Element element) {
199    for (int i = 0; i < element.getAttributes().getLength(); i++) {
200      String n = element.getAttributes().item(i).getNodeName();
201      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
202        return false;
203    }
204    if (!Utilities.noString(element.getTextContent().trim()))
205      return false;
206    
207    Node n = element.getFirstChild();
208    while (n != null) {
209      if (n.getNodeType() == Node.ELEMENT_NODE)
210        return false;
211      n = n.getNextSibling();
212    }
213    return true;
214  }
215  
216  private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
217    if (policy == ValidationPolicy.EVERYTHING) {
218      if (empty(element))
219        logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
220      String ns = FormatUtilities.FHIR_NS;
221      if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
222        ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
223      else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
224        ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
225      if (!element.getNamespaceURI().equals(ns))
226        logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
227    }
228  }
229
230  public Element parse(org.w3c.dom.Element base, String type) throws Exception {
231    StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
232    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
233    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
234    checkElement(base, path, result.getProperty());
235    result.setType(base.getLocalName());
236    parseChildren(path, base, result);
237    result.numberChildren();
238    return result;
239  }
240
241  private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
242        // this parsing routine retains the original order in a the XML file, to support validation
243        reapComments(node, context);
244    List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node));
245
246        String text = XMLUtil.getDirectText(node).trim();
247    if (!Utilities.noString(text)) {
248        Property property = getTextProp(properties);
249        if (property != null) {
250            context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
251        } else {
252        logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
253        }               
254    }
255    
256    for (int i = 0; i < node.getAttributes().getLength(); i++) {
257        Node attr = node.getAttributes().item(i);
258        if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
259        Property property = getAttrProp(properties, attr.getNodeName());
260        if (property != null) {
261                  String av = attr.getNodeValue();
262                  if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
263                        av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
264                        if (property.getName().equals("value") && context.isPrimitive())
265                                context.setValue(av);
266                        else
267                    context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
268        } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) {
269          logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName(), IssueSeverity.ERROR);                      
270        }
271        }
272    }
273    
274    Node child = node.getFirstChild();
275    while (child != null) {
276        if (child.getNodeType() == Node.ELEMENT_NODE) {
277                Property property = getElementProp(properties, child.getLocalName());
278                if (property != null) {
279                        if (!property.isChoice() && "xhtml".equals(property.getType())) {
280                XhtmlParser xp = new XhtmlParser();
281            XhtmlNode xhtml = xp.parseHtmlNode((org.w3c.dom.Element) child);
282            if (policy == ValidationPolicy.EVERYTHING) {
283              for (StringPair s : xp.getValidationIssues()) {
284                logError(line(child), col(child), path, IssueType.INVALID, s.getName() + " "+s.getValue(), IssueSeverity.ERROR);                
285              }
286            }
287                                                context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
288                        } else {
289                          String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
290                                Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
291                                checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
292                                boolean ok = true;
293                                if (property.isChoice()) {
294                                        if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
295                                                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
296                                                if (xsiType == null) {
297                          logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR);
298                          ok = false;
299                                                } else {
300                                                        if (xsiType.contains(":"))
301                                                                xsiType = xsiType.substring(xsiType.indexOf(":")+1);
302                                                        n.setType(xsiType);
303                                                }
304                                        } else
305                                          n.setType(n.getType());
306                                }
307                                context.getChildren().add(n);
308                                if (ok) {
309                                        if (property.isResource())
310                parseResource(npath, (org.w3c.dom.Element) child, n, property);
311                                        else
312                                                parseChildren(npath, (org.w3c.dom.Element) child, n);
313                                }
314                        }
315        } else
316          logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR);                    
317        } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
318        logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);                      
319        } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
320        logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
321        }
322        child = child.getNextSibling();
323    }
324  }
325
326  private Property getElementProp(List<Property> properties, String nodeName) {
327                List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
328                // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
329                // and therefore the longer property names get evaluated first
330                Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
331                        @Override
332                        public int compare(Property o1, Property o2) {
333                                return o2.getName().length() - o1.getName().length();
334                        }
335                });
336        for (Property p : propsSortedByLongestFirst)
337                if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
338                  if (p.getName().equals(nodeName)) 
339                                  return p;
340                  if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
341                                  return p;
342                }
343        return null;
344        }
345
346        private Property getAttrProp(List<Property> properties, String nodeName) {
347        for (Property p : properties)
348                if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 
349                                return p;
350        return null;
351  }
352
353        private Property getTextProp(List<Property> properties) {
354        for (Property p : properties)
355                if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
356                                return p;
357        return null;
358        }
359
360        private String convertForDateFormat(String fmt, String av) throws FHIRException {
361        if ("v3".equals(fmt)) {
362                DateTimeType d = DateTimeType.parseV3(av);
363                return d.asStringValue();
364        } else
365                throw new FHIRException("Unknown Data format '"+fmt+"'");
366        }
367
368  private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
369        org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
370    String name = res.getLocalName();
371    StructureDefinition sd = context.fetchTypeDefinition(name);
372    if (sd == null)
373      throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
374    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
375    parent.setType(name);
376    parseChildren(res.getLocalName(), res, parent);
377        }
378
379        private void reapComments(org.w3c.dom.Element element, Element context) {
380          Node node = element.getPreviousSibling();
381          while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
382                if (node.getNodeType() == Node.COMMENT_NODE)
383                        context.getComments().add(0, node.getTextContent());
384                node = node.getPreviousSibling();
385          }
386                node = element.getLastChild();
387                while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
388                        node = node.getPreviousSibling();
389                }
390                while (node != null) {
391                        if (node.getNodeType() == Node.COMMENT_NODE)
392                                context.getComments().add(node.getTextContent());
393                        node = node.getNextSibling();
394                }
395        }
396
397        private boolean isAttr(Property property) {
398                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
399                        if (r.getValue() == PropertyRepresentation.XMLATTR) {
400                                return true;
401                        }
402                }
403                return false;
404        }
405
406  private boolean isText(Property property) {
407                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
408                        if (r.getValue() == PropertyRepresentation.XMLTEXT) {
409                                return true;
410                        }
411                }
412                return false;
413  }
414
415        @Override
416  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException {
417    XMLWriter xml = new XMLWriter(stream, "UTF-8");
418    xml.setPretty(style == OutputStyle.PRETTY);
419    xml.start();
420    xml.setDefaultNamespace(e.getProperty().getNamespace());
421    composeElement(xml, e, e.getType());
422    xml.end();
423
424  }
425
426  public void compose(Element e, IXMLWriter xml) throws Exception {
427    xml.start();
428    xml.setDefaultNamespace(e.getProperty().getNamespace());
429    composeElement(xml, e, e.getType());
430    xml.end();
431  }
432
433  private void composeElement(IXMLWriter xml, Element element, String elementName) throws IOException {
434    for (String s : element.getComments()) {
435      xml.comment(s, true);
436    }
437    if (isText(element.getProperty())) {
438      if (linkResolver != null)
439        xml.link(linkResolver.resolveProperty(element.getProperty()));
440      xml.enter(elementName);
441      xml.text(element.getValue());
442      xml.exit(elementName);      
443    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
444      if (element.getType().equals("xhtml")) {
445        xml.escapedText(element.getValue());
446      } else if (isText(element.getProperty())) {
447        if (linkResolver != null)
448          xml.link(linkResolver.resolveProperty(element.getProperty()));
449        xml.text(element.getValue());
450      } else {
451        if (element.hasValue()) {
452          if (linkResolver != null)
453            xml.link(linkResolver.resolveType(element.getType()));
454        xml.attribute("value", element.getValue());
455        }
456        if (linkResolver != null)
457          xml.link(linkResolver.resolveProperty(element.getProperty()));
458                                if (element.hasChildren()) {
459                                        xml.enter(elementName);
460                                        for (Element child : element.getChildren()) 
461                                                composeElement(xml, child, child.getName());
462                                        xml.exit(elementName);
463                                } else
464        xml.element(elementName);
465      }
466    } else {
467      for (Element child : element.getChildren()) {
468        if (isAttr(child.getProperty())) {
469          if (linkResolver != null)
470            xml.link(linkResolver.resolveType(child.getType()));
471          xml.attribute(child.getName(), child.getValue());
472      }
473      }
474      if (linkResolver != null)
475        xml.link(linkResolver.resolveProperty(element.getProperty()));
476      xml.enter(elementName);
477      if (element.getSpecial() != null) {
478        if (linkResolver != null)
479          xml.link(linkResolver.resolveProperty(element.getProperty()));
480        xml.enter(element.getType());
481      }
482      for (Element child : element.getChildren()) {
483        if (isText(child.getProperty())) {
484          if (linkResolver != null)
485            xml.link(linkResolver.resolveProperty(element.getProperty()));
486          xml.text(child.getValue());
487        } else if (!isAttr(child.getProperty()))
488          composeElement(xml, child, child.getName());
489      }
490            if (element.getSpecial() != null)
491        xml.exit(element.getType());
492      xml.exit(elementName);
493    }
494  }
495
496}