001package org.hl7.fhir.r4.utils.formats;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032import java.io.IOException;
033import java.io.OutputStream;
034import java.io.OutputStreamWriter;
035import java.io.UnsupportedEncodingException;
036import java.util.ArrayList;
037import java.util.Collections;
038import java.util.HashMap;
039import java.util.HashSet;
040import java.util.List;
041import java.util.Map;
042import java.util.Set;
043import java.util.UUID;
044
045import org.hl7.fhir.exceptions.FHIRFormatError;
046import org.hl7.fhir.utilities.Utilities;
047
048@Deprecated
049public class Turtle {
050
  /**
   * Character-class fragment (ASCII letters, digits and the range U+00A0 to U+FFFE)
   * that is spliced into the bracket expression of {@link #IRI_URL}.
   */
  public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE";

  /**
   * Regular expression that {@code TTLURL.setUri} matches candidate URIs against:
   * any number of lower-case {@code scheme:} prefixes followed by one or more
   * percent-encoded octets or permitted punctuation / {@link #GOOD_IRI_CHAR} characters.
   */
  public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="
      + GOOD_IRI_CHAR + "])+";
  /** Regular expression: two lower-case letters, optionally followed by a hyphen and two letters. */
  public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
056
057  // Object model
  /**
   * Common base type for values stored as the object of a predicate in the output
   * model; the subclasses visible in this file are {@code StringType} and {@code Complex}.
   */
  public abstract class Triple {
    // NOTE(review): not read or assigned anywhere in the part of the file reviewed.
    private String uri;
  }
061
062  public class StringType extends Triple {
063    private String value;
064
065    public StringType(String value) {
066      super();
067      this.value = value;
068    }
069  }
070
071  public class Complex extends Triple {
072    protected List<Predicate> predicates = new ArrayList<Predicate>();
073
074    public Complex predicate(String predicate, String object) {
075      predicateSet.add(predicate);
076      objectSet.add(object);
077      return predicate(predicate, new StringType(object));
078    }
079
080    public Complex linkedPredicate(String predicate, String object, String link) {
081      predicateSet.add(predicate);
082      objectSet.add(object);
083      return linkedPredicate(predicate, new StringType(object), link);
084    }
085
086    public Complex predicate(String predicate, Triple object) {
087      Predicate p = getPredicate(predicate);
088      if (p == null) {
089        p = new Predicate();
090        p.predicate = predicate;
091        predicateSet.add(predicate);
092        predicates.add(p);
093      }
094      if (object instanceof StringType)
095        objectSet.add(((StringType) object).value);
096      p.objects.add(object);
097      return this;
098    }
099
100    protected Predicate getPredicate(String predicate) {
101      for (Predicate p : predicates)
102        if (p.predicate.equals(predicate))
103          return p;
104      return null;
105    }
106
107    public Complex linkedPredicate(String predicate, Triple object, String link) {
108      Predicate p = getPredicate(predicate);
109      if (p == null) {
110        p = new Predicate();
111        p.predicate = predicate;
112        p.link = link;
113        predicateSet.add(predicate);
114        predicates.add(p);
115      }
116      if (object instanceof StringType)
117        objectSet.add(((StringType) object).value);
118      p.objects.add(object);
119      return this;
120    }
121
122    public Complex predicate(String predicate) {
123      predicateSet.add(predicate);
124      Complex c = complex();
125      predicate(predicate, c);
126      return c;
127    }
128
129    public Complex linkedPredicate(String predicate, String link) {
130      predicateSet.add(predicate);
131      Complex c = complex();
132      linkedPredicate(predicate, c, link);
133      return c;
134    }
135
136    public void prefix(String code, String url) {
137      Turtle.this.prefix(code, url);
138    }
139  }
140
141  private class Predicate {
142    protected String predicate;
143    protected String link;
144    protected List<Triple> objects = new ArrayList<Turtle.Triple>();
145    protected String comment;
146
147    public String getPredicate() {
148      return predicate;
149    }
150
151    public String makelink() {
152      if (link == null)
153        return predicate;
154      else
155        return "<a href=\"" + link + "\">" + Utilities.escapeXml(predicate) + "</a>";
156    }
157
158    public List<Triple> getObjects() {
159      return objects;
160    }
161
162    public String getComment() {
163      return comment;
164    }
165  }
166
167  public class Subject extends Complex {
168    private String id;
169
170    public Predicate predicate(String predicate, Triple object, String comment) {
171      Predicate p = getPredicate(predicate);
172      if (p == null) {
173        p = new Predicate();
174        p.predicate = predicate;
175        predicateSet.add(predicate);
176        predicates.add(p);
177        p.comment = comment;
178      }
179      if (object instanceof StringType)
180        objectSet.add(((StringType) object).value);
181      p.objects.add(object);
182      return p;
183    }
184
185    public void comment(String comment) {
186      if (!Utilities.noString(comment)) {
187        predicate("rdfs:comment", literal(comment));
188        predicate("dcterms:description", literal(comment));
189      }
190    }
191
192    public void label(String label) {
193      if (!Utilities.noString(label)) {
194        predicate("rdfs:label", literal(label));
195        predicate("dc:title", literal(label));
196      }
197    }
198
199  }
200
201  public class Section {
202    private String name;
203    private List<Subject> subjects = new ArrayList<Subject>();
204
205    public Subject triple(String subject, String predicate, String object, String comment) {
206      return triple(subject, predicate, new StringType(object), comment);
207    }
208
209    public Subject triple(String subject, String predicate, String object) {
210      return triple(subject, predicate, new StringType(object));
211    }
212
213    public Subject triple(String subject, String predicate, Triple object) {
214      return triple(subject, predicate, object, null);
215    }
216
217    public Subject triple(String subject, String predicate, Triple object, String comment) {
218      Subject s = subject(subject);
219      s.predicate(predicate, object, comment);
220      return s;
221    }
222
223    public void comment(String subject, String comment) {
224      triple(subject, "rdfs:comment", literal(comment));
225      triple(subject, "dcterms:description", literal(comment));
226    }
227
228    public void label(String subject, String comment) {
229      triple(subject, "rdfs:label", literal(comment));
230      triple(subject, "dc:title", literal(comment));
231    }
232
233    public Subject subject(String subject) {
234      for (Subject ss : subjects)
235        if (ss.id.equals(subject))
236          return ss;
237      Subject s = new Subject();
238      s.id = subject;
239      subjects.add(s);
240      return s;
241    }
242
243    public boolean hasSubject(String subject) {
244      for (Subject ss : subjects)
245        if (ss.id.equals(subject))
246          return true;
247      return false;
248    }
249  }
250
  // Sections in creation order (appended by section(String)); iterated by commit() and asHtml().
  private List<Section> sections = new ArrayList<Section>();
  // NOTE(review): never populated in the part of the file reviewed.
  protected Set<String> subjectSet = new HashSet<String>();
  // Every predicate name registered via Complex/Subject; listed in the header by commitPrefixes.
  protected Set<String> predicateSet = new HashSet<String>();
  // Text of the plain-text objects registered via Complex/Subject.
  protected Set<String> objectSet = new HashSet<String>();
  // Prefix code -> namespace URL; written out as @prefix declarations.
  protected Map<String, String> prefixes = new HashMap<String, String>();
256
  /**
   * Registers a namespace prefix, replacing any URL previously stored for the same code.
   *
   * @param code the prefix code (the part before the colon)
   * @param url  the namespace URL the code stands for
   */
  public void prefix(String code, String url) {
    prefixes.put(code, url);
  }
260
261  protected boolean hasSection(String sn) {
262    for (Section s : sections)
263      if (s.name.equals(sn))
264        return true;
265    return false;
266
267  }
268
269  public Section section(String sn) {
270    if (hasSection(sn))
271      throw new Error("Duplicate section name " + sn);
272    Section s = new Section();
273    s.name = sn;
274    sections.add(s);
275    return s;
276  }
277
278  protected String matches(String url, String prefixUri, String prefix) {
279    if (url.startsWith(prefixUri)) {
280      prefixes.put(prefix, prefixUri);
281      return prefix + ":" + escape(url.substring(prefixUri.length()), false);
282    }
283    return null;
284  }
285
  /**
   * Factory for nested nodes; used by the {@code Complex} methods that create a child node.
   *
   * @return a new, empty {@code Complex}
   */
  protected Complex complex() {
    return new Complex();
  }
289
290  private void checkPrefix(Triple object) {
291    if (object instanceof StringType)
292      checkPrefix(((StringType) object).value);
293    else {
294      Complex obj = (Complex) object;
295      for (Predicate po : obj.predicates) {
296        checkPrefix(po.getPredicate());
297        for (Triple o : po.getObjects())
298          checkPrefix(o);
299      }
300    }
301  }
302
303  protected void checkPrefix(String pname) {
304    if (pname.startsWith("("))
305      return;
306    if (pname.startsWith("\""))
307      return;
308    if (pname.startsWith("<"))
309      return;
310
311    if (pname.contains(":")) {
312      String prefix = pname.substring(0, pname.indexOf(":"));
313      if (!prefixes.containsKey(prefix) && !prefix.equals("http") && !prefix.equals("urn"))
314        throw new Error("undefined prefix " + prefix);
315    }
316  }
317
318  protected StringType literal(String s) {
319    return new StringType("\"" + escape(s, true) + "\"");
320  }
321
322  protected StringType literalTyped(String s, String t) {
323    return new StringType("\"" + escape(s, true) + "\"^^xs:" + t);
324  }
325
326  public static String escape(String s, boolean string) {
327    if (s == null)
328      return "";
329
330    StringBuilder b = new StringBuilder();
331    for (char c : s.toCharArray()) {
332      if (c == '\r')
333        b.append("\\r");
334      else if (c == '\n')
335        b.append("\\n");
336      else if (c == '"')
337        b.append("\\\"");
338      else if (c == '\\')
339        b.append("\\\\");
340      else if (c == '/' && !string)
341        b.append("\\/");
342      else
343        b.append(c);
344    }
345    return b.toString();
346  }
347
348  protected String pctEncode(String s) {
349    if (s == null)
350      return "";
351
352    StringBuilder b = new StringBuilder();
353    for (char c : s.toCharArray()) {
354      if (c >= 'A' && c <= 'Z')
355        b.append(c);
356      else if (c >= 'a' && c <= 'z')
357        b.append(c);
358      else if (c >= '0' && c <= '9')
359        b.append(c);
360      else if (c == '.')
361        b.append(c);
362      else
363        b.append("%" + Integer.toHexString(c));
364    }
365    return b.toString();
366  }
367
368  protected List<String> sorted(Set<String> keys) {
369    List<String> names = new ArrayList<String>();
370    names.addAll(keys);
371    Collections.sort(names);
372    return names;
373  }
374
375  public void commit(OutputStream destination, boolean header) throws IOException {
376    LineOutputStreamWriter writer = new LineOutputStreamWriter(destination);
377    commitPrefixes(writer, header);
378    for (Section s : sections) {
379      commitSection(writer, s);
380    }
381    writer.ln("# -------------------------------------------------------------------------------------");
382    writer.ln();
383    writer.flush();
384    writer.close();
385  }
386
387  public String asHtml() throws Exception {
388    StringBuilder b = new StringBuilder();
389    b.append("<pre class=\"rdf\">\r\n");
390    commitPrefixes(b);
391    for (Section s : sections) {
392      commitSection(b, s);
393    }
394    b.append("</pre>\r\n");
395    b.append("\r\n");
396    return b.toString();
397  }
398
399  private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException {
400    if (header) {
401      writer.ln("# FHIR Sub-definitions");
402      writer.write("# This is work in progress, and may change rapidly \r\n");
403      writer.ln();
404      writer.write("# A note about policy: the focus here is providing the knowledge from \r\n");
405      writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n");
406      writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n");
407      writer.write("# appropriate\" means that the predicates are a faithful representation \r\n");
408      writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n");
409      writer.ln();
410      writer.write("# Where the community agrees on additional predicate statements (such \r\n");
411      writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n");
412      writer.write("# predicates \r\n");
413      writer.ln();
414      writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n");
415      writer.ln();
416      writer
417          .write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n");
418      writer.ln();
419    }
420    for (String p : sorted(prefixes.keySet()))
421      writer.ln("@prefix " + p + ": <" + prefixes.get(p) + "> .");
422    writer.ln();
423    if (header) {
424      writer.ln("# Predicates used in this file:");
425      for (String s : sorted(predicateSet))
426        writer.ln(" # " + s);
427      writer.ln();
428    }
429  }
430
431  private void commitPrefixes(StringBuilder b) throws Exception {
432    for (String p : sorted(prefixes.keySet()))
433      b.append("@prefix " + p + ": &lt;" + prefixes.get(p) + "&gt; .\r\n");
434    b.append("\r\n");
435  }
436
437  // private String lastSubject = null;
438  // private String lastComment = "";
439
440  private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException {
441    writer.ln("# - " + section.name + " " + Utilities.padLeft("", '-', 75 - section.name.length()));
442    writer.ln();
443    for (Subject sbj : section.subjects) {
444      if (Utilities.noString(sbj.id)) {
445        writer.write("[");
446      } else {
447        writer.write(sbj.id);
448        writer.write(" ");
449      }
450      int i = 0;
451
452      for (Predicate p : sbj.predicates) {
453        writer.write(p.getPredicate());
454        writer.write(" ");
455        boolean first = true;
456        for (Triple o : p.getObjects()) {
457          if (first)
458            first = false;
459          else
460            writer.write(", ");
461          if (o instanceof StringType)
462            writer.write(((StringType) o).value);
463          else {
464            writer.write("[");
465            if (write((Complex) o, writer, 4))
466              writer.write("\r\n  ]");
467            else
468              writer.write("]");
469          }
470        }
471        String comment = p.comment == null ? "" : " # " + p.comment;
472        i++;
473        if (i < sbj.predicates.size())
474          writer.write(";" + comment + "\r\n  ");
475        else {
476          if (Utilities.noString(sbj.id))
477            writer.write("]");
478          writer.write(" ." + comment + "\r\n\r\n");
479        }
480      }
481    }
482  }
483
484  private void commitSection(StringBuilder b, Section section) throws Exception {
485    b.append("# - " + section.name + " " + Utilities.padLeft("", '-', 75 - section.name.length()) + "\r\n");
486    b.append("\r\n");
487    for (Subject sbj : section.subjects) {
488      b.append(Utilities.escapeXml(sbj.id));
489      b.append(" ");
490      int i = 0;
491
492      for (Predicate p : sbj.predicates) {
493        b.append(p.makelink());
494        b.append(" ");
495        boolean first = true;
496        for (Triple o : p.getObjects()) {
497          if (first)
498            first = false;
499          else
500            b.append(", ");
501          if (o instanceof StringType)
502            b.append(Utilities.escapeXml(((StringType) o).value));
503          else {
504            b.append("[");
505            if (write((Complex) o, b, 4))
506              b.append("\r\n  ]");
507            else
508              b.append("]");
509          }
510        }
511        String comment = p.comment == null ? "" : " # " + p.comment;
512        i++;
513        if (i < sbj.predicates.size())
514          b.append(";" + Utilities.escapeXml(comment) + "\r\n  ");
515        else
516          b.append("." + Utilities.escapeXml(comment) + "\r\n\r\n");
517      }
518    }
519  }
520
521  protected class LineOutputStreamWriter extends OutputStreamWriter {
522    private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException {
523      super(out, "UTF-8");
524    }
525
526    private void ln() throws IOException {
527      write("\r\n");
528    }
529
530    private void ln(String s) throws IOException {
531      write(s);
532      write("\r\n");
533    }
534  }
535
536  public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException {
537    if (complex.predicates.isEmpty())
538      return false;
539    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size() == 1
540        && complex.predicates.get(0).getObjects().get(0) instanceof StringType
541        && Utilities.noString(complex.predicates.get(0).comment)) {
542      writer.write(" " + complex.predicates.get(0).predicate + " "
543          + ((StringType) complex.predicates.get(0).getObjects().get(0)).value);
544      return false;
545    }
546    String left = Utilities.padLeft("", ' ', indent);
547    int i = 0;
548    for (Predicate po : complex.predicates) {
549      writer.write("\r\n");
550      boolean first = true;
551      for (Triple o : po.getObjects()) {
552        if (first) {
553          first = false;
554          writer.write(left + " " + po.getPredicate() + " ");
555        } else
556          writer.write(", ");
557        if (o instanceof StringType)
558          writer.write(((StringType) o).value);
559        else {
560          writer.write("[");
561          if (write((Complex) o, writer, indent + 2))
562            writer.write("\r\n" + left + " ]");
563          else
564            writer.write(" ]");
565        }
566      }
567      i++;
568      if (i < complex.predicates.size())
569        writer.write(";");
570      if (!Utilities.noString(po.comment))
571        writer.write(" # " + escape(po.comment, false));
572    }
573    return true;
574  }
575
576  public boolean write(Complex complex, StringBuilder b, int indent) throws Exception {
577    if (complex.predicates.isEmpty())
578      return false;
579    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size() == 1
580        && complex.predicates.get(0).getObjects().get(0) instanceof StringType
581        && Utilities.noString(complex.predicates.get(0).comment)) {
582      b.append(" " + complex.predicates.get(0).makelink() + " "
583          + Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value));
584      return false;
585    }
586    String left = Utilities.padLeft("", ' ', indent);
587    int i = 0;
588    for (Predicate po : complex.predicates) {
589      b.append("\r\n");
590      boolean first = true;
591      for (Triple o : po.getObjects()) {
592        if (first) {
593          first = false;
594          b.append(left + " " + po.makelink() + " ");
595        } else
596          b.append(", ");
597        if (o instanceof StringType)
598          b.append(Utilities.escapeXml(((StringType) o).value));
599        else {
600          b.append("[");
601          if (write((Complex) o, b, indent + 2))
602            b.append(left + " ]");
603          else
604            b.append(" ]");
605        }
606      }
607      i++;
608      if (i < complex.predicates.size())
609        b.append(";");
610      if (!Utilities.noString(po.comment))
611        b.append(" # " + Utilities.escapeXml(escape(po.comment, false)));
612    }
613    return true;
614  }
615
616  public abstract class TTLObject {
617    protected int line;
618    protected int col;
619
620    abstract public boolean hasValue(String value);
621
622    public int getLine() {
623      return line;
624    }
625
626    public int getCol() {
627      return col;
628    }
629
630  }
631
632  public class TTLLiteral extends TTLObject {
633
634    private String value;
635    private String type;
636
637    protected TTLLiteral(int line, int col) {
638      this.line = line;
639      this.col = col;
640    }
641
642    @Override
643    public boolean hasValue(String value) {
644      return value.equals(this.value);
645    }
646
647    public String getValue() {
648      return value;
649    }
650
651    public String getType() {
652      return type;
653    }
654
655  }
656
657  public class TTLURL extends TTLObject {
658    private String uri;
659
660    protected TTLURL(int line, int col) {
661      this.line = line;
662      this.col = col;
663    }
664
665    public String getUri() {
666      return uri;
667    }
668
669    public void setUri(String uri) throws FHIRFormatError {
670      if (!uri.matches(IRI_URL))
671        throw new FHIRFormatError("Illegal URI " + uri);
672      this.uri = uri;
673    }
674
675    @Override
676    public boolean hasValue(String value) {
677      return value.equals(this.uri);
678    }
679  }
680
681  public class TTLList extends TTLObject {
682    private List<TTLObject> list = new ArrayList<Turtle.TTLObject>();
683
684    public TTLList(TTLObject obj) {
685      super();
686      list.add(obj);
687    }
688
689    @Override
690    public boolean hasValue(String value) {
691      for (TTLObject obj : list)
692        if (obj.hasValue(value))
693          return true;
694      return false;
695    }
696
697    public List<TTLObject> getList() {
698      return list;
699    }
700
701  }
702
703  public class TTLComplex extends TTLObject {
704    private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>();
705
706    protected TTLComplex(int line, int col) {
707      this.line = line;
708      this.col = col;
709    }
710
711    public Map<String, TTLObject> getPredicates() {
712      return predicates;
713    }
714
715    @Override
716    public boolean hasValue(String value) {
717      return false;
718    }
719
720    public void addPredicate(String uri, TTLObject obj) {
721      if (!predicates.containsKey(uri))
722        predicates.put(uri, obj);
723      else {
724        TTLObject eo = predicates.get(uri);
725        TTLList list = null;
726        if (eo instanceof TTLList)
727          list = (TTLList) eo;
728        else {
729          list = new TTLList(eo);
730          predicates.put(uri, list);
731        }
732        list.list.add(obj);
733      }
734    }
735
736    public void addPredicates(Map<String, TTLObject> values) {
737      for (String s : values.keySet()) {
738        addPredicate(s, values.get(s));
739      }
740    }
741  }
742
  // Parsed nodes keyed by their TTLURL.
  // NOTE(review): not read or written in the part of the file reviewed; presumably
  // filled by the parser further down - confirm.
  private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>();

  // NOTE(review): untyped holder whose use is not visible in the part of the file reviewed.
  private Object base;
746
  /** The kinds of token that {@code Lexer} produces. */
  public enum LexerTokenType {
    TOKEN, // a single punctuation character: [, ], :, @ and similar
    WORD, // a word
    URI, // a URI written between < and >
    LITERAL // a quoted string: "..." or '...'
  }
753
754  public class Lexer {
755
756    private String source;
757    private LexerTokenType type;
758    private int cursor, line, col, startLine, startCol;
759    private String token;
760
761    public Lexer(String source) throws FHIRFormatError {
762      this.source = source;
763      cursor = 0;
764      line = 1;
765      col = 1;
766      readNext(false);
767    }
768
769    private void skipWhitespace() {
770      while (cursor < source.length()) {
771        char ch = source.charAt(cursor);
772        if (Character.isWhitespace(ch))
773          grab();
774        else if (ch == '#') {
775          ch = grab();
776          while (cursor < source.length()) {
777            ch = grab();
778            if (ch == '\r' || ch == '\n') {
779              break;
780            }
781          }
782        } else
783          break;
784      }
785    }
786
787    private char grab() {
788      char c = source.charAt(cursor);
789      if (c == '\n') {
790        line++;
791        col = 1;
792      } else
793        col++;
794
795      cursor++;
796      return c;
797    }
798
799    private void readNext(boolean postColon) throws FHIRFormatError {
800      token = null;
801      type = null;
802      skipWhitespace();
803      if (cursor >= source.length())
804        return;
805      startLine = line;
806      startCol = col;
807      char ch = grab();
808      StringBuilder b = new StringBuilder();
809      switch (ch) {
810      case '@':
811      case '.':
812      case ':':
813      case ';':
814      case '^':
815      case ',':
816      case ']':
817      case '[':
818      case '(':
819      case ')':
820        type = LexerTokenType.TOKEN;
821        b.append(ch);
822        token = b.toString();
823        return;
824      case '<':
825        while (cursor < source.length()) {
826          ch = grab();
827          if (ch == '>')
828            break;
829          b.append(ch);
830        }
831        type = LexerTokenType.URI;
832        token = unescape(b.toString(), true);
833        return;
834      case '"':
835        b.append(ch);
836        String end = "\"";
837        while (cursor < source.length()) {
838          ch = grab();
839          if (b.length() == 2 && ch != '"' && b.equals("\"\"")) {
840            cursor--;
841            break;
842          }
843          b.append(ch);
844          if (ch == '"')
845            if (b.toString().equals("\"\"\""))
846              end = "\"\"\"";
847            else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\" + end))
848              break;
849        }
850        type = LexerTokenType.LITERAL;
851        token = unescape(b.toString().substring(end.length(), b.length() - end.length()), false);
852        return;
853      case '\'':
854        b.append(ch);
855        end = "'";
856        while (cursor < source.length()) {
857          ch = grab();
858          if (b.equals("''") && ch != '\'') {
859            cursor--;
860            break;
861          }
862          b.append(ch);
863          if (b.toString().equals("'''"))
864            end = "'''";
865          else if (!b.toString().equals("''") && b.toString().endsWith(end))
866            break;
867        }
868        type = LexerTokenType.LITERAL;
869        token = unescape(b.toString().substring(end.length(), b.length() - end.length()), false);
870        return;
871      default:
872        if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z')
873            || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) {
874          b.append(ch);
875          while (cursor < source.length()) {
876            ch = grab();
877            // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a',
878            // 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_',
879            // '-', '+', '.', '\\', '#'))
880            if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~')
881                || ((ch == ':') && !postColon))
882              break;
883            b.append(ch);
884          }
885          type = LexerTokenType.WORD;
886          token = b.toString();
887          cursor--;
888          return;
889        } else
890          throw error("unexpected lexer char " + ch);
891      }
892    }
893
894    private String unescape(String s, boolean isUri) throws FHIRFormatError {
895      StringBuilder b = new StringBuilder();
896      int i = 0;
897      while (i < s.length()) {
898        char ch = s.charAt(i);
899        if (ch == '\\' && i < s.length() - 1) {
900          i++;
901          switch (s.charAt(i)) {
902          case 't':
903            b.append('\t');
904            break;
905          case 'r':
906            b.append('\r');
907            break;
908          case 'n':
909            b.append('\n');
910            break;
911          case 'f':
912            b.append('\f');
913            break;
914          case '\'':
915            b.append('\'');
916            break;
917          case '\"':
918            b.append('\"');
919            break;
920          case '\\':
921            b.append('\\');
922            break;
923          case '/':
924            b.append('\\');
925            break;
926          case 'U':
927          case 'u':
928            i++;
929            int l = 4;
930            int uc = Integer.parseInt(s.substring(i, i + l), 16);
931            if (uc < (isUri ? 33 : 32)) {
932              l = 8;
933              uc = Integer.parseInt(s.substring(i, i + 8), 16);
934            }
935            if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
936              throw new FHIRFormatError("Illegal unicode character");
937            b.append(Character.toString(uc));
938            i = i + l;
939            break;
940          default:
941            throw new FHIRFormatError("Unknown character escape \\" + s.charAt(i));
942          }
943        } else {
944          b.append(ch);
945        }
946        i++;
947      }
948      return b.toString();
949    }
950
951    public boolean done() {
952      return type == null;
953    }
954
955    public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError {
956      if (type != null && this.type != type)
957        throw error("Unexpected type. Found " + this.type.toString() + " looking for a " + type.toString());
958      String res = token;
959      readNext(postColon);
960      return res;
961    }
962
963    public String peek() throws Exception {
964      return token;
965    }
966
967    public LexerTokenType peekType() {
968      return type;
969    }
970
971    public void token(String token) throws FHIRFormatError {
972      if (!token.equals(this.token))
973        throw error("Unexpected word " + this.token + " looking for " + token);
974      next(LexerTokenType.TOKEN, token.equals(":"));
975    }
976
977    public void word(String word) throws Exception {
978      if (!word.equals(this.token))
979        throw error("Unexpected word " + this.token + " looking for " + word);
980      next(LexerTokenType.WORD, false);
981    }
982
983    public String word() throws FHIRFormatError {
984      String t = token;
985      next(LexerTokenType.WORD, false);
986      return t;
987    }
988
989    public String uri() throws FHIRFormatError {
990      if (this.type != LexerTokenType.URI)
991        throw error("Unexpected type. Found " + this.type.toString() + " looking for a URI");
992      String t = token;
993      next(LexerTokenType.URI, false);
994      return t;
995    }
996
997    public String literal() throws FHIRFormatError {
998      if (this.type != LexerTokenType.LITERAL)
999        throw error("Unexpected type. Found " + this.type.toString() + " looking for a Literal");
1000      String t = token;
1001      next(LexerTokenType.LITERAL, false);
1002      return t;
1003    }
1004
1005    public boolean peek(LexerTokenType type, String token) {
1006      return this.type == type && this.token.equals(token);
1007    }
1008
1009    public FHIRFormatError error(String message) {
1010      return new FHIRFormatError("Syntax Error parsing Turtle on line " + Integer.toString(line) + " col "
1011          + Integer.toString(col) + ": " + message);
1012    }
1013
1014  }
1015  //
1016  // public void importTtl(Section sct, String ttl) throws Exception {
1017  // if (!Utilities.noString(ttl)) {
1018  // // System.out.println("import ttl: "+ttl);
1019  // Lexer lexer = new Lexer(ttl);
1020  // String subject = null;
1021  // String predicate = null;
1022  // while (!lexer.done()) {
1023  // if (subject == null)
1024  // subject = lexer.next();
1025  // if (predicate == null)
1026  // predicate = lexer.next();
1027  // if (lexer.peekType() == null) {
1028  // throw new Error("Unexpected end of input parsing turtle");
1029  // } if (lexer.peekType() == LexerTokenType.TOKEN) {
1030  // sct.triple(subject, predicate, lexer.next());
1031  // } else if (lexer.peek() == null) {
1032  // throw new Error("Unexected - turtle lexer found no token");
1033  // } else if (lexer.peek().equals("[")) {
1034  // sct.triple(subject, predicate, importComplex(lexer));
1035  // } else
1036  // throw new Exception("Not done yet");
1037  // String n = lexer.next();
1038  // if (Utilities.noString(n))
1039  // break;
1040  // if (n.equals(".")) {
1041  // subject = null;
1042  // predicate = null;
1043  // } else if (n.equals(";")) {
1044  // predicate = null;
1045  // } else if (!n.equals(","))
1046  // throw new Exception("Unexpected token "+n);
1047  // }
1048  // }
1049  // }
1050
1051  public void parse(String source) throws FHIRFormatError {
1052    prefixes.clear();
1053    prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#");
1054    parse(new Lexer(source));
1055  }
1056
1057  private void parse(Lexer lexer) throws FHIRFormatError {
1058    boolean doPrefixes = true;
1059    while (!lexer.done()) {
1060      if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX")
1061          || lexer.peek(LexerTokenType.WORD, "BASE"))) {
1062        boolean sparqlStyle = false;
1063        boolean base = false;
1064        if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1065          lexer.token("@");
1066          String p = lexer.word();
1067          if (p.equals("base"))
1068            base = true;
1069          else if (!p.equals("prefix"))
1070            throw new FHIRFormatError("Unexpected token " + p);
1071        } else {
1072          sparqlStyle = true;
1073          String p = lexer.word();
1074          if (p.equals("BASE"))
1075            base = true;
1076          else if (!p.equals("PREFIX"))
1077            throw new FHIRFormatError("Unexpected token " + p);
1078        }
1079        String prefix = null;
1080        if (!base) {
1081          prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null;
1082          lexer.token(":");
1083        }
1084        String url = lexer.next(LexerTokenType.URI, false);
1085        if (!sparqlStyle)
1086          lexer.token(".");
1087        if (!base)
1088          prefix(prefix, url);
1089        else if (this.base == null)
1090          this.base = url;
1091        else
1092          throw new FHIRFormatError("Duplicate @base");
1093      } else if (lexer.peekType() == LexerTokenType.URI) {
1094        doPrefixes = false;
1095        TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1096        uri.setUri(lexer.uri());
1097        TTLComplex complex = parseComplex(lexer);
1098        objects.put(uri, complex);
1099        lexer.token(".");
1100      } else if (lexer.peekType() == LexerTokenType.WORD) {
1101        doPrefixes = false;
1102        TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1103        String pfx = lexer.word();
1104        if (!prefixes.containsKey(pfx))
1105          throw new FHIRFormatError("Unknown prefix " + pfx);
1106        lexer.token(":");
1107        uri.setUri(prefixes.get(pfx) + lexer.word());
1108        TTLComplex complex = parseComplex(lexer);
1109        objects.put(uri, complex);
1110        lexer.token(".");
1111      } else if (lexer.peek(LexerTokenType.TOKEN, ":")) {
1112        doPrefixes = false;
1113        TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1114        lexer.token(":");
1115        if (!prefixes.containsKey(null))
1116          throw new FHIRFormatError("Unknown prefix ''");
1117        uri.setUri(prefixes.get(null) + lexer.word());
1118        TTLComplex complex = parseComplex(lexer);
1119        objects.put(uri, complex);
1120        lexer.token(".");
1121      } else if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1122        doPrefixes = false;
1123        lexer.token("[");
1124        TTLComplex bnode = parseComplex(lexer);
1125        lexer.token("]");
1126        TTLComplex complex = null;
1127        if (!lexer.peek(LexerTokenType.TOKEN, ".")) {
1128          complex = parseComplex(lexer);
1129          // at this point, we collapse bnode and complex, and give bnode a fictional
1130          // identity
1131          bnode.addPredicates(complex.predicates);
1132        }
1133
1134        objects.put(anonymousId(), bnode);
1135        lexer.token(".");
1136      } else
1137        throw lexer.error("Unknown token " + lexer.token);
1138    }
1139  }
1140
1141  private TTLURL anonymousId() throws FHIRFormatError {
1142    TTLURL url = new TTLURL(-1, -1);
1143    url.setUri("urn:uuid:" + UUID.randomUUID().toString().toLowerCase());
1144    return url;
1145  }
1146
1147  private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError {
1148    TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol);
1149
1150    boolean done = lexer.peek(LexerTokenType.TOKEN, "]");
1151    while (!done) {
1152      String uri = null;
1153      if (lexer.peekType() == LexerTokenType.URI)
1154        uri = lexer.uri();
1155      else {
1156        String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1157        if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) {
1158          lexer.token(":");
1159          if (!prefixes.containsKey(t))
1160            throw new FHIRFormatError("unknown prefix " + t);
1161          uri = prefixes.get(t) + lexer.word();
1162        } else if (t.equals("a"))
1163          uri = prefixes.get("rdfs") + "type";
1164        else
1165          throw lexer.error("unexpected token");
1166      }
1167
1168      boolean inlist = false;
1169      if (lexer.peek(LexerTokenType.TOKEN, "(")) {
1170        inlist = true;
1171        lexer.token("(");
1172      }
1173
1174      boolean rpt = false;
1175      do {
1176        if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1177          lexer.token("[");
1178          result.addPredicate(uri, parseComplex(lexer));
1179          lexer.token("]");
1180        } else if (lexer.peekType() == LexerTokenType.URI) {
1181          TTLURL u = new TTLURL(lexer.startLine, lexer.startCol);
1182          u.setUri(lexer.uri());
1183          result.addPredicate(uri, u);
1184        } else if (lexer.peekType() == LexerTokenType.LITERAL) {
1185          TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol);
1186          u.value = lexer.literal();
1187          if (lexer.peek(LexerTokenType.TOKEN, "^")) {
1188            lexer.token("^");
1189            lexer.token("^");
1190            if (lexer.peekType() == LexerTokenType.URI) {
1191              u.type = lexer.uri();
1192            } else {
1193              String l = lexer.word();
1194              lexer.token(":");
1195              u.type = prefixes.get(l) + lexer.word();
1196            }
1197          }
1198          if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1199            // lang tag - skip it
1200            lexer.token("@");
1201            String lang = lexer.word();
1202            if (!lang.matches(LANG_REGEX)) {
1203              throw new FHIRFormatError("Invalid Language tag " + lang);
1204            }
1205          }
1206          result.addPredicate(uri, u);
1207        } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) {
1208          int sl = lexer.startLine;
1209          int sc = lexer.startCol;
1210          String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1211          if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1212            TTLLiteral u = new TTLLiteral(sl, sc);
1213            u.value = pfx;
1214            result.addPredicate(uri, u);
1215          } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1216            TTLLiteral u = new TTLLiteral(sl, sc);
1217            u.value = pfx;
1218            result.addPredicate(uri, u);
1219          } else {
1220            if (!prefixes.containsKey(pfx))
1221              throw new FHIRFormatError("Unknown prefix " + (pfx == null ? "''" : pfx));
1222            TTLURL u = new TTLURL(sl, sc);
1223            lexer.token(":");
1224            u.setUri(prefixes.get(pfx) + lexer.word());
1225            result.addPredicate(uri, u);
1226          }
1227        } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) {
1228          throw new FHIRFormatError("unexpected token " + lexer.token);
1229        }
1230
1231        if (inlist)
1232          rpt = !lexer.peek(LexerTokenType.TOKEN, ")");
1233        else {
1234          rpt = lexer.peek(LexerTokenType.TOKEN, ",");
1235          if (rpt)
1236            lexer.readNext(false);
1237        }
1238      } while (rpt);
1239      if (inlist)
1240        lexer.token(")");
1241
1242      if (lexer.peek(LexerTokenType.TOKEN, ";")) {
1243        while ((lexer.peek(LexerTokenType.TOKEN, ";")))
1244          lexer.token(";");
1245        done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]");
1246      } else {
1247        done = true;
1248      }
1249    }
1250    return result;
1251  }
1252
1253  public Map<TTLURL, TTLComplex> getObjects() {
1254    return objects;
1255  }
1256
1257  public TTLComplex getObject(String url) {
1258    for (TTLURL t : objects.keySet()) {
1259      if (t.getUri().equals(url))
1260        return objects.get(t);
1261    }
1262    return null;
1263  }
1264
1265  // public void parseFragment(Lexer lexer) throws Exception {
1266  // lexer.next(); // read [
1267  // Complex obj = new Complex();
1268  // while (!lexer.peek().equals("]")) {
1269  // String predicate = lexer.next();
1270  // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() ==
1271  // LexerTokenType.LITERAL) {
1272  // obj.predicate(predicate, lexer.next());
1273  // } else if (lexer.peek().equals("[")) {
1274  // obj.predicate(predicate, importComplex(lexer));
1275  // } else
1276  // throw new Exception("Not done yet");
1277  // if (lexer.peek().equals(";"))
1278  // lexer.next();
1279  // }
1280  // lexer.next(); // read ]
1281  // //return obj;
1282  // }
1283  //
1284  // public void importTtl(Section sct, String ttl) throws Exception {
1285  // if (!Utilities.noString(ttl)) {
1286  // // System.out.println("import ttl: "+ttl);
1287  // Lexer lexer = new Lexer(ttl);
1288  // String subject = null;
1289  // String predicate = null;
1290  // while (!lexer.done()) {
1291  // if (subject == null)
1292  // subject = lexer.next();
1293  // if (predicate == null)
1294  // predicate = lexer.next();
1295  // if (lexer.peekType() == null) {
1296  // throw new Error("Unexpected end of input parsing turtle");
1297  // } if (lexer.peekType() == LexerTokenType.TOKEN) {
1298  // sct.triple(subject, predicate, lexer.next());
1299  // } else if (lexer.peek() == null) {
1300  // throw new Error("Unexected - turtle lexer found no token");
1301  // } else if (lexer.peek().equals("[")) {
1302  // sct.triple(subject, predicate, importComplex(lexer));
1303  // } else
1304  // throw new Exception("Not done yet");
1305  // String n = lexer.next();
1306  // if (Utilities.noString(n))
1307  // break;
1308  // if (n.equals(".")) {
1309  // subject = null;
1310  // predicate = null;
1311  // } else if (n.equals(";")) {
1312  // predicate = null;
1313  // } else if (!n.equals(","))
1314  // throw new Exception("Unexpected token "+n);
1315  // }
1316  // }
1317  // }
1318
1319  // private Complex importComplex(Lexer lexer) throws Exception {
1320  // lexer.next(); // read [
1321  // Complex obj = new Complex();
1322  // while (!lexer.peek().equals("]")) {
1323  // String predicate = lexer.next();
1324  // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() ==
1325  // LexerTokenType.LITERAL) {
1326  // obj.predicate(predicate, lexer.next());
1327  // } else if (lexer.peek().equals("[")) {
1328  // obj.predicate(predicate, importComplex(lexer));
1329  // } else
1330  // throw new Exception("Not done yet");
1331  // if (lexer.peek().equals(";"))
1332  // lexer.next();
1333  // }
1334  // lexer.next(); // read ]
1335  // return obj;
1336  // }
1337
1338}