001package org.hl7.fhir.r4.utils.formats;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032import java.io.IOException;
033import java.io.OutputStream;
034import java.io.OutputStreamWriter;
035import java.io.UnsupportedEncodingException;
036import java.util.ArrayList;
037import java.util.Collections;
038import java.util.HashMap;
039import java.util.HashSet;
040import java.util.List;
041import java.util.Map;
042import java.util.Set;
043import java.util.UUID;
044
045import org.hl7.fhir.exceptions.FHIRFormatError;
046import org.hl7.fhir.utilities.Utilities;
047
048public class Turtle {
049
050  public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE";
051
052  public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="
053      + GOOD_IRI_CHAR + "])+";
054  public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
055
056  // Object model
057  public abstract class Triple {
058    private String uri;
059  }
060
061  public class StringType extends Triple {
062    private String value;
063
064    public StringType(String value) {
065      super();
066      this.value = value;
067    }
068  }
069
070  public class Complex extends Triple {
071    protected List<Predicate> predicates = new ArrayList<Predicate>();
072
073    public Complex predicate(String predicate, String object) {
074      predicateSet.add(predicate);
075      objectSet.add(object);
076      return predicate(predicate, new StringType(object));
077    }
078
079    public Complex linkedPredicate(String predicate, String object, String link) {
080      predicateSet.add(predicate);
081      objectSet.add(object);
082      return linkedPredicate(predicate, new StringType(object), link);
083    }
084
085    public Complex predicate(String predicate, Triple object) {
086      Predicate p = getPredicate(predicate);
087      if (p == null) {
088        p = new Predicate();
089        p.predicate = predicate;
090        predicateSet.add(predicate);
091        predicates.add(p);
092      }
093      if (object instanceof StringType)
094        objectSet.add(((StringType) object).value);
095      p.objects.add(object);
096      return this;
097    }
098
099    protected Predicate getPredicate(String predicate) {
100      for (Predicate p : predicates)
101        if (p.predicate.equals(predicate))
102          return p;
103      return null;
104    }
105
106    public Complex linkedPredicate(String predicate, Triple object, String link) {
107      Predicate p = getPredicate(predicate);
108      if (p == null) {
109        p = new Predicate();
110        p.predicate = predicate;
111        p.link = link;
112        predicateSet.add(predicate);
113        predicates.add(p);
114      }
115      if (object instanceof StringType)
116        objectSet.add(((StringType) object).value);
117      p.objects.add(object);
118      return this;
119    }
120
121    public Complex predicate(String predicate) {
122      predicateSet.add(predicate);
123      Complex c = complex();
124      predicate(predicate, c);
125      return c;
126    }
127
128    public Complex linkedPredicate(String predicate, String link) {
129      predicateSet.add(predicate);
130      Complex c = complex();
131      linkedPredicate(predicate, c, link);
132      return c;
133    }
134
135    public void prefix(String code, String url) {
136      Turtle.this.prefix(code, url);
137    }
138  }
139
140  private class Predicate {
141    protected String predicate;
142    protected String link;
143    protected List<Triple> objects = new ArrayList<Turtle.Triple>();
144    protected String comment;
145
146    public String getPredicate() {
147      return predicate;
148    }
149
150    public String makelink() {
151      if (link == null)
152        return predicate;
153      else
154        return "<a href=\"" + link + "\">" + Utilities.escapeXml(predicate) + "</a>";
155    }
156
157    public List<Triple> getObjects() {
158      return objects;
159    }
160
161    public String getComment() {
162      return comment;
163    }
164  }
165
166  public class Subject extends Complex {
167    private String id;
168
169    public Predicate predicate(String predicate, Triple object, String comment) {
170      Predicate p = getPredicate(predicate);
171      if (p == null) {
172        p = new Predicate();
173        p.predicate = predicate;
174        predicateSet.add(predicate);
175        predicates.add(p);
176        p.comment = comment;
177      }
178      if (object instanceof StringType)
179        objectSet.add(((StringType) object).value);
180      p.objects.add(object);
181      return p;
182    }
183
184    public void comment(String comment) {
185      if (!Utilities.noString(comment)) {
186        predicate("rdfs:comment", literal(comment));
187        predicate("dcterms:description", literal(comment));
188      }
189    }
190
191    public void label(String label) {
192      if (!Utilities.noString(label)) {
193        predicate("rdfs:label", literal(label));
194        predicate("dc:title", literal(label));
195      }
196    }
197
198  }
199
200  public class Section {
201    private String name;
202    private List<Subject> subjects = new ArrayList<Subject>();
203
204    public Subject triple(String subject, String predicate, String object, String comment) {
205      return triple(subject, predicate, new StringType(object), comment);
206    }
207
208    public Subject triple(String subject, String predicate, String object) {
209      return triple(subject, predicate, new StringType(object));
210    }
211
212    public Subject triple(String subject, String predicate, Triple object) {
213      return triple(subject, predicate, object, null);
214    }
215
216    public Subject triple(String subject, String predicate, Triple object, String comment) {
217      Subject s = subject(subject);
218      s.predicate(predicate, object, comment);
219      return s;
220    }
221
222    public void comment(String subject, String comment) {
223      triple(subject, "rdfs:comment", literal(comment));
224      triple(subject, "dcterms:description", literal(comment));
225    }
226
227    public void label(String subject, String comment) {
228      triple(subject, "rdfs:label", literal(comment));
229      triple(subject, "dc:title", literal(comment));
230    }
231
232    public Subject subject(String subject) {
233      for (Subject ss : subjects)
234        if (ss.id.equals(subject))
235          return ss;
236      Subject s = new Subject();
237      s.id = subject;
238      subjects.add(s);
239      return s;
240    }
241
242    public boolean hasSubject(String subject) {
243      for (Subject ss : subjects)
244        if (ss.id.equals(subject))
245          return true;
246      return false;
247    }
248  }
249
250  private List<Section> sections = new ArrayList<Section>();
251  protected Set<String> subjectSet = new HashSet<String>();
252  protected Set<String> predicateSet = new HashSet<String>();
253  protected Set<String> objectSet = new HashSet<String>();
254  protected Map<String, String> prefixes = new HashMap<String, String>();
255
256  public void prefix(String code, String url) {
257    prefixes.put(code, url);
258  }
259
260  protected boolean hasSection(String sn) {
261    for (Section s : sections)
262      if (s.name.equals(sn))
263        return true;
264    return false;
265
266  }
267
268  public Section section(String sn) {
269    if (hasSection(sn))
270      throw new Error("Duplicate section name " + sn);
271    Section s = new Section();
272    s.name = sn;
273    sections.add(s);
274    return s;
275  }
276
277  protected String matches(String url, String prefixUri, String prefix) {
278    if (url.startsWith(prefixUri)) {
279      prefixes.put(prefix, prefixUri);
280      return prefix + ":" + escape(url.substring(prefixUri.length()), false);
281    }
282    return null;
283  }
284
285  protected Complex complex() {
286    return new Complex();
287  }
288
289  private void checkPrefix(Triple object) {
290    if (object instanceof StringType)
291      checkPrefix(((StringType) object).value);
292    else {
293      Complex obj = (Complex) object;
294      for (Predicate po : obj.predicates) {
295        checkPrefix(po.getPredicate());
296        for (Triple o : po.getObjects())
297          checkPrefix(o);
298      }
299    }
300  }
301
302  protected void checkPrefix(String pname) {
303    if (pname.startsWith("("))
304      return;
305    if (pname.startsWith("\""))
306      return;
307    if (pname.startsWith("<"))
308      return;
309
310    if (pname.contains(":")) {
311      String prefix = pname.substring(0, pname.indexOf(":"));
312      if (!prefixes.containsKey(prefix) && !prefix.equals("http") && !prefix.equals("urn"))
313        throw new Error("undefined prefix " + prefix);
314    }
315  }
316
317  protected StringType literal(String s) {
318    return new StringType("\"" + escape(s, true) + "\"");
319  }
320
321  protected StringType literalTyped(String s, String t) {
322    return new StringType("\"" + escape(s, true) + "\"^^xs:" + t);
323  }
324
325  public static String escape(String s, boolean string) {
326    if (s == null)
327      return "";
328
329    StringBuilder b = new StringBuilder();
330    for (char c : s.toCharArray()) {
331      if (c == '\r')
332        b.append("\\r");
333      else if (c == '\n')
334        b.append("\\n");
335      else if (c == '"')
336        b.append("\\\"");
337      else if (c == '\\')
338        b.append("\\\\");
339      else if (c == '/' && !string)
340        b.append("\\/");
341      else
342        b.append(c);
343    }
344    return b.toString();
345  }
346
347  protected String pctEncode(String s) {
348    if (s == null)
349      return "";
350
351    StringBuilder b = new StringBuilder();
352    for (char c : s.toCharArray()) {
353      if (c >= 'A' && c <= 'Z')
354        b.append(c);
355      else if (c >= 'a' && c <= 'z')
356        b.append(c);
357      else if (c >= '0' && c <= '9')
358        b.append(c);
359      else if (c == '.')
360        b.append(c);
361      else
362        b.append("%" + Integer.toHexString(c));
363    }
364    return b.toString();
365  }
366
367  protected List<String> sorted(Set<String> keys) {
368    List<String> names = new ArrayList<String>();
369    names.addAll(keys);
370    Collections.sort(names);
371    return names;
372  }
373
374  public void commit(OutputStream destination, boolean header) throws IOException {
375    LineOutputStreamWriter writer = new LineOutputStreamWriter(destination);
376    commitPrefixes(writer, header);
377    for (Section s : sections) {
378      commitSection(writer, s);
379    }
380    writer.ln("# -------------------------------------------------------------------------------------");
381    writer.ln();
382    writer.flush();
383    writer.close();
384  }
385
386  public String asHtml() throws Exception {
387    StringBuilder b = new StringBuilder();
388    b.append("<pre class=\"rdf\">\r\n");
389    commitPrefixes(b);
390    for (Section s : sections) {
391      commitSection(b, s);
392    }
393    b.append("</pre>\r\n");
394    b.append("\r\n");
395    return b.toString();
396  }
397
398  private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException {
399    if (header) {
400      writer.ln("# FHIR Sub-definitions");
401      writer.write("# This is work in progress, and may change rapidly \r\n");
402      writer.ln();
403      writer.write("# A note about policy: the focus here is providing the knowledge from \r\n");
404      writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n");
405      writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n");
406      writer.write("# appropriate\" means that the predicates are a faithful representation \r\n");
407      writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n");
408      writer.ln();
409      writer.write("# Where the community agrees on additional predicate statements (such \r\n");
410      writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n");
411      writer.write("# predicates \r\n");
412      writer.ln();
413      writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n");
414      writer.ln();
415      writer
416          .write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n");
417      writer.ln();
418    }
419    for (String p : sorted(prefixes.keySet()))
420      writer.ln("@prefix " + p + ": <" + prefixes.get(p) + "> .");
421    writer.ln();
422    if (header) {
423      writer.ln("# Predicates used in this file:");
424      for (String s : sorted(predicateSet))
425        writer.ln(" # " + s);
426      writer.ln();
427    }
428  }
429
430  private void commitPrefixes(StringBuilder b) throws Exception {
431    for (String p : sorted(prefixes.keySet()))
432      b.append("@prefix " + p + ": &lt;" + prefixes.get(p) + "&gt; .\r\n");
433    b.append("\r\n");
434  }
435
436  // private String lastSubject = null;
437  // private String lastComment = "";
438
439  private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException {
440    writer.ln("# - " + section.name + " " + Utilities.padLeft("", '-', 75 - section.name.length()));
441    writer.ln();
442    for (Subject sbj : section.subjects) {
443      if (Utilities.noString(sbj.id)) {
444        writer.write("[");
445      } else {
446        writer.write(sbj.id);
447        writer.write(" ");
448      }
449      int i = 0;
450
451      for (Predicate p : sbj.predicates) {
452        writer.write(p.getPredicate());
453        writer.write(" ");
454        boolean first = true;
455        for (Triple o : p.getObjects()) {
456          if (first)
457            first = false;
458          else
459            writer.write(", ");
460          if (o instanceof StringType)
461            writer.write(((StringType) o).value);
462          else {
463            writer.write("[");
464            if (write((Complex) o, writer, 4))
465              writer.write("\r\n  ]");
466            else
467              writer.write("]");
468          }
469        }
470        String comment = p.comment == null ? "" : " # " + p.comment;
471        i++;
472        if (i < sbj.predicates.size())
473          writer.write(";" + comment + "\r\n  ");
474        else {
475          if (Utilities.noString(sbj.id))
476            writer.write("]");
477          writer.write(" ." + comment + "\r\n\r\n");
478        }
479      }
480    }
481  }
482
483  private void commitSection(StringBuilder b, Section section) throws Exception {
484    b.append("# - " + section.name + " " + Utilities.padLeft("", '-', 75 - section.name.length()) + "\r\n");
485    b.append("\r\n");
486    for (Subject sbj : section.subjects) {
487      b.append(Utilities.escapeXml(sbj.id));
488      b.append(" ");
489      int i = 0;
490
491      for (Predicate p : sbj.predicates) {
492        b.append(p.makelink());
493        b.append(" ");
494        boolean first = true;
495        for (Triple o : p.getObjects()) {
496          if (first)
497            first = false;
498          else
499            b.append(", ");
500          if (o instanceof StringType)
501            b.append(Utilities.escapeXml(((StringType) o).value));
502          else {
503            b.append("[");
504            if (write((Complex) o, b, 4))
505              b.append("\r\n  ]");
506            else
507              b.append("]");
508          }
509        }
510        String comment = p.comment == null ? "" : " # " + p.comment;
511        i++;
512        if (i < sbj.predicates.size())
513          b.append(";" + Utilities.escapeXml(comment) + "\r\n  ");
514        else
515          b.append("." + Utilities.escapeXml(comment) + "\r\n\r\n");
516      }
517    }
518  }
519
520  protected class LineOutputStreamWriter extends OutputStreamWriter {
521    private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException {
522      super(out, "UTF-8");
523    }
524
525    private void ln() throws IOException {
526      write("\r\n");
527    }
528
529    private void ln(String s) throws IOException {
530      write(s);
531      write("\r\n");
532    }
533  }
534
535  public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException {
536    if (complex.predicates.isEmpty())
537      return false;
538    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size() == 1
539        && complex.predicates.get(0).getObjects().get(0) instanceof StringType
540        && Utilities.noString(complex.predicates.get(0).comment)) {
541      writer.write(" " + complex.predicates.get(0).predicate + " "
542          + ((StringType) complex.predicates.get(0).getObjects().get(0)).value);
543      return false;
544    }
545    String left = Utilities.padLeft("", ' ', indent);
546    int i = 0;
547    for (Predicate po : complex.predicates) {
548      writer.write("\r\n");
549      boolean first = true;
550      for (Triple o : po.getObjects()) {
551        if (first) {
552          first = false;
553          writer.write(left + " " + po.getPredicate() + " ");
554        } else
555          writer.write(", ");
556        if (o instanceof StringType)
557          writer.write(((StringType) o).value);
558        else {
559          writer.write("[");
560          if (write((Complex) o, writer, indent + 2))
561            writer.write("\r\n" + left + " ]");
562          else
563            writer.write(" ]");
564        }
565      }
566      i++;
567      if (i < complex.predicates.size())
568        writer.write(";");
569      if (!Utilities.noString(po.comment))
570        writer.write(" # " + escape(po.comment, false));
571    }
572    return true;
573  }
574
575  public boolean write(Complex complex, StringBuilder b, int indent) throws Exception {
576    if (complex.predicates.isEmpty())
577      return false;
578    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size() == 1
579        && complex.predicates.get(0).getObjects().get(0) instanceof StringType
580        && Utilities.noString(complex.predicates.get(0).comment)) {
581      b.append(" " + complex.predicates.get(0).makelink() + " "
582          + Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value));
583      return false;
584    }
585    String left = Utilities.padLeft("", ' ', indent);
586    int i = 0;
587    for (Predicate po : complex.predicates) {
588      b.append("\r\n");
589      boolean first = true;
590      for (Triple o : po.getObjects()) {
591        if (first) {
592          first = false;
593          b.append(left + " " + po.makelink() + " ");
594        } else
595          b.append(", ");
596        if (o instanceof StringType)
597          b.append(Utilities.escapeXml(((StringType) o).value));
598        else {
599          b.append("[");
600          if (write((Complex) o, b, indent + 2))
601            b.append(left + " ]");
602          else
603            b.append(" ]");
604        }
605      }
606      i++;
607      if (i < complex.predicates.size())
608        b.append(";");
609      if (!Utilities.noString(po.comment))
610        b.append(" # " + Utilities.escapeXml(escape(po.comment, false)));
611    }
612    return true;
613  }
614
615  public abstract class TTLObject {
616    protected int line;
617    protected int col;
618
619    abstract public boolean hasValue(String value);
620
621    public int getLine() {
622      return line;
623    }
624
625    public int getCol() {
626      return col;
627    }
628
629  }
630
631  public class TTLLiteral extends TTLObject {
632
633    private String value;
634    private String type;
635
636    protected TTLLiteral(int line, int col) {
637      this.line = line;
638      this.col = col;
639    }
640
641    @Override
642    public boolean hasValue(String value) {
643      return value.equals(this.value);
644    }
645
646    public String getValue() {
647      return value;
648    }
649
650    public String getType() {
651      return type;
652    }
653
654  }
655
656  public class TTLURL extends TTLObject {
657    private String uri;
658
659    protected TTLURL(int line, int col) {
660      this.line = line;
661      this.col = col;
662    }
663
664    public String getUri() {
665      return uri;
666    }
667
668    public void setUri(String uri) throws FHIRFormatError {
669      if (!uri.matches(IRI_URL))
670        throw new FHIRFormatError("Illegal URI " + uri);
671      this.uri = uri;
672    }
673
674    @Override
675    public boolean hasValue(String value) {
676      return value.equals(this.uri);
677    }
678  }
679
680  public class TTLList extends TTLObject {
681    private List<TTLObject> list = new ArrayList<Turtle.TTLObject>();
682
683    public TTLList(TTLObject obj) {
684      super();
685      list.add(obj);
686    }
687
688    @Override
689    public boolean hasValue(String value) {
690      for (TTLObject obj : list)
691        if (obj.hasValue(value))
692          return true;
693      return false;
694    }
695
696    public List<TTLObject> getList() {
697      return list;
698    }
699
700  }
701
702  public class TTLComplex extends TTLObject {
703    private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>();
704
705    protected TTLComplex(int line, int col) {
706      this.line = line;
707      this.col = col;
708    }
709
710    public Map<String, TTLObject> getPredicates() {
711      return predicates;
712    }
713
714    @Override
715    public boolean hasValue(String value) {
716      return false;
717    }
718
719    public void addPredicate(String uri, TTLObject obj) {
720      if (!predicates.containsKey(uri))
721        predicates.put(uri, obj);
722      else {
723        TTLObject eo = predicates.get(uri);
724        TTLList list = null;
725        if (eo instanceof TTLList)
726          list = (TTLList) eo;
727        else {
728          list = new TTLList(eo);
729          predicates.put(uri, list);
730        }
731        list.list.add(obj);
732      }
733    }
734
735    public void addPredicates(Map<String, TTLObject> values) {
736      for (String s : values.keySet()) {
737        addPredicate(s, values.get(s));
738      }
739    }
740  }
741
742  private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>();
743
744  private Object base;
745
746  public enum LexerTokenType {
747    TOKEN, // [, ], :, @
748    WORD, // a word
749    URI, // a URI <>
750    LITERAL // "..."
751  }
752
753  public class Lexer {
754
755    private String source;
756    private LexerTokenType type;
757    private int cursor, line, col, startLine, startCol;
758    private String token;
759
760    public Lexer(String source) throws FHIRFormatError {
761      this.source = source;
762      cursor = 0;
763      line = 1;
764      col = 1;
765      readNext(false);
766    }
767
768    private void skipWhitespace() {
769      while (cursor < source.length()) {
770        char ch = source.charAt(cursor);
771        if (Character.isWhitespace(ch))
772          grab();
773        else if (ch == '#') {
774          ch = grab();
775          while (cursor < source.length()) {
776            ch = grab();
777            if (ch == '\r' || ch == '\n') {
778              break;
779            }
780          }
781        } else
782          break;
783      }
784    }
785
786    private char grab() {
787      char c = source.charAt(cursor);
788      if (c == '\n') {
789        line++;
790        col = 1;
791      } else
792        col++;
793
794      cursor++;
795      return c;
796    }
797
798    private void readNext(boolean postColon) throws FHIRFormatError {
799      token = null;
800      type = null;
801      skipWhitespace();
802      if (cursor >= source.length())
803        return;
804      startLine = line;
805      startCol = col;
806      char ch = grab();
807      StringBuilder b = new StringBuilder();
808      switch (ch) {
809      case '@':
810      case '.':
811      case ':':
812      case ';':
813      case '^':
814      case ',':
815      case ']':
816      case '[':
817      case '(':
818      case ')':
819        type = LexerTokenType.TOKEN;
820        b.append(ch);
821        token = b.toString();
822        return;
823      case '<':
824        while (cursor < source.length()) {
825          ch = grab();
826          if (ch == '>')
827            break;
828          b.append(ch);
829        }
830        type = LexerTokenType.URI;
831        token = unescape(b.toString(), true);
832        return;
833      case '"':
834        b.append(ch);
835        String end = "\"";
836        while (cursor < source.length()) {
837          ch = grab();
838          if (b.length() == 2 && ch != '"' && b.equals("\"\"")) {
839            cursor--;
840            break;
841          }
842          b.append(ch);
843          if (ch == '"')
844            if (b.toString().equals("\"\"\""))
845              end = "\"\"\"";
846            else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\" + end))
847              break;
848        }
849        type = LexerTokenType.LITERAL;
850        token = unescape(b.toString().substring(end.length(), b.length() - end.length()), false);
851        return;
852      case '\'':
853        b.append(ch);
854        end = "'";
855        while (cursor < source.length()) {
856          ch = grab();
857          if (b.equals("''") && ch != '\'') {
858            cursor--;
859            break;
860          }
861          b.append(ch);
862          if (b.toString().equals("'''"))
863            end = "'''";
864          else if (!b.toString().equals("''") && b.toString().endsWith(end))
865            break;
866        }
867        type = LexerTokenType.LITERAL;
868        token = unescape(b.toString().substring(end.length(), b.length() - end.length()), false);
869        return;
870      default:
871        if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z')
872            || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) {
873          b.append(ch);
874          while (cursor < source.length()) {
875            ch = grab();
876            // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a',
877            // 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_',
878            // '-', '+', '.', '\\', '#'))
879            if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~')
880                || ((ch == ':') && !postColon))
881              break;
882            b.append(ch);
883          }
884          type = LexerTokenType.WORD;
885          token = b.toString();
886          cursor--;
887          return;
888        } else
889          throw error("unexpected lexer char " + ch);
890      }
891    }
892
893    private String unescape(String s, boolean isUri) throws FHIRFormatError {
894      StringBuilder b = new StringBuilder();
895      int i = 0;
896      while (i < s.length()) {
897        char ch = s.charAt(i);
898        if (ch == '\\' && i < s.length() - 1) {
899          i++;
900          switch (s.charAt(i)) {
901          case 't':
902            b.append('\t');
903            break;
904          case 'r':
905            b.append('\r');
906            break;
907          case 'n':
908            b.append('\n');
909            break;
910          case 'f':
911            b.append('\f');
912            break;
913          case '\'':
914            b.append('\'');
915            break;
916          case '\"':
917            b.append('\"');
918            break;
919          case '\\':
920            b.append('\\');
921            break;
922          case '/':
923            b.append('\\');
924            break;
925          case 'U':
926          case 'u':
927            i++;
928            int l = 4;
929            int uc = Integer.parseInt(s.substring(i, i + l), 16);
930            if (uc < (isUri ? 33 : 32)) {
931              l = 8;
932              uc = Integer.parseInt(s.substring(i, i + 8), 16);
933            }
934            if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
935              throw new FHIRFormatError("Illegal unicode character");
936            b.append(Character.toString(uc));
937            i = i + l;
938            break;
939          default:
940            throw new FHIRFormatError("Unknown character escape \\" + s.charAt(i));
941          }
942        } else {
943          b.append(ch);
944        }
945        i++;
946      }
947      return b.toString();
948    }
949
950    public boolean done() {
951      return type == null;
952    }
953
954    public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError {
955      if (type != null && this.type != type)
956        throw error("Unexpected type. Found " + this.type.toString() + " looking for a " + type.toString());
957      String res = token;
958      readNext(postColon);
959      return res;
960    }
961
962    public String peek() throws Exception {
963      return token;
964    }
965
966    public LexerTokenType peekType() {
967      return type;
968    }
969
970    public void token(String token) throws FHIRFormatError {
971      if (!token.equals(this.token))
972        throw error("Unexpected word " + this.token + " looking for " + token);
973      next(LexerTokenType.TOKEN, token.equals(":"));
974    }
975
976    public void word(String word) throws Exception {
977      if (!word.equals(this.token))
978        throw error("Unexpected word " + this.token + " looking for " + word);
979      next(LexerTokenType.WORD, false);
980    }
981
982    public String word() throws FHIRFormatError {
983      String t = token;
984      next(LexerTokenType.WORD, false);
985      return t;
986    }
987
988    public String uri() throws FHIRFormatError {
989      if (this.type != LexerTokenType.URI)
990        throw error("Unexpected type. Found " + this.type.toString() + " looking for a URI");
991      String t = token;
992      next(LexerTokenType.URI, false);
993      return t;
994    }
995
996    public String literal() throws FHIRFormatError {
997      if (this.type != LexerTokenType.LITERAL)
998        throw error("Unexpected type. Found " + this.type.toString() + " looking for a Literal");
999      String t = token;
1000      next(LexerTokenType.LITERAL, false);
1001      return t;
1002    }
1003
1004    public boolean peek(LexerTokenType type, String token) {
1005      return this.type == type && this.token.equals(token);
1006    }
1007
1008    public FHIRFormatError error(String message) {
1009      return new FHIRFormatError("Syntax Error parsing Turtle on line " + Integer.toString(line) + " col "
1010          + Integer.toString(col) + ": " + message);
1011    }
1012
1013  }
1014  //
1015  // public void importTtl(Section sct, String ttl) throws Exception {
1016  // if (!Utilities.noString(ttl)) {
1017  // // System.out.println("import ttl: "+ttl);
1018  // Lexer lexer = new Lexer(ttl);
1019  // String subject = null;
1020  // String predicate = null;
1021  // while (!lexer.done()) {
1022  // if (subject == null)
1023  // subject = lexer.next();
1024  // if (predicate == null)
1025  // predicate = lexer.next();
1026  // if (lexer.peekType() == null) {
1027  // throw new Error("Unexpected end of input parsing turtle");
1028  // } if (lexer.peekType() == LexerTokenType.TOKEN) {
1029  // sct.triple(subject, predicate, lexer.next());
1030  // } else if (lexer.peek() == null) {
1031  // throw new Error("Unexected - turtle lexer found no token");
1032  // } else if (lexer.peek().equals("[")) {
1033  // sct.triple(subject, predicate, importComplex(lexer));
1034  // } else
1035  // throw new Exception("Not done yet");
1036  // String n = lexer.next();
1037  // if (Utilities.noString(n))
1038  // break;
1039  // if (n.equals(".")) {
1040  // subject = null;
1041  // predicate = null;
1042  // } else if (n.equals(";")) {
1043  // predicate = null;
1044  // } else if (!n.equals(","))
1045  // throw new Exception("Unexpected token "+n);
1046  // }
1047  // }
1048  // }
1049
1050  public void parse(String source) throws FHIRFormatError {
1051    prefixes.clear();
1052    prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#");
1053    parse(new Lexer(source));
1054  }
1055
  /**
   * Parses every statement the lexer produces until {@code lexer.done()}.
   *
   * <p>
   * Two kinds of statement are handled: prefix/base directives (either
   * {@code @prefix}/{@code @base} or {@code PREFIX}/{@code BASE}) and subject
   * statements. Each subject statement is parsed with
   * {@link #parseComplex(Lexer)} and the result is stored in {@code objects}
   * keyed by the subject.
   *
   * @param lexer the token source, positioned on the first token to parse
   * @throws FHIRFormatError on an unexpected token, an unknown prefix or a
   *                         second base declaration
   */
  private void parse(Lexer lexer) throws FHIRFormatError {
    // directives are only recognised until the first subject statement is seen
    boolean doPrefixes = true;
    while (!lexer.done()) {
      if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX")
          || lexer.peek(LexerTokenType.WORD, "BASE"))) {
        boolean sparqlStyle = false;
        boolean base = false;
        if (lexer.peek(LexerTokenType.TOKEN, "@")) {
          // "@prefix" / "@base" form (lower-case keyword after the '@')
          lexer.token("@");
          String p = lexer.word();
          if (p.equals("base"))
            base = true;
          else if (!p.equals("prefix"))
            throw new FHIRFormatError("Unexpected token " + p);
        } else {
          // "PREFIX" / "BASE" form (upper-case keyword, no leading '@')
          sparqlStyle = true;
          String p = lexer.word();
          if (p.equals("BASE"))
            base = true;
          else if (!p.equals("PREFIX"))
            throw new FHIRFormatError("Unexpected token " + p);
        }
        String prefix = null;
        if (!base) {
          // the prefix name is optional; when absent, prefix stays null
          prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null;
          lexer.token(":");
        }
        String url = lexer.next(LexerTokenType.URI, false);
        // only the '@' form is terminated by a '.'
        if (!sparqlStyle)
          lexer.token(".");
        if (!base)
          prefix(prefix, url);
        else if (this.base == null)
          this.base = url;
        else
          throw new FHIRFormatError("Duplicate @base");
      } else if (lexer.peekType() == LexerTokenType.URI) {
        // subject given as a URI token
        doPrefixes = false;
        TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
        uri.setUri(lexer.uri());
        TTLComplex complex = parseComplex(lexer);
        objects.put(uri, complex);
        lexer.token(".");
      } else if (lexer.peekType() == LexerTokenType.WORD) {
        // subject given as prefix:name; the prefix must already be registered
        doPrefixes = false;
        TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
        String pfx = lexer.word();
        if (!prefixes.containsKey(pfx))
          throw new FHIRFormatError("Unknown prefix " + pfx);
        lexer.token(":");
        uri.setUri(prefixes.get(pfx) + lexer.word());
        TTLComplex complex = parseComplex(lexer);
        objects.put(uri, complex);
        lexer.token(".");
      } else if (lexer.peek(LexerTokenType.TOKEN, ":")) {
        // subject given as :name; resolved against the prefix stored under the null key
        doPrefixes = false;
        TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
        lexer.token(":");
        if (!prefixes.containsKey(null))
          throw new FHIRFormatError("Unknown prefix ''");
        uri.setUri(prefixes.get(null) + lexer.word());
        TTLComplex complex = parseComplex(lexer);
        objects.put(uri, complex);
        lexer.token(".");
      } else if (lexer.peek(LexerTokenType.TOKEN, "[")) {
        // subject given as a bracketed node: [ predicates ] optionally followed by more predicates
        doPrefixes = false;
        lexer.token("[");
        TTLComplex bnode = parseComplex(lexer);
        lexer.token("]");
        TTLComplex complex = null;
        if (!lexer.peek(LexerTokenType.TOKEN, ".")) {
          complex = parseComplex(lexer);
          // at this point, we collapse bnode and complex, and give bnode a fictional
          // identity
          bnode.addPredicates(complex.predicates);
        }

        // the node has no URI of its own, so it is stored under a generated one
        objects.put(anonymousId(), bnode);
        lexer.token(".");
      } else
        throw lexer.error("Unknown token " + lexer.token);
    }
  }
1139
1140  private TTLURL anonymousId() throws FHIRFormatError {
1141    TTLURL url = new TTLURL(-1, -1);
1142    url.setUri("urn:uuid:" + UUID.randomUUID().toString().toLowerCase());
1143    return url;
1144  }
1145
1146  private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError {
1147    TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol);
1148
1149    boolean done = lexer.peek(LexerTokenType.TOKEN, "]");
1150    while (!done) {
1151      String uri = null;
1152      if (lexer.peekType() == LexerTokenType.URI)
1153        uri = lexer.uri();
1154      else {
1155        String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1156        if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) {
1157          lexer.token(":");
1158          if (!prefixes.containsKey(t))
1159            throw new FHIRFormatError("unknown prefix " + t);
1160          uri = prefixes.get(t) + lexer.word();
1161        } else if (t.equals("a"))
1162          uri = prefixes.get("rdfs") + "type";
1163        else
1164          throw lexer.error("unexpected token");
1165      }
1166
1167      boolean inlist = false;
1168      if (lexer.peek(LexerTokenType.TOKEN, "(")) {
1169        inlist = true;
1170        lexer.token("(");
1171      }
1172
1173      boolean rpt = false;
1174      do {
1175        if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1176          lexer.token("[");
1177          result.addPredicate(uri, parseComplex(lexer));
1178          lexer.token("]");
1179        } else if (lexer.peekType() == LexerTokenType.URI) {
1180          TTLURL u = new TTLURL(lexer.startLine, lexer.startCol);
1181          u.setUri(lexer.uri());
1182          result.addPredicate(uri, u);
1183        } else if (lexer.peekType() == LexerTokenType.LITERAL) {
1184          TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol);
1185          u.value = lexer.literal();
1186          if (lexer.peek(LexerTokenType.TOKEN, "^")) {
1187            lexer.token("^");
1188            lexer.token("^");
1189            if (lexer.peekType() == LexerTokenType.URI) {
1190              u.type = lexer.uri();
1191            } else {
1192              String l = lexer.word();
1193              lexer.token(":");
1194              u.type = prefixes.get(l) + lexer.word();
1195            }
1196          }
1197          if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1198            // lang tag - skip it
1199            lexer.token("@");
1200            String lang = lexer.word();
1201            if (!lang.matches(LANG_REGEX)) {
1202              throw new FHIRFormatError("Invalid Language tag " + lang);
1203            }
1204          }
1205          result.addPredicate(uri, u);
1206        } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) {
1207          int sl = lexer.startLine;
1208          int sc = lexer.startCol;
1209          String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1210          if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1211            TTLLiteral u = new TTLLiteral(sl, sc);
1212            u.value = pfx;
1213            result.addPredicate(uri, u);
1214          } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1215            TTLLiteral u = new TTLLiteral(sl, sc);
1216            u.value = pfx;
1217            result.addPredicate(uri, u);
1218          } else {
1219            if (!prefixes.containsKey(pfx))
1220              throw new FHIRFormatError("Unknown prefix " + (pfx == null ? "''" : pfx));
1221            TTLURL u = new TTLURL(sl, sc);
1222            lexer.token(":");
1223            u.setUri(prefixes.get(pfx) + lexer.word());
1224            result.addPredicate(uri, u);
1225          }
1226        } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) {
1227          throw new FHIRFormatError("unexpected token " + lexer.token);
1228        }
1229
1230        if (inlist)
1231          rpt = !lexer.peek(LexerTokenType.TOKEN, ")");
1232        else {
1233          rpt = lexer.peek(LexerTokenType.TOKEN, ",");
1234          if (rpt)
1235            lexer.readNext(false);
1236        }
1237      } while (rpt);
1238      if (inlist)
1239        lexer.token(")");
1240
1241      if (lexer.peek(LexerTokenType.TOKEN, ";")) {
1242        while ((lexer.peek(LexerTokenType.TOKEN, ";")))
1243          lexer.token(";");
1244        done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]");
1245      } else {
1246        done = true;
1247      }
1248    }
1249    return result;
1250  }
1251
1252  public Map<TTLURL, TTLComplex> getObjects() {
1253    return objects;
1254  }
1255
1256  public TTLComplex getObject(String url) {
1257    for (TTLURL t : objects.keySet()) {
1258      if (t.getUri().equals(url))
1259        return objects.get(t);
1260    }
1261    return null;
1262  }
1263
1264  // public void parseFragment(Lexer lexer) throws Exception {
1265  // lexer.next(); // read [
1266  // Complex obj = new Complex();
1267  // while (!lexer.peek().equals("]")) {
1268  // String predicate = lexer.next();
1269  // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() ==
1270  // LexerTokenType.LITERAL) {
1271  // obj.predicate(predicate, lexer.next());
1272  // } else if (lexer.peek().equals("[")) {
1273  // obj.predicate(predicate, importComplex(lexer));
1274  // } else
1275  // throw new Exception("Not done yet");
1276  // if (lexer.peek().equals(";"))
1277  // lexer.next();
1278  // }
1279  // lexer.next(); // read ]
1280  // //return obj;
1281  // }
1282  //
1283  // public void importTtl(Section sct, String ttl) throws Exception {
1284  // if (!Utilities.noString(ttl)) {
1285  // // System.out.println("import ttl: "+ttl);
1286  // Lexer lexer = new Lexer(ttl);
1287  // String subject = null;
1288  // String predicate = null;
1289  // while (!lexer.done()) {
1290  // if (subject == null)
1291  // subject = lexer.next();
1292  // if (predicate == null)
1293  // predicate = lexer.next();
1294  // if (lexer.peekType() == null) {
1295  // throw new Error("Unexpected end of input parsing turtle");
1296  // } if (lexer.peekType() == LexerTokenType.TOKEN) {
1297  // sct.triple(subject, predicate, lexer.next());
1298  // } else if (lexer.peek() == null) {
1299  // throw new Error("Unexected - turtle lexer found no token");
1300  // } else if (lexer.peek().equals("[")) {
1301  // sct.triple(subject, predicate, importComplex(lexer));
1302  // } else
1303  // throw new Exception("Not done yet");
1304  // String n = lexer.next();
1305  // if (Utilities.noString(n))
1306  // break;
1307  // if (n.equals(".")) {
1308  // subject = null;
1309  // predicate = null;
1310  // } else if (n.equals(";")) {
1311  // predicate = null;
1312  // } else if (!n.equals(","))
1313  // throw new Exception("Unexpected token "+n);
1314  // }
1315  // }
1316  // }
1317
1318  // private Complex importComplex(Lexer lexer) throws Exception {
1319  // lexer.next(); // read [
1320  // Complex obj = new Complex();
1321  // while (!lexer.peek().equals("]")) {
1322  // String predicate = lexer.next();
1323  // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() ==
1324  // LexerTokenType.LITERAL) {
1325  // obj.predicate(predicate, lexer.next());
1326  // } else if (lexer.peek().equals("[")) {
1327  // obj.predicate(predicate, importComplex(lexer));
1328  // } else
1329  // throw new Exception("Not done yet");
1330  // if (lexer.peek().equals(";"))
1331  // lexer.next();
1332  // }
1333  // lexer.next(); // read ]
1334  // return obj;
1335  // }
1336
1337}