001package org.hl7.fhir.dstu3.utils.formats;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.IOException;
035import java.io.OutputStream;
036import java.io.OutputStreamWriter;
037import java.io.UnsupportedEncodingException;
038import java.util.ArrayList;
039import java.util.Collections;
040import java.util.HashMap;
041import java.util.HashSet;
042import java.util.List;
043import java.util.Map;
044import java.util.Set;
045import java.util.UUID;
046
047import org.hl7.fhir.exceptions.FHIRFormatError;
048import org.hl7.fhir.utilities.Utilities;
049
050public class Turtle {
051
        /**
         * Character-class fragment (ASCII letters, digits, and the range U+00A0
         * through U+FFFE) that is spliced into the bracket expression of
         * {@link #IRI_URL}. It is not a complete regular expression on its own.
         */
        public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE";

  /**
   * Regular expression a URI must match in full before TTLURL.setUri accepts it:
   * any number of lowercase "scheme:" prefixes followed by one or more
   * percent-escapes or permitted characters.
   */
  public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; 
  /**
   * Two lowercase letters, optionally followed by a hyphen and two letters.
   * NOTE(review): presumably validates language tags; no use is visible in this
   * part of the file - confirm against the parser.
   */
  public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
056
057        // Object model
        /**
         * Common base type for objects in the output model. The subclasses visible in
         * this file are StringType (ready-to-emit text) and Complex (a node that
         * carries its own predicates).
         */
        public abstract class Triple {
                // Neither read nor written in the visible part of this file.
                private String uri;
        }
061
062        public class StringType extends Triple {
063                private String value;
064
065                public StringType(String value) {
066                        super();
067                        this.value = value;
068                }
069        }
070
071        public class Complex extends Triple {
072                protected List<Predicate> predicates = new ArrayList<Predicate>();
073
074                public Complex predicate(String predicate, String object) {
075                        predicateSet.add(predicate);
076                        objectSet.add(object);
077                        return predicate(predicate, new StringType(object));
078                }
079
080    public Complex linkedPredicate(String predicate, String object, String link) {
081      predicateSet.add(predicate);
082      objectSet.add(object);
083      return linkedPredicate(predicate, new StringType(object), link);
084    }
085
086                public Complex predicate(String predicate, Triple object) {
087      Predicate p = getPredicate(predicate);
088      if (p == null) {
089        p = new Predicate();
090                        p.predicate = predicate;
091                        predicateSet.add(predicate);
092        predicates.add(p);
093      }
094                        if (object instanceof StringType)
095                                objectSet.add(((StringType) object).value);
096      p.objects.add(object);
097                        return this;
098                }
099
100    protected Predicate getPredicate(String predicate) {
101      for (Predicate p : predicates)
102        if (p.predicate.equals(predicate))
103          return p;
104      return null;
105    }
106
107    public Complex linkedPredicate(String predicate, Triple object, String link) {
108      Predicate p = getPredicate(predicate);
109      if (p == null) {
110        p = new Predicate();
111      p.predicate = predicate;
112      p.link = link;
113      predicateSet.add(predicate);
114        predicates.add(p);
115      }
116      if (object instanceof StringType)
117        objectSet.add(((StringType) object).value);
118      p.objects.add(object);
119      return this;
120    }
121
122                public Complex predicate(String predicate) {
123                        predicateSet.add(predicate);
124                        Complex c = complex();
125                        predicate(predicate, c);
126                        return c;
127                }
128
129    public Complex linkedPredicate(String predicate, String link) {
130      predicateSet.add(predicate);
131      Complex c = complex();
132      linkedPredicate(predicate, c, link);
133      return c;
134    }
135
136                public void prefix(String code, String url) {
137                        Turtle.this.prefix(code, url);
138                }
139        }
140
141        private class Predicate {
142                protected String predicate;
143                protected String link;
144    protected List<Triple> objects = new ArrayList<Turtle.Triple>();
145                protected String comment;
146
147                public String getPredicate() {
148                        return predicate;
149                }
150                public String makelink() {
151      if (link == null)
152        return predicate;
153      else
154        return "<a href=\""+link+"\">"+Utilities.escapeXml(predicate)+"</a>";
155    }
156                
157    public List<Triple> getObjects() {
158      return objects;
159                }
160                public String getComment() {
161                        return comment;
162                }
163        }
164
165        public class Subject extends Complex {
166                private String id;
167
168                public Predicate predicate(String predicate, Triple object, String comment) {
169      Predicate p = getPredicate(predicate);
170      if (p == null) {
171        p = new Predicate();
172                        p.predicate = predicate;
173                        predicateSet.add(predicate);
174                        predicates.add(p);
175                        p.comment = comment; 
176      }
177      if (object instanceof StringType)
178        objectSet.add(((StringType) object).value);
179      p.objects.add(object);
180                        return p;
181                }
182
183                public void comment(String comment) {
184                        if (!Utilities.noString(comment)) {
185                                predicate("rdfs:comment", literal(comment));
186                                predicate("dcterms:description", literal(comment));
187                        }
188                }
189
190                public void label(String label) {
191                        if (!Utilities.noString(label)) {
192                                predicate("rdfs:label", literal(label));
193                                predicate("dc:title", literal(label));
194                        }
195                }
196
197        }
198
199        public class Section {
200                private String name;
201                private List<Subject> subjects = new ArrayList<Subject>();
202
203                public Subject triple(String subject, String predicate, String object, String comment) {
204                        return triple(subject, predicate, new StringType(object), comment);
205                }
206
207                public Subject triple(String subject, String predicate, String object) {
208                        return triple(subject, predicate, new StringType(object));
209                }
210
211                public Subject triple(String subject, String predicate, Triple object) {
212                        return triple(subject, predicate, object, null);     
213                }
214
215                public Subject triple(String subject, String predicate, Triple object, String comment) {
216                        Subject s = subject(subject);
217                        s.predicate(predicate, object, comment);
218                        return s;
219                }
220
221                public void comment(String subject, String comment) {
222                        triple(subject, "rdfs:comment", literal(comment));
223                        triple(subject, "dcterms:description", literal(comment));
224                }
225
226                public void label(String subject, String comment) {
227                        triple(subject, "rdfs:label", literal(comment));
228                        triple(subject, "dc:title", literal(comment));
229                }
230
231                public Subject subject(String subject) {
232                        for (Subject ss : subjects) 
233                                if (ss.id.equals(subject))
234                                        return ss;
235                        Subject s = new Subject();
236                        s.id = subject;
237                        subjects.add(s);
238                        return s;
239                }
240
241    public boolean hasSubject(String subject) {
242      for (Subject ss : subjects) 
243        if (ss.id.equals(subject))
244          return true;
245      return false;
246    }
247        }
248
        // Sections in creation order; commit() and asHtml() write them in this order.
        private List<Section> sections = new ArrayList<Section>();
        // Declared alongside the other sets; nothing in the visible part of this file adds to it.
        protected Set<String> subjectSet = new HashSet<String>();
        // Every predicate name added through Complex/Subject; listed in the file header by commitPrefixes.
        protected Set<String> predicateSet = new HashSet<String>();
        // Every StringType object value added through Complex/Subject.
        protected Set<String> objectSet = new HashSet<String>();
        // Prefix code -> namespace URL; filled by prefix() and matches(), read by checkPrefix and commitPrefixes.
        protected Map<String, String> prefixes = new HashMap<String, String>();
254
        /**
         * Registers a namespace prefix, replacing any URL previously stored for the
         * same code.
         *
         * @param code the prefix as written before the colon
         * @param url the namespace URL the prefix stands for
         */
        public void prefix(String code, String url) {
                prefixes.put(code, url);
        }
258
259        protected boolean hasSection(String sn) {
260                for (Section s : sections)
261                        if (s.name.equals(sn))
262                                return true;
263                return false;
264
265        }
266
267        public Section section(String sn) {
268                if (hasSection(sn))
269                        throw new Error("Duplicate section name "+sn);
270                Section s = new Section();
271                s.name = sn;
272                sections.add(s);
273                return s;
274        }
275
276        protected String matches(String url, String prefixUri, String prefix) {
277                if (url.startsWith(prefixUri)) {
278                        prefixes.put(prefix, prefixUri);
279                        return prefix+":"+escape(url.substring(prefixUri.length()), false);
280                }
281                return null;
282        }
283
        /**
         * Factory for nested nodes; Complex.predicate(String) and
         * Complex.linkedPredicate(String, String) obtain their child node here.
         *
         * @return a new, empty Complex bound to this Turtle instance
         */
        protected Complex complex() {
                return new Complex();
        }
287
288        private void checkPrefix(Triple object) {
289                if (object instanceof StringType)
290                        checkPrefix(((StringType) object).value);
291                else {
292                        Complex obj = (Complex) object;
293                        for (Predicate po : obj.predicates) {
294                                checkPrefix(po.getPredicate());
295        for (Triple o : po.getObjects())
296          checkPrefix(o);
297                        }
298                }
299        }
300
301        protected void checkPrefix(String pname) {
302                if (pname.startsWith("("))
303                        return;
304                if (pname.startsWith("\""))
305                        return;
306                if (pname.startsWith("<"))
307                        return;
308
309                if (pname.contains(":")) {
310                        String prefix = pname.substring(0, pname.indexOf(":"));
311                        if (!prefixes.containsKey(prefix) && !prefix.equals("http")&& !prefix.equals("urn"))
312                                throw new Error("undefined prefix "+prefix); 
313                }
314        }
315
316        protected StringType literal(String s) {
317                return new StringType("\""+escape(s, true)+"\"");
318        }
319
320  protected StringType literalTyped(String s, String t) {
321    return new StringType("\""+escape(s, true)+"\"^^xs:"+t);
322  }
323
324        public static String escape(String s, boolean string) {
325                if (s == null)
326                        return "";
327
328                StringBuilder b = new StringBuilder();
329                for (char c : s.toCharArray()) {
330                        if (c == '\r')
331                                b.append("\\r");
332                        else if (c == '\n')
333                                b.append("\\n");
334                        else if (c == '"')
335                                b.append("\\\"");
336                        else if (c == '\\')
337                                b.append("\\\\");
338                        else if (c == '/' && !string)
339                                b.append("\\/");
340                        else 
341                                b.append(c);
342                }   
343                return b.toString();
344        }
345
346        protected String pctEncode(String s) {
347                if (s == null)
348                        return "";
349
350                StringBuilder b = new StringBuilder();
351                for (char c : s.toCharArray()) {
352                        if (c >= 'A' && c <= 'Z')
353                                b.append(c);
354                        else if (c >= 'a' && c <= 'z')
355                                b.append(c);
356                        else if (c >= '0' && c <= '9')
357                                b.append(c);
358                        else if (c == '.')
359                                b.append(c);
360                        else 
361                                b.append("%"+Integer.toHexString(c));
362                }   
363                return b.toString();
364        }
365
366        protected List<String> sorted(Set<String> keys) {
367                List<String> names = new ArrayList<String>();
368                names.addAll(keys);
369                Collections.sort(names);
370                return names;
371        }
372
373        public void commit(OutputStream destination, boolean header) throws IOException {
374                LineOutputStreamWriter writer = new LineOutputStreamWriter(destination);
375                commitPrefixes(writer, header);
376                for (Section s : sections) {
377                        commitSection(writer, s);
378                }
379                writer.ln("# -------------------------------------------------------------------------------------");
380                writer.ln();
381                writer.flush();
382                writer.close();
383        }
384
385  public String asHtml() throws Exception {
386    StringBuilder b = new StringBuilder();
387    b.append("<pre class=\"rdf\">\r\n");
388    commitPrefixes(b);
389    for (Section s : sections) {
390      commitSection(b, s);
391    }
392    b.append("</pre>\r\n");
393    b.append("\r\n");
394    return b.toString();
395  }
396
397        private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException {
398                if (header) {
399                        writer.ln("# FHIR Sub-definitions");
400                        writer.write("# This is work in progress, and may change rapidly \r\n");
401                        writer.ln();
402                        writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 
403                        writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n");
404                        writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n");
405                        writer.write("# appropriate\" means that the predicates are a faithful representation \r\n");
406                        writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n");
407                        writer.ln();
408                        writer.write("# Where the community agrees on additional predicate statements (such \r\n");
409                        writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n");
410                        writer.write("# predicates \r\n");
411                        writer.ln();
412                        writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n");
413                        writer.ln();
414                        writer.write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n");
415                        writer.ln();
416                }
417                for (String p : sorted(prefixes.keySet()))
418                        writer.ln("@prefix "+p+": <"+prefixes.get(p)+"> .");
419                writer.ln();
420                if (header) {
421                        writer.ln("# Predicates used in this file:");
422                        for (String s : sorted(predicateSet)) 
423                                writer.ln(" # "+s);
424                        writer.ln();
425                }
426        }
427
428  private void commitPrefixes(StringBuilder b) throws Exception {
429    for (String p : sorted(prefixes.keySet()))
430      b.append("@prefix "+p+": &lt;"+prefixes.get(p)+"&gt; .\r\n");
431    b.append("\r\n");
432  }
433
434        //  private String lastSubject = null;
435        //  private String lastComment = "";
436
437        private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException {
438                writer.ln("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length()));
439                writer.ln();
440                for (Subject sbj : section.subjects) {
441      if (Utilities.noString(sbj.id)) {
442        writer.write("[");
443      } else {
444                        writer.write(sbj.id);
445                        writer.write(" ");
446      }
447                        int i = 0;
448
449                        for (Predicate p : sbj.predicates) {
450                                writer.write(p.getPredicate());
451                                writer.write(" ");
452        boolean first = true;
453        for (Triple o : p.getObjects()) {
454          if (first)
455            first = false;
456          else
457            writer.write(", ");
458          if (o instanceof StringType)
459            writer.write(((StringType) o).value);
460                                else {
461                                        writer.write("[");
462            if (write((Complex) o, writer, 4))
463                                                writer.write("\r\n  ]");
464                                        else
465                                                writer.write("]");
466                                }
467        }
468                                String comment = p.comment == null? "" : " # "+p.comment;
469                                i++;
470                                if (i < sbj.predicates.size())
471                                        writer.write(";"+comment+"\r\n  ");
472        else {
473          if (Utilities.noString(sbj.id)) 
474            writer.write("]");
475          writer.write(" ."+comment+"\r\n\r\n");
476                }
477          }
478        }
479  }
480
481  private void commitSection(StringBuilder b, Section section) throws Exception {
482    b.append("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())+"\r\n");
483    b.append("\r\n");
484    for (Subject sbj : section.subjects) {
485      b.append(Utilities.escapeXml(sbj.id));
486      b.append(" ");
487      int i = 0;
488
489      for (Predicate p : sbj.predicates) {
490        b.append(p.makelink());
491        b.append(" ");
492        boolean first = true;
493        for (Triple o : p.getObjects()) {
494          if (first)
495            first = false;
496          else
497            b.append(", ");
498          if (o instanceof StringType)
499            b.append(Utilities.escapeXml(((StringType) o).value));
500        else {
501          b.append("[");
502            if (write((Complex) o, b, 4))
503            b.append("\r\n  ]");
504          else
505            b.append("]");
506        }
507        }
508        String comment = p.comment == null? "" : " # "+p.comment;
509        i++;
510        if (i < sbj.predicates.size())
511          b.append(";"+Utilities.escapeXml(comment)+"\r\n  ");
512        else
513          b.append("."+Utilities.escapeXml(comment)+"\r\n\r\n");
514      }
515    }
516  }
517
518        protected class LineOutputStreamWriter extends OutputStreamWriter {
519                private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException {
520                        super(out, "UTF-8");
521                }
522
523                private void ln() throws IOException {
524                        write("\r\n");
525                }
526
527                private void ln(String s) throws IOException {
528                        write(s);
529                        write("\r\n");
530                }
531        }
532
533        public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException {
534                if (complex.predicates.isEmpty()) 
535                        return false;
536    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) {
537      writer.write(" "+complex.predicates.get(0).predicate+" "+((StringType) complex.predicates.get(0).getObjects().get(0)).value);
538                        return false;
539                }
540                String left = Utilities.padLeft("", ' ', indent);
541                int i = 0;
542                for (Predicate po : complex.predicates) {
543                        writer.write("\r\n");
544      boolean first = true;
545      for (Triple o : po.getObjects()) {
546        if (first) {
547          first = false;
548          writer.write(left+" "+po.getPredicate()+" ");
549        } else
550          writer.write(", ");
551        if (o instanceof StringType)
552          writer.write(((StringType) o).value);
553                        else {
554          writer.write("[");
555          if (write((Complex) o, writer, indent+2))
556            writer.write("\r\n"+left+" ]");
557                                else
558                                        writer.write(" ]");
559                        }
560      }
561                        i++;
562                        if (i < complex.predicates.size())
563                                writer.write(";");
564                        if (!Utilities.noString(po.comment)) 
565                                writer.write(" # "+escape(po.comment, false));
566                }
567                return true;      
568        }
569
570  public boolean write(Complex complex, StringBuilder b, int indent) throws Exception {
571    if (complex.predicates.isEmpty()) 
572      return false;
573    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) {
574      b.append(" "+complex.predicates.get(0).makelink()+" "+Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value));
575      return false;
576    }
577    String left = Utilities.padLeft("", ' ', indent);
578    int i = 0;
579    for (Predicate po : complex.predicates) {
580      b.append("\r\n");
581      boolean first = true;
582      for (Triple o : po.getObjects()) {
583        if (first) {
584          first = false;
585          b.append(left+" "+po.makelink()+" ");
586        } else
587          b.append(", ");
588        if (o instanceof StringType)
589          b.append(Utilities.escapeXml(((StringType) o).value));
590      else {
591          b.append("[");
592          if (write((Complex) o, b, indent+2))
593          b.append(left+" ]");
594        else
595          b.append(" ]");
596      }
597      }
598      i++;
599      if (i < complex.predicates.size())
600        b.append(";");
601      if (!Utilities.noString(po.comment)) 
602        b.append(" # "+Utilities.escapeXml(escape(po.comment, false)));
603    }
604    return true;      
605  }
606
607
608  public abstract class TTLObject {
609                protected int line;
610                protected int col;
611
612    abstract public boolean hasValue(String value);
613
614    public int getLine() {
615      return line;
616    }
617
618    public int getCol() {
619      return col;
620    }
621    
622    
623        }
624
625
626        public class TTLLiteral extends TTLObject {
627
628                private String value;
629                private String type;
630                protected TTLLiteral(int line, int col) {
631                        this.line = line;
632                        this.col = col;
633                }
634    @Override
635    public boolean hasValue(String value) {
636      return value.equals(this.value);
637    }
638    public String getValue() {
639      return value;
640    }
641    public String getType() {
642      return type;
643    }
644
645        }
646
647        public class TTLURL extends TTLObject {
648                private String uri;
649
650                protected TTLURL(int line, int col) {
651                        this.line = line;
652                        this.col = col;
653                }
654
655                public String getUri() {
656                        return uri;
657                }
658
659    public void setUri(String uri) throws FHIRFormatError {
660                        if (!uri.matches(IRI_URL))
661        throw new FHIRFormatError("Illegal URI "+uri);
662                        this.uri = uri;
663                }
664
665    @Override
666    public boolean hasValue(String value) {
667      return value.equals(this.uri);
668        }
669  }
670
671  public class TTLList extends TTLObject {
672    private List<TTLObject> list = new ArrayList<Turtle.TTLObject>();
673
674    public TTLList(TTLObject obj) {
675      super();
676      list.add(obj);
677    }
678    
679    @Override
680    public boolean hasValue(String value) {
681      for (TTLObject obj : list)
682        if (obj.hasValue(value))
683          return true;
684      return false;
685    }
686
687    public List<TTLObject> getList() {
688      return list;
689    }
690    
691  }
692        public class TTLComplex extends TTLObject {
693                private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>();
694                protected TTLComplex(int line, int col) {
695                        this.line = line;
696                        this.col = col;
697                }
698    public Map<String, TTLObject> getPredicates() {
699      return predicates;
700        }
701    @Override
702    public boolean hasValue(String value) {
703      return false;
704    }
705    public void addPredicate(String uri, TTLObject obj) {
706      if (!predicates.containsKey(uri))
707        predicates.put(uri, obj);
708      else {
709        TTLObject eo = predicates.get(uri);
710        TTLList list = null; 
711        if (eo instanceof TTLList) 
712          list = (TTLList) eo; 
713        else {
714          list = new TTLList(eo);
715          predicates.put(uri, list);
716        }
717        list.list.add(obj);
718      }
719    }
720    public void addPredicates(Map<String, TTLObject> values) {
721      for (String s : values.keySet()) {
722        addPredicate(s, values.get(s));
723      }
724    }
725  }
726
  // Parsed nodes keyed by their TTLURL. TTLURL as shown in this file does not
  // override equals/hashCode, so lookups compare keys by identity.
  // NOTE(review): the code that fills this map is not in the visible part of the file.
  private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>();

  // Not used in the visible part of this file.
  // NOTE(review): presumably the document's base URI - confirm.
  private Object base;
730
        /**
         * Kinds of token the Lexer produces.
         */
        public enum LexerTokenType {
                TOKEN, // a single punctuation character: one of @ . : ; ^ , ] [ ( )
                WORD, // a word (the code that produces these is outside the visible part of the file)
                URI, // the text between angle brackets
                LITERAL // a quoted string
        }
737
738        public class Lexer {
739
740
                // The complete text being tokenised.
                private String source;
                // Kind of the current token; reset to null before each read and left
                // null once the input is exhausted.
                private LexerTokenType type;
                // cursor: index in source of the next unread character.
                // line/col: position of that character, both starting at 1.
                // startLine/startCol: position at which the current token began.
                private int cursor, line, col, startLine, startCol;
                // Text of the current token; null under the same conditions as type.
                private String token;
745
746    public Lexer(String source) throws FHIRFormatError {
747                        this.source = source;
748                        cursor = 0;
749                        line = 1;
750                        col = 1;
751      readNext(false);
752                }
753
754                private void skipWhitespace() {
755                        while (cursor < source.length()) {
756                                char ch = source.charAt(cursor);
757                                if (Character.isWhitespace(ch))
758                                        grab();
759                                else if (ch == '#') {
760                                        ch = grab();
761                                        while (cursor < source.length()) {
762                                                ch = grab();
763                                                if (ch == '\r' || ch == '\n') {
764                                                        break;
765                                                }
766                                        }          
767                                } else
768                                        break;
769                        }
770                }
771
772                private char grab() {
773                        char c = source.charAt(cursor);
774                        if (c == '\n') {
775                                line++;
776                                col = 1;
777                        } else
778                                col++;
779
780                        cursor++;
781                        return c;
782                }
783
784    private void readNext(boolean postColon) throws FHIRFormatError {    
785                        token = null;
786                        type = null;
787                        skipWhitespace();
788                        if (cursor >= source.length())
789                                return;
790                        startLine = line;
791                        startCol = col;
792                        char ch = grab();
793                        StringBuilder b = new StringBuilder();
794                        switch (ch) {
795                        case '@':
796                        case '.': 
797                        case ':': 
798                        case ';': 
799                        case '^': 
800                        case ',': 
801                        case ']': 
802                        case '[': 
803                        case '(': 
804                        case ')': 
805                                type = LexerTokenType.TOKEN;
806                                b.append(ch);
807                                token = b.toString();
808                                return;
809                        case '<': 
810                                while (cursor < source.length()) {
811                                        ch = grab();
812                                        if (ch == '>')
813                                                break;
814                                        b.append(ch);
815                                }
816                                type = LexerTokenType.URI;
817                                token = unescape(b.toString(), true);
818                                return;        
819                        case '"': 
820                                b.append(ch);
821                                String end = "\"";
822                                while (cursor < source.length()) {
823                                        ch = grab();
824          if (b.length() == 2 && ch != '"' && b.equals("\"\"")) {
825                                                cursor--;
826                                                break;
827                                        }
828                                        b.append(ch);
829          if (ch == '"')
830                                        if (b.toString().equals("\"\"\""))
831                                                end = "\"\"\"";
832            else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\"+end))
833                                                break;
834                                }
835                                type = LexerTokenType.LITERAL;
836                                token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false);
837                                return;        
838                        case '\'': 
839                                b.append(ch);
840                                end = "'";
841                                while (cursor < source.length()) {
842                                        ch = grab();
843                                        if (b.equals("''") && ch != '\'') {
844                                                cursor--;
845                                                break;
846                                        }
847                                        b.append(ch);
848                                        if (b.toString().equals("'''"))
849                                                end = "'''";
850                                        else if (!b.toString().equals("''") && b.toString().endsWith(end))
851                                                break;
852                                }
853                                type = LexerTokenType.LITERAL;
854                                token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false);
855                                return;        
856                        default:
857        if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) {
858                                        b.append(ch);
859                                        while (cursor < source.length()) {
860                                                ch = grab();
861            //                                          if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', '-', '+', '.', '\\', '#'))
862            if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') || (( ch == ':') && !postColon))
863                                                        break;
864                                                b.append(ch);
865                                        }
866                                        type = LexerTokenType.WORD;
867                                        token = b.toString();
868                                        cursor--;
869                                        return;        
870                                } else
871                                        throw error("unexpected lexer char "+ch);
872                        }
873                }
874
875    private String unescape(String s, boolean isUri) throws FHIRFormatError {
876                        StringBuilder b = new StringBuilder();
877                        int i = 0;
878                        while (i < s.length()) {
879                                char ch = s.charAt(i);
880                                if (ch == '\\' && i < s.length()-1) {
881                                        i++;
882                                        switch (s.charAt(i)) {
883                                        case 't': 
884                                                b.append('\t');
885                                                break;
886                                        case 'r':
887                                                b.append('\r');
888                                                break;
889                                        case 'n': 
890                                                b.append('\n');
891                                                break;
892                                        case 'f': 
893                                                b.append('\f');
894                                                break;
895                                        case '\'':
896                                                b.append('\'');
897                                                break;
898          case '\"':
899            b.append('\"');
900            break;
901                                        case '\\': 
902                                                b.append('\\');
903                                                break;
904                                        case '/': 
905                                                b.append('\\');
906                                                break;
907                                        case 'U':
908                                        case 'u':
909                                                i++;
910                                                int l = 4;
911                                                int uc = Integer.parseInt(s.substring(i, i+l), 16);
912                                                if (uc < (isUri ? 33 : 32)) {
913                                                        l = 8;
914                                                        uc = Integer.parseInt(s.substring(i, i+8), 16);
915                                                }
916                                                if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
917              throw new FHIRFormatError("Illegal unicode character");
918                                                b.append((char) uc);
919                                                i = i + l;
920                                                break;
921                                        default:
922            throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i));
923                                        }
924                                } else {
925                                        b.append(ch);
926                                }
927        i++;
928                        }
929                        return b.toString();
930                }
931
932                public boolean done() {
933                        return type == null;
934                }
935
936    public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError {
937                        if (type != null && this.type != type)
938                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a "+type.toString());
939                        String res = token;
940      readNext(postColon);
941                        return res;
942                }
943
944                public String peek() throws Exception {
945                        return token;
946                }
947
948                public LexerTokenType peekType() {
949                        return type;
950                }
951
952    public void token(String token) throws FHIRFormatError {
953                        if (!token.equals(this.token))
954                                throw error("Unexpected word "+this.token+" looking for "+token);
955      next(LexerTokenType.TOKEN, token.equals(":"));
956                }
957
958                public void word(String word) throws Exception {
959                        if (!word.equals(this.token))
960                                throw error("Unexpected word "+this.token+" looking for "+word);
961      next(LexerTokenType.WORD, false);
962                }
963
964    public String word() throws FHIRFormatError {
965                        String t = token;
966      next(LexerTokenType.WORD, false);
967                        return t;
968                }
969
970    public String uri() throws FHIRFormatError {
971                        if (this.type != LexerTokenType.URI)
972                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a URI");
973                        String t = token;
974      next(LexerTokenType.URI, false);
975                        return t;
976                }
977
978    public String literal() throws FHIRFormatError {
979                        if (this.type != LexerTokenType.LITERAL)
980                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a Literal");
981                        String t = token;
982      next(LexerTokenType.LITERAL, false);
983                        return t;
984                }
985
986                public boolean peek(LexerTokenType type, String token) {
987                        return this.type == type && this.token.equals(token);
988                }
989
990    public FHIRFormatError error(String message) {
991      return new FHIRFormatError("Syntax Error parsing Turtle on line "+Integer.toString(line)+" col "+Integer.toString(col)+": "+message);
992                }
993
994        }
995        //      
996        //      public void importTtl(Section sct, String ttl) throws Exception {
997        //              if (!Utilities.noString(ttl)) {
998        //                      //        System.out.println("import ttl: "+ttl);
999        //                      Lexer lexer = new Lexer(ttl);
1000        //                      String subject = null;
1001        //                      String predicate = null;
1002        //                      while (!lexer.done()) {
1003        //                              if (subject == null)
1004        //                                      subject = lexer.next();
1005        //                              if (predicate == null)
1006        //                                      predicate = lexer.next();
1007        //                              if (lexer.peekType() == null) {
1008        //                                      throw new Error("Unexpected end of input parsing turtle");
1009        //                              } if (lexer.peekType() == LexerTokenType.TOKEN) {
1010        //                                      sct.triple(subject, predicate, lexer.next());
1011        //                              } else if (lexer.peek() == null) {
1012        //                                      throw new Error("Unexected - turtle lexer found no token");
1013        //                              } else if (lexer.peek().equals("[")) {
1014        //                                      sct.triple(subject, predicate, importComplex(lexer));
1015        //                              } else
1016        //                                      throw new Exception("Not done yet");
1017        //                              String n = lexer.next();
1018        //                              if (Utilities.noString(n))
1019        //                                      break;
1020        //                              if (n.equals(".")) {
1021        //                                      subject = null;
1022        //                                      predicate = null;
1023        //                              } else if (n.equals(";")) {
1024        //                                      predicate = null;
1025        //                              } else if (!n.equals(","))
1026        //                                      throw new Exception("Unexpected token "+n);          
1027        //                      }
1028        //              }
1029        //      }
1030
1031  public void parse(String source) throws FHIRFormatError {
1032                prefixes.clear();
1033                prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#");
1034                parse(new Lexer(source));
1035        }
1036
1037  private void parse(Lexer lexer) throws FHIRFormatError {
1038                boolean doPrefixes = true;
1039                while (!lexer.done()) {
1040      if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") || lexer.peek(LexerTokenType.WORD, "BASE"))) {
1041                                boolean sparqlStyle = false;
1042        boolean base = false;
1043                                if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1044                                        lexer.token("@");
1045          String p = lexer.word();
1046          if (p.equals("base"))
1047            base = true;
1048          else if (!p.equals("prefix"))
1049            throw new FHIRFormatError("Unexpected token "+p);  
1050                                } else {
1051                                        sparqlStyle = true;
1052          String p = lexer.word();
1053          if (p.equals("BASE"))
1054            base = true;
1055          else if (!p.equals("PREFIX"))
1056            throw new FHIRFormatError("Unexpected token "+p);  
1057        }
1058        String prefix = null; 
1059        if (!base) {
1060          prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null;
1061                                lexer.token(":");
1062        }
1063        String url = lexer.next(LexerTokenType.URI, false);
1064                                if (!sparqlStyle)
1065                                        lexer.token(".");
1066        if (!base)
1067                                prefix(prefix, url);
1068        else if (this.base == null)
1069          this.base = url;
1070        else
1071          throw new FHIRFormatError("Duplicate @base");  
1072                        } else if (lexer.peekType() == LexerTokenType.URI) {
1073                                doPrefixes = false;
1074                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1075                                uri.setUri(lexer.uri());
1076                                TTLComplex complex = parseComplex(lexer);
1077                                objects.put(uri, complex);
1078                                lexer.token(".");
1079                        } else if (lexer.peekType() == LexerTokenType.WORD) {
1080                                doPrefixes = false;
1081                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1082                                String pfx = lexer.word();
1083                                if (!prefixes.containsKey(pfx))
1084          throw new FHIRFormatError("Unknown prefix "+pfx);
1085                                lexer.token(":");
1086                                uri.setUri(prefixes.get(pfx)+lexer.word());
1087                                TTLComplex complex = parseComplex(lexer);
1088                                objects.put(uri, complex);
1089                                lexer.token(".");
1090                        } else if (lexer.peek(LexerTokenType.TOKEN, ":")) {
1091                                doPrefixes = false;
1092                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1093                                lexer.token(":");
1094                                if (!prefixes.containsKey(null))
1095          throw new FHIRFormatError("Unknown prefix ''");
1096                                uri.setUri(prefixes.get(null)+lexer.word());
1097                                TTLComplex complex = parseComplex(lexer);
1098                                objects.put(uri, complex);
1099                                lexer.token(".");
1100                        } else if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1101                                doPrefixes = false;
1102                                lexer.token("[");
1103                                TTLComplex bnode = parseComplex(lexer);
1104                                lexer.token("]");
1105                                TTLComplex complex = null;
1106        if (!lexer.peek(LexerTokenType.TOKEN, ".")) {
1107                                        complex = parseComplex(lexer);
1108          // at this point, we collapse bnode and complex, and give bnode a fictional identity
1109          bnode.addPredicates(complex.predicates);
1110        }
1111        
1112        objects.put(anonymousId(), bnode);
1113                                lexer.token(".");
1114                        } else 
1115                                throw lexer.error("Unknown token "+lexer.token);
1116                }
1117        }
1118
1119  private TTLURL anonymousId() throws FHIRFormatError {
1120    TTLURL url = new TTLURL(-1, -1);
1121    url.setUri("urn:uuid:"+UUID.randomUUID().toString().toLowerCase());
1122    return url;
1123  }
1124
1125  private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError {
1126                TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol);
1127
1128                boolean done = lexer.peek(LexerTokenType.TOKEN, "]");
1129                while (!done) {
1130                        String uri = null;
1131                        if (lexer.peekType() == LexerTokenType.URI)
1132                                uri = lexer.uri();
1133                        else {
1134                                String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1135                                if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) {
1136                                        lexer.token(":");
1137                                        if (!prefixes.containsKey(t))
1138            throw new FHIRFormatError("unknown prefix "+t);
1139                                        uri = prefixes.get(t)+lexer.word();
1140                                } else if (t.equals("a"))
1141                                        uri = prefixes.get("rdfs")+"type";
1142                                else
1143                                        throw lexer.error("unexpected token");
1144                        }
1145
1146                        boolean inlist = false;
1147                        if (lexer.peek(LexerTokenType.TOKEN, "(")) {
1148                                inlist = true;
1149                                lexer.token("(");
1150                        }
1151
1152                        boolean rpt = false;
1153                        do {
1154                                if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1155                                        lexer.token("[");
1156          result.addPredicate(uri, parseComplex(lexer));
1157                                        lexer.token("]");
1158                                } else if (lexer.peekType() == LexerTokenType.URI) {
1159                                        TTLURL u = new TTLURL(lexer.startLine, lexer.startCol);
1160                                        u.setUri(lexer.uri());
1161          result.addPredicate(uri, u);
1162                                } else if (lexer.peekType() == LexerTokenType.LITERAL) {
1163                                        TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol);
1164                                        u.value = lexer.literal();
1165                                        if (lexer.peek(LexerTokenType.TOKEN, "^")) {
1166                                                lexer.token("^");
1167                                                lexer.token("^");
1168                                                if (lexer.peekType() == LexerTokenType.URI) {
1169                                                        u.type = lexer.uri();
1170                                                } else {
1171                                                        String l = lexer.word();
1172                                                        lexer.token(":");
1173                                                        u.type = prefixes.get(l)+ lexer.word();
1174                                                }
1175                                        }
1176                                        if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1177                                                //lang tag - skip it 
1178                                                lexer.token("@");
1179            String lang = lexer.word();
1180            if (!lang.matches(LANG_REGEX)) {
1181              throw new FHIRFormatError("Invalid Language tag "+lang);
1182            }
1183                                        }
1184          result.addPredicate(uri, u);
1185                                } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) {
1186                                        int sl = lexer.startLine;
1187                                        int sc = lexer.startCol;
1188                                        String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1189                                        if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1190                                                TTLLiteral u = new TTLLiteral(sl, sc);
1191                                                u.value = pfx;
1192            result.addPredicate(uri, u);                                        
1193                                        } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1194                                                TTLLiteral u = new TTLLiteral(sl, sc);
1195                                                u.value = pfx;
1196            result.addPredicate(uri, u);                                        
1197                                        } else {
1198                                                if (!prefixes.containsKey(pfx))
1199              throw new FHIRFormatError("Unknown prefix "+(pfx == null ? "''" : pfx));                                          
1200                                                TTLURL u = new TTLURL(sl, sc);
1201                                                lexer.token(":");
1202                                                u.setUri(prefixes.get(pfx)+lexer.word());
1203            result.addPredicate(uri, u);
1204                                        } 
1205                                } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) {
1206          throw new FHIRFormatError("unexpected token "+lexer.token);
1207                                }
1208
1209                                if (inlist)
1210                                        rpt = !lexer.peek(LexerTokenType.TOKEN, ")");
1211                                else {
1212                                        rpt = lexer.peek(LexerTokenType.TOKEN, ",");
1213                                        if (rpt)
1214            lexer.readNext(false);
1215                                }
1216                        } while (rpt);
1217                        if (inlist)
1218                                lexer.token(")");
1219
1220                        if (lexer.peek(LexerTokenType.TOKEN, ";")) {
1221        while ((lexer.peek(LexerTokenType.TOKEN, ";")))
1222                                lexer.token(";");
1223        done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]");
1224                        } else {
1225                                done = true;
1226                        }
1227                }
1228                return result;
1229        }
1230
1231  public Map<TTLURL, TTLComplex> getObjects() {
1232    return objects;
1233  }
1234
1235  public TTLComplex getObject(String url) {
1236    for (TTLURL t : objects.keySet()) {
1237      if (t.getUri().equals(url))
1238        return objects.get(t);
1239    }
1240    return null;
1241  }
1242
1243        //      public void parseFragment(Lexer lexer) throws Exception {
1244        //              lexer.next(); // read [
1245        //              Complex obj = new Complex();
1246        //              while (!lexer.peek().equals("]")) {
1247        //                      String predicate = lexer.next();
1248        //                      if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) {
1249        //                              obj.predicate(predicate, lexer.next());
1250        //                      } else if (lexer.peek().equals("[")) {
1251        //                              obj.predicate(predicate, importComplex(lexer));
1252        //                      } else
1253        //                              throw new Exception("Not done yet");
1254        //                      if (lexer.peek().equals(";")) 
1255        //                              lexer.next();
1256        //              }
1257        //              lexer.next(); // read ]
1258        //              //return obj;
1259        //      }
1260        //
1261        //      public void importTtl(Section sct, String ttl) throws Exception {
1262        //              if (!Utilities.noString(ttl)) {
1263        //                      //        System.out.println("import ttl: "+ttl);
1264        //                      Lexer lexer = new Lexer(ttl);
1265        //                      String subject = null;
1266        //                      String predicate = null;
1267        //                      while (!lexer.done()) {
1268        //                              if (subject == null)
1269        //                                      subject = lexer.next();
1270        //                              if (predicate == null)
1271        //                                      predicate = lexer.next();
1272        //                              if (lexer.peekType() == null) {
1273        //                                      throw new Error("Unexpected end of input parsing turtle");
1274        //                              } if (lexer.peekType() == LexerTokenType.TOKEN) {
1275        //                                      sct.triple(subject, predicate, lexer.next());
1276        //                              } else if (lexer.peek() == null) {
1277        //                                      throw new Error("Unexected - turtle lexer found no token");
1278        //                              } else if (lexer.peek().equals("[")) {
1279        //                                      sct.triple(subject, predicate, importComplex(lexer));
1280        //                              } else
1281        //                                      throw new Exception("Not done yet");
1282        //                              String n = lexer.next();
1283        //                              if (Utilities.noString(n))
1284        //                                      break;
1285        //                              if (n.equals(".")) {
1286        //                                      subject = null;
1287        //                                      predicate = null;
1288        //                              } else if (n.equals(";")) {
1289        //                                      predicate = null;
1290        //                              } else if (!n.equals(","))
1291        //                                      throw new Exception("Unexpected token "+n);          
1292        //                      }
1293        //              }
1294        //}
1295
1296        //      private Complex importComplex(Lexer lexer) throws Exception {
1297        //              lexer.next(); // read [
1298        //              Complex obj = new Complex();
1299        //              while (!lexer.peek().equals("]")) {
1300        //                      String predicate = lexer.next();
1301        //                      if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) {
1302        //                              obj.predicate(predicate, lexer.next());
1303        //                      } else if (lexer.peek().equals("[")) {
1304        //                              obj.predicate(predicate, importComplex(lexer));
1305        //                      } else
1306        //                              throw new Exception("Not done yet");
1307        //                      if (lexer.peek().equals(";")) 
1308        //                              lexer.next();
1309        //              }
1310        //              lexer.next(); // read ]
1311        //              return obj;
1312        //      }
1313
1314}