001package org.hl7.fhir.dstu3.utils.formats; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import java.io.IOException; 035import java.io.OutputStream; 036import java.io.OutputStreamWriter; 037import java.io.UnsupportedEncodingException; 038import java.util.ArrayList; 039import java.util.Collections; 040import java.util.HashMap; 041import java.util.HashSet; 042import java.util.List; 043import java.util.Map; 044import java.util.Set; 045import java.util.UUID; 046 047import org.hl7.fhir.exceptions.FHIRFormatError; 048import org.hl7.fhir.utilities.Utilities; 049 050public class Turtle { 051 052 public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE"; 053 054 public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; 055 public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?"; 056 057 // Object model 058 public abstract class Triple { 059 private String uri; 060 } 061 062 public class StringType extends Triple { 063 private String value; 064 065 public StringType(String value) { 066 super(); 067 this.value = value; 068 } 069 } 070 071 public class Complex extends Triple { 072 protected List<Predicate> predicates = new ArrayList<Predicate>(); 073 074 public Complex predicate(String predicate, String object) { 075 predicateSet.add(predicate); 076 objectSet.add(object); 077 return predicate(predicate, new StringType(object)); 078 } 079 080 public Complex linkedPredicate(String predicate, String object, String link) { 081 predicateSet.add(predicate); 082 objectSet.add(object); 083 return linkedPredicate(predicate, new StringType(object), link); 084 } 085 086 public Complex predicate(String predicate, Triple object) { 087 Predicate p = getPredicate(predicate); 088 if (p == null) { 089 p = new Predicate(); 090 p.predicate = predicate; 091 predicateSet.add(predicate); 092 predicates.add(p); 093 } 094 if (object instanceof StringType) 095 objectSet.add(((StringType) object).value); 096 p.objects.add(object); 097 return this; 098 } 099 100 protected Predicate getPredicate(String predicate) { 101 for (Predicate p : predicates) 102 if (p.predicate.equals(predicate)) 103 return p; 104 return null; 105 } 106 107 public Complex linkedPredicate(String predicate, Triple object, String link) { 108 Predicate p = getPredicate(predicate); 109 if (p == null) { 110 p = new Predicate(); 111 p.predicate = predicate; 112 p.link = link; 113 predicateSet.add(predicate); 114 predicates.add(p); 115 } 116 if (object instanceof StringType) 117 objectSet.add(((StringType) object).value); 118 p.objects.add(object); 119 return this; 120 } 121 122 public Complex predicate(String predicate) { 123 predicateSet.add(predicate); 124 Complex c = complex(); 125 predicate(predicate, c); 126 return c; 127 } 128 129 public Complex linkedPredicate(String predicate, String link) { 130 predicateSet.add(predicate); 131 Complex c = complex(); 132 linkedPredicate(predicate, c, link); 133 return c; 134 } 135 136 public void prefix(String code, String url) { 137 Turtle.this.prefix(code, url); 138 } 139 } 140 141 private class Predicate { 142 protected String predicate; 143 protected String link; 144 protected List<Triple> objects = new ArrayList<Turtle.Triple>(); 145 protected String comment; 146 147 public String getPredicate() { 148 return predicate; 149 } 150 public String makelink() { 151 if (link == null) 152 return predicate; 153 else 154 return "<a href=\""+link+"\">"+Utilities.escapeXml(predicate)+"</a>"; 155 } 156 157 public List<Triple> getObjects() { 158 return objects; 159 } 160 public String getComment() { 161 return comment; 162 } 163 } 164 165 public class Subject extends Complex { 166 private String id; 167 168 public Predicate predicate(String predicate, Triple object, String comment) { 169 Predicate p = getPredicate(predicate); 170 if (p == null) { 171 p = new Predicate(); 172 p.predicate = predicate; 173 predicateSet.add(predicate); 174 predicates.add(p); 175 p.comment = comment; 176 } 177 if (object instanceof StringType) 178 objectSet.add(((StringType) object).value); 179 p.objects.add(object); 180 return p; 181 } 182 183 public void comment(String comment) { 184 if (!Utilities.noString(comment)) { 185 predicate("rdfs:comment", literal(comment)); 186 predicate("dcterms:description", literal(comment)); 187 } 188 } 189 190 public void label(String label) { 191 if (!Utilities.noString(label)) { 192 predicate("rdfs:label", literal(label)); 193 predicate("dc:title", literal(label)); 194 } 195 } 196 197 } 198 199 public class Section { 200 private String name; 201 private List<Subject> subjects = new ArrayList<Subject>(); 202 203 public Subject triple(String subject, String predicate, String object, String comment) { 204 return triple(subject, predicate, new StringType(object), comment); 205 } 206 207 public Subject triple(String subject, String predicate, String object) { 208 return triple(subject, predicate, new StringType(object)); 209 } 210 211 public Subject triple(String subject, String predicate, Triple object) { 212 return triple(subject, predicate, object, null); 213 } 214 215 public Subject triple(String subject, String predicate, Triple object, String comment) { 216 Subject s = subject(subject); 217 s.predicate(predicate, object, comment); 218 return s; 219 } 220 221 public void comment(String subject, String comment) { 222 triple(subject, "rdfs:comment", literal(comment)); 223 triple(subject, "dcterms:description", literal(comment)); 224 } 225 226 public void label(String subject, String comment) { 227 triple(subject, "rdfs:label", literal(comment)); 228 triple(subject, "dc:title", literal(comment)); 229 } 230 231 public Subject subject(String subject) { 232 for (Subject ss : subjects) 233 if (ss.id.equals(subject)) 234 return ss; 235 Subject s = new Subject(); 236 s.id = subject; 237 subjects.add(s); 238 return s; 239 } 240 241 public boolean hasSubject(String subject) { 242 for (Subject ss : subjects) 243 if (ss.id.equals(subject)) 244 return true; 245 return false; 246 } 247 } 248 249 private List<Section> sections = new ArrayList<Section>(); 250 protected Set<String> subjectSet = new HashSet<String>(); 251 protected Set<String> predicateSet = new HashSet<String>(); 252 protected Set<String> objectSet = new HashSet<String>(); 253 protected Map<String, String> prefixes = new HashMap<String, String>(); 254 255 public void prefix(String code, String url) { 256 prefixes.put(code, url); 257 } 258 259 protected boolean hasSection(String sn) { 260 for (Section s : sections) 261 if (s.name.equals(sn)) 262 return true; 263 return false; 264 265 } 266 267 public Section section(String sn) { 268 if (hasSection(sn)) 269 throw new Error("Duplicate section name "+sn); 270 Section s = new Section(); 271 s.name = sn; 272 sections.add(s); 273 return s; 274 } 275 276 protected String matches(String url, String prefixUri, String prefix) { 277 if (url.startsWith(prefixUri)) { 278 prefixes.put(prefix, prefixUri); 279 return prefix+":"+escape(url.substring(prefixUri.length()), false); 280 } 281 return null; 282 } 283 284 protected Complex complex() { 285 return new Complex(); 286 } 287 288 private void checkPrefix(Triple object) { 289 if (object instanceof StringType) 290 checkPrefix(((StringType) object).value); 291 else { 292 Complex obj = (Complex) object; 293 for (Predicate po : obj.predicates) { 294 checkPrefix(po.getPredicate()); 295 for (Triple o : po.getObjects()) 296 checkPrefix(o); 297 } 298 } 299 } 300 301 protected void checkPrefix(String pname) { 302 if (pname.startsWith("(")) 303 return; 304 if (pname.startsWith("\"")) 305 return; 306 if (pname.startsWith("<")) 307 return; 308 309 if (pname.contains(":")) { 310 String prefix = pname.substring(0, pname.indexOf(":")); 311 if (!prefixes.containsKey(prefix) && !prefix.equals("http")&& !prefix.equals("urn")) 312 throw new Error("undefined prefix "+prefix); 313 } 314 } 315 316 protected StringType literal(String s) { 317 return new StringType("\""+escape(s, true)+"\""); 318 } 319 320 protected StringType literalTyped(String s, String t) { 321 return new StringType("\""+escape(s, true)+"\"^^xs:"+t); 322 } 323 324 public static String escape(String s, boolean string) { 325 if (s == null) 326 return ""; 327 328 StringBuilder b = new StringBuilder(); 329 for (char c : s.toCharArray()) { 330 if (c == '\r') 331 b.append("\\r"); 332 else if (c == '\n') 333 b.append("\\n"); 334 else if (c == '"') 335 b.append("\\\""); 336 else if (c == '\\') 337 b.append("\\\\"); 338 else if (c == '/' && !string) 339 b.append("\\/"); 340 else 341 b.append(c); 342 } 343 return b.toString(); 344 } 345 346 protected String pctEncode(String s) { 347 if (s == null) 348 return ""; 349 350 StringBuilder b = new StringBuilder(); 351 for (char c : s.toCharArray()) { 352 if (c >= 'A' && c <= 'Z') 353 b.append(c); 354 else if (c >= 'a' && c <= 'z') 355 b.append(c); 356 else if (c >= '0' && c <= '9') 357 b.append(c); 358 else if (c == '.') 359 b.append(c); 360 else 361 b.append("%"+Integer.toHexString(c)); 362 } 363 return b.toString(); 364 } 365 366 protected List<String> sorted(Set<String> keys) { 367 List<String> names = new ArrayList<String>(); 368 names.addAll(keys); 369 Collections.sort(names); 370 return names; 371 } 372 373 public void commit(OutputStream destination, boolean header) throws IOException { 374 LineOutputStreamWriter writer = new LineOutputStreamWriter(destination); 375 commitPrefixes(writer, header); 376 for (Section s : sections) { 377 commitSection(writer, s); 378 } 379 writer.ln("# -------------------------------------------------------------------------------------"); 380 writer.ln(); 381 writer.flush(); 382 writer.close(); 383 } 384 385 public String asHtml() throws Exception { 386 StringBuilder b = new StringBuilder(); 387 b.append("<pre class=\"rdf\">\r\n"); 388 commitPrefixes(b); 389 for (Section s : sections) { 390 commitSection(b, s); 391 } 392 b.append("</pre>\r\n"); 393 b.append("\r\n"); 394 return b.toString(); 395 } 396 397 private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException { 398 if (header) { 399 writer.ln("# FHIR Sub-definitions"); 400 writer.write("# This is work in progress, and may change rapidly \r\n"); 401 writer.ln(); 402 writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 403 writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n"); 404 writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n"); 405 writer.write("# appropriate\" means that the predicates are a faithful representation \r\n"); 406 writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n"); 407 writer.ln(); 408 writer.write("# Where the community agrees on additional predicate statements (such \r\n"); 409 writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n"); 410 writer.write("# predicates \r\n"); 411 writer.ln(); 412 writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n"); 413 writer.ln(); 414 writer.write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n"); 415 writer.ln(); 416 } 417 for (String p : sorted(prefixes.keySet())) 418 writer.ln("@prefix "+p+": <"+prefixes.get(p)+"> ."); 419 writer.ln(); 420 if (header) { 421 writer.ln("# Predicates used in this file:"); 422 for (String s : sorted(predicateSet)) 423 writer.ln(" # "+s); 424 writer.ln(); 425 } 426 } 427 428 private void commitPrefixes(StringBuilder b) throws Exception { 429 for (String p : sorted(prefixes.keySet())) 430 b.append("@prefix "+p+": <"+prefixes.get(p)+"> .\r\n"); 431 b.append("\r\n"); 432 } 433 434 // private String lastSubject = null; 435 // private String lastComment = ""; 436 437 private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException { 438 writer.ln("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())); 439 writer.ln(); 440 for (Subject sbj : section.subjects) { 441 if (Utilities.noString(sbj.id)) { 442 writer.write("["); 443 } else { 444 writer.write(sbj.id); 445 writer.write(" "); 446 } 447 int i = 0; 448 449 for (Predicate p : sbj.predicates) { 450 writer.write(p.getPredicate()); 451 writer.write(" "); 452 boolean first = true; 453 for (Triple o : p.getObjects()) { 454 if (first) 455 first = false; 456 else 457 writer.write(", "); 458 if (o instanceof StringType) 459 writer.write(((StringType) o).value); 460 else { 461 writer.write("["); 462 if (write((Complex) o, writer, 4)) 463 writer.write("\r\n ]"); 464 else 465 writer.write("]"); 466 } 467 } 468 String comment = p.comment == null? "" : " # "+p.comment; 469 i++; 470 if (i < sbj.predicates.size()) 471 writer.write(";"+comment+"\r\n "); 472 else { 473 if (Utilities.noString(sbj.id)) 474 writer.write("]"); 475 writer.write(" ."+comment+"\r\n\r\n"); 476 } 477 } 478 } 479 } 480 481 private void commitSection(StringBuilder b, Section section) throws Exception { 482 b.append("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())+"\r\n"); 483 b.append("\r\n"); 484 for (Subject sbj : section.subjects) { 485 b.append(Utilities.escapeXml(sbj.id)); 486 b.append(" "); 487 int i = 0; 488 489 for (Predicate p : sbj.predicates) { 490 b.append(p.makelink()); 491 b.append(" "); 492 boolean first = true; 493 for (Triple o : p.getObjects()) { 494 if (first) 495 first = false; 496 else 497 b.append(", "); 498 if (o instanceof StringType) 499 b.append(Utilities.escapeXml(((StringType) o).value)); 500 else { 501 b.append("["); 502 if (write((Complex) o, b, 4)) 503 b.append("\r\n ]"); 504 else 505 b.append("]"); 506 } 507 } 508 String comment = p.comment == null? "" : " # "+p.comment; 509 i++; 510 if (i < sbj.predicates.size()) 511 b.append(";"+Utilities.escapeXml(comment)+"\r\n "); 512 else 513 b.append("."+Utilities.escapeXml(comment)+"\r\n\r\n"); 514 } 515 } 516 } 517 518 protected class LineOutputStreamWriter extends OutputStreamWriter { 519 private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException { 520 super(out, "UTF-8"); 521 } 522 523 private void ln() throws IOException { 524 write("\r\n"); 525 } 526 527 private void ln(String s) throws IOException { 528 write(s); 529 write("\r\n"); 530 } 531 } 532 533 public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException { 534 if (complex.predicates.isEmpty()) 535 return false; 536 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 537 writer.write(" "+complex.predicates.get(0).predicate+" "+((StringType) complex.predicates.get(0).getObjects().get(0)).value); 538 return false; 539 } 540 String left = Utilities.padLeft("", ' ', indent); 541 int i = 0; 542 for (Predicate po : complex.predicates) { 543 writer.write("\r\n"); 544 boolean first = true; 545 for (Triple o : po.getObjects()) { 546 if (first) { 547 first = false; 548 writer.write(left+" "+po.getPredicate()+" "); 549 } else 550 writer.write(", "); 551 if (o instanceof StringType) 552 writer.write(((StringType) o).value); 553 else { 554 writer.write("["); 555 if (write((Complex) o, writer, indent+2)) 556 writer.write("\r\n"+left+" ]"); 557 else 558 writer.write(" ]"); 559 } 560 } 561 i++; 562 if (i < complex.predicates.size()) 563 writer.write(";"); 564 if (!Utilities.noString(po.comment)) 565 writer.write(" # "+escape(po.comment, false)); 566 } 567 return true; 568 } 569 570 public boolean write(Complex complex, StringBuilder b, int indent) throws Exception { 571 if (complex.predicates.isEmpty()) 572 return false; 573 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 574 b.append(" "+complex.predicates.get(0).makelink()+" "+Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value)); 575 return false; 576 } 577 String left = Utilities.padLeft("", ' ', indent); 578 int i = 0; 579 for (Predicate po : complex.predicates) { 580 b.append("\r\n"); 581 boolean first = true; 582 for (Triple o : po.getObjects()) { 583 if (first) { 584 first = false; 585 b.append(left+" "+po.makelink()+" "); 586 } else 587 b.append(", "); 588 if (o instanceof StringType) 589 b.append(Utilities.escapeXml(((StringType) o).value)); 590 else { 591 b.append("["); 592 if (write((Complex) o, b, indent+2)) 593 b.append(left+" ]"); 594 else 595 b.append(" ]"); 596 } 597 } 598 i++; 599 if (i < complex.predicates.size()) 600 b.append(";"); 601 if (!Utilities.noString(po.comment)) 602 b.append(" # "+Utilities.escapeXml(escape(po.comment, false))); 603 } 604 return true; 605 } 606 607 608 public abstract class TTLObject { 609 protected int line; 610 protected int col; 611 612 abstract public boolean hasValue(String value); 613 614 public int getLine() { 615 return line; 616 } 617 618 public int getCol() { 619 return col; 620 } 621 622 623 } 624 625 626 public class TTLLiteral extends TTLObject { 627 628 private String value; 629 private String type; 630 protected TTLLiteral(int line, int col) { 631 this.line = line; 632 this.col = col; 633 } 634 @Override 635 public boolean hasValue(String value) { 636 return value.equals(this.value); 637 } 638 public String getValue() { 639 return value; 640 } 641 public String getType() { 642 return type; 643 } 644 645 } 646 647 public class TTLURL extends TTLObject { 648 private String uri; 649 650 protected TTLURL(int line, int col) { 651 this.line = line; 652 this.col = col; 653 } 654 655 public String getUri() { 656 return uri; 657 } 658 659 public void setUri(String uri) throws FHIRFormatError { 660 if (!uri.matches(IRI_URL)) 661 throw new FHIRFormatError("Illegal URI "+uri); 662 this.uri = uri; 663 } 664 665 @Override 666 public boolean hasValue(String value) { 667 return value.equals(this.uri); 668 } 669 } 670 671 public class TTLList extends TTLObject { 672 private List<TTLObject> list = new ArrayList<Turtle.TTLObject>(); 673 674 public TTLList(TTLObject obj) { 675 super(); 676 list.add(obj); 677 } 678 679 @Override 680 public boolean hasValue(String value) { 681 for (TTLObject obj : list) 682 if (obj.hasValue(value)) 683 return true; 684 return false; 685 } 686 687 public List<TTLObject> getList() { 688 return list; 689 } 690 691 } 692 public class TTLComplex extends TTLObject { 693 private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>(); 694 protected TTLComplex(int line, int col) { 695 this.line = line; 696 this.col = col; 697 } 698 public Map<String, TTLObject> getPredicates() { 699 return predicates; 700 } 701 @Override 702 public boolean hasValue(String value) { 703 return false; 704 } 705 public void addPredicate(String uri, TTLObject obj) { 706 if (!predicates.containsKey(uri)) 707 predicates.put(uri, obj); 708 else { 709 TTLObject eo = predicates.get(uri); 710 TTLList list = null; 711 if (eo instanceof TTLList) 712 list = (TTLList) eo; 713 else { 714 list = new TTLList(eo); 715 predicates.put(uri, list); 716 } 717 list.list.add(obj); 718 } 719 } 720 public void addPredicates(Map<String, TTLObject> values) { 721 for (String s : values.keySet()) { 722 addPredicate(s, values.get(s)); 723 } 724 } 725 } 726 727 private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>(); 728 729 private Object base; 730 731 public enum LexerTokenType { 732 TOKEN, // [, ], :, @ 733 WORD, // a word 734 URI, // a URI <> 735 LITERAL // "..." 736 } 737 738 public class Lexer { 739 740 741 private String source; 742 private LexerTokenType type; 743 private int cursor, line, col, startLine, startCol; 744 private String token; 745 746 public Lexer(String source) throws FHIRFormatError { 747 this.source = source; 748 cursor = 0; 749 line = 1; 750 col = 1; 751 readNext(false); 752 } 753 754 private void skipWhitespace() { 755 while (cursor < source.length()) { 756 char ch = source.charAt(cursor); 757 if (Character.isWhitespace(ch)) 758 grab(); 759 else if (ch == '#') { 760 ch = grab(); 761 while (cursor < source.length()) { 762 ch = grab(); 763 if (ch == '\r' || ch == '\n') { 764 break; 765 } 766 } 767 } else 768 break; 769 } 770 } 771 772 private char grab() { 773 char c = source.charAt(cursor); 774 if (c == '\n') { 775 line++; 776 col = 1; 777 } else 778 col++; 779 780 cursor++; 781 return c; 782 } 783 784 private void readNext(boolean postColon) throws FHIRFormatError { 785 token = null; 786 type = null; 787 skipWhitespace(); 788 if (cursor >= source.length()) 789 return; 790 startLine = line; 791 startCol = col; 792 char ch = grab(); 793 StringBuilder b = new StringBuilder(); 794 switch (ch) { 795 case '@': 796 case '.': 797 case ':': 798 case ';': 799 case '^': 800 case ',': 801 case ']': 802 case '[': 803 case '(': 804 case ')': 805 type = LexerTokenType.TOKEN; 806 b.append(ch); 807 token = b.toString(); 808 return; 809 case '<': 810 while (cursor < source.length()) { 811 ch = grab(); 812 if (ch == '>') 813 break; 814 b.append(ch); 815 } 816 type = LexerTokenType.URI; 817 token = unescape(b.toString(), true); 818 return; 819 case '"': 820 b.append(ch); 821 String end = "\""; 822 while (cursor < source.length()) { 823 ch = grab(); 824 if (b.length() == 2 && ch != '"' && b.equals("\"\"")) { 825 cursor--; 826 break; 827 } 828 b.append(ch); 829 if (ch == '"') 830 if (b.toString().equals("\"\"\"")) 831 end = "\"\"\""; 832 else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\"+end)) 833 break; 834 } 835 type = LexerTokenType.LITERAL; 836 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 837 return; 838 case '\'': 839 b.append(ch); 840 end = "'"; 841 while (cursor < source.length()) { 842 ch = grab(); 843 if (b.equals("''") && ch != '\'') { 844 cursor--; 845 break; 846 } 847 b.append(ch); 848 if (b.toString().equals("'''")) 849 end = "'''"; 850 else if (!b.toString().equals("''") && b.toString().endsWith(end)) 851 break; 852 } 853 type = LexerTokenType.LITERAL; 854 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 855 return; 856 default: 857 if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) { 858 b.append(ch); 859 while (cursor < source.length()) { 860 ch = grab(); 861 // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', '-', '+', '.', '\\', '#')) 862 if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') || (( ch == ':') && !postColon)) 863 break; 864 b.append(ch); 865 } 866 type = LexerTokenType.WORD; 867 token = b.toString(); 868 cursor--; 869 return; 870 } else 871 throw error("unexpected lexer char "+ch); 872 } 873 } 874 875 private String unescape(String s, boolean isUri) throws FHIRFormatError { 876 StringBuilder b = new StringBuilder(); 877 int i = 0; 878 while (i < s.length()) { 879 char ch = s.charAt(i); 880 if (ch == '\\' && i < s.length()-1) { 881 i++; 882 switch (s.charAt(i)) { 883 case 't': 884 b.append('\t'); 885 break; 886 case 'r': 887 b.append('\r'); 888 break; 889 case 'n': 890 b.append('\n'); 891 break; 892 case 'f': 893 b.append('\f'); 894 break; 895 case '\'': 896 b.append('\''); 897 break; 898 case '\"': 899 b.append('\"'); 900 break; 901 case '\\': 902 b.append('\\'); 903 break; 904 case '/': 905 b.append('\\'); 906 break; 907 case 'U': 908 case 'u': 909 i++; 910 int l = 4; 911 int uc = Integer.parseInt(s.substring(i, i+l), 16); 912 if (uc < (isUri ? 33 : 32)) { 913 l = 8; 914 uc = Integer.parseInt(s.substring(i, i+8), 16); 915 } 916 if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) 917 throw new FHIRFormatError("Illegal unicode character"); 918 b.append((char) uc); 919 i = i + l; 920 break; 921 default: 922 throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i)); 923 } 924 } else { 925 b.append(ch); 926 } 927 i++; 928 } 929 return b.toString(); 930 } 931 932 public boolean done() { 933 return type == null; 934 } 935 936 public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError { 937 if (type != null && this.type != type) 938 throw error("Unexpected type. Found "+this.type.toString()+" looking for a "+type.toString()); 939 String res = token; 940 readNext(postColon); 941 return res; 942 } 943 944 public String peek() throws Exception { 945 return token; 946 } 947 948 public LexerTokenType peekType() { 949 return type; 950 } 951 952 public void token(String token) throws FHIRFormatError { 953 if (!token.equals(this.token)) 954 throw error("Unexpected word "+this.token+" looking for "+token); 955 next(LexerTokenType.TOKEN, token.equals(":")); 956 } 957 958 public void word(String word) throws Exception { 959 if (!word.equals(this.token)) 960 throw error("Unexpected word "+this.token+" looking for "+word); 961 next(LexerTokenType.WORD, false); 962 } 963 964 public String word() throws FHIRFormatError { 965 String t = token; 966 next(LexerTokenType.WORD, false); 967 return t; 968 } 969 970 public String uri() throws FHIRFormatError { 971 if (this.type != LexerTokenType.URI) 972 throw error("Unexpected type. Found "+this.type.toString()+" looking for a URI"); 973 String t = token; 974 next(LexerTokenType.URI, false); 975 return t; 976 } 977 978 public String literal() throws FHIRFormatError { 979 if (this.type != LexerTokenType.LITERAL) 980 throw error("Unexpected type. Found "+this.type.toString()+" looking for a Literal"); 981 String t = token; 982 next(LexerTokenType.LITERAL, false); 983 return t; 984 } 985 986 public boolean peek(LexerTokenType type, String token) { 987 return this.type == type && this.token.equals(token); 988 } 989 990 public FHIRFormatError error(String message) { 991 return new FHIRFormatError("Syntax Error parsing Turtle on line "+Integer.toString(line)+" col "+Integer.toString(col)+": "+message); 992 } 993 994 } 995 // 996 // public void importTtl(Section sct, String ttl) throws Exception { 997 // if (!Utilities.noString(ttl)) { 998 // // System.out.println("import ttl: "+ttl); 999 // Lexer lexer = new Lexer(ttl); 1000 // String subject = null; 1001 // String predicate = null; 1002 // while (!lexer.done()) { 1003 // if (subject == null) 1004 // subject = lexer.next(); 1005 // if (predicate == null) 1006 // predicate = lexer.next(); 1007 // if (lexer.peekType() == null) { 1008 // throw new Error("Unexpected end of input parsing turtle"); 1009 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1010 // sct.triple(subject, predicate, lexer.next()); 1011 // } else if (lexer.peek() == null) { 1012 // throw new Error("Unexected - turtle lexer found no token"); 1013 // } else if (lexer.peek().equals("[")) { 1014 // sct.triple(subject, predicate, importComplex(lexer)); 1015 // } else 1016 // throw new Exception("Not done yet"); 1017 // String n = lexer.next(); 1018 // if (Utilities.noString(n)) 1019 // break; 1020 // if (n.equals(".")) { 1021 // subject = null; 1022 // predicate = null; 1023 // } else if (n.equals(";")) { 1024 // predicate = null; 1025 // } else if (!n.equals(",")) 1026 // throw new Exception("Unexpected token "+n); 1027 // } 1028 // } 1029 // } 1030 1031 public void parse(String source) throws FHIRFormatError { 1032 prefixes.clear(); 1033 prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#"); 1034 parse(new Lexer(source)); 1035 } 1036 1037 private void parse(Lexer lexer) throws FHIRFormatError { 1038 boolean doPrefixes = true; 1039 while (!lexer.done()) { 1040 if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") || lexer.peek(LexerTokenType.WORD, "BASE"))) { 1041 boolean sparqlStyle = false; 1042 boolean base = false; 1043 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1044 lexer.token("@"); 1045 String p = lexer.word(); 1046 if (p.equals("base")) 1047 base = true; 1048 else if (!p.equals("prefix")) 1049 throw new FHIRFormatError("Unexpected token "+p); 1050 } else { 1051 sparqlStyle = true; 1052 String p = lexer.word(); 1053 if (p.equals("BASE")) 1054 base = true; 1055 else if (!p.equals("PREFIX")) 1056 throw new FHIRFormatError("Unexpected token "+p); 1057 } 1058 String prefix = null; 1059 if (!base) { 1060 prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null; 1061 lexer.token(":"); 1062 } 1063 String url = lexer.next(LexerTokenType.URI, false); 1064 if (!sparqlStyle) 1065 lexer.token("."); 1066 if (!base) 1067 prefix(prefix, url); 1068 else if (this.base == null) 1069 this.base = url; 1070 else 1071 throw new FHIRFormatError("Duplicate @base"); 1072 } else if (lexer.peekType() == LexerTokenType.URI) { 1073 doPrefixes = false; 1074 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1075 uri.setUri(lexer.uri()); 1076 TTLComplex complex = parseComplex(lexer); 1077 objects.put(uri, complex); 1078 lexer.token("."); 1079 } else if (lexer.peekType() == LexerTokenType.WORD) { 1080 doPrefixes = false; 1081 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1082 String pfx = lexer.word(); 1083 if (!prefixes.containsKey(pfx)) 1084 throw new FHIRFormatError("Unknown prefix "+pfx); 1085 lexer.token(":"); 1086 uri.setUri(prefixes.get(pfx)+lexer.word()); 1087 TTLComplex complex = parseComplex(lexer); 1088 objects.put(uri, complex); 1089 lexer.token("."); 1090 } else if (lexer.peek(LexerTokenType.TOKEN, ":")) { 1091 doPrefixes = false; 1092 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1093 lexer.token(":"); 1094 if (!prefixes.containsKey(null)) 1095 throw new FHIRFormatError("Unknown prefix ''"); 1096 uri.setUri(prefixes.get(null)+lexer.word()); 1097 TTLComplex complex = parseComplex(lexer); 1098 objects.put(uri, complex); 1099 lexer.token("."); 1100 } else if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1101 doPrefixes = false; 1102 lexer.token("["); 1103 TTLComplex bnode = parseComplex(lexer); 1104 lexer.token("]"); 1105 TTLComplex complex = null; 1106 if (!lexer.peek(LexerTokenType.TOKEN, ".")) { 1107 complex = parseComplex(lexer); 1108 // at this point, we collapse bnode and complex, and give bnode a fictional identity 1109 bnode.addPredicates(complex.predicates); 1110 } 1111 1112 objects.put(anonymousId(), bnode); 1113 lexer.token("."); 1114 } else 1115 throw lexer.error("Unknown token "+lexer.token); 1116 } 1117 } 1118 1119 private TTLURL anonymousId() throws FHIRFormatError { 1120 TTLURL url = new TTLURL(-1, -1); 1121 url.setUri("urn:uuid:"+UUID.randomUUID().toString().toLowerCase()); 1122 return url; 1123 } 1124 1125 private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError { 1126 TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol); 1127 1128 boolean done = lexer.peek(LexerTokenType.TOKEN, "]"); 1129 while (!done) { 1130 String uri = null; 1131 if (lexer.peekType() == LexerTokenType.URI) 1132 uri = lexer.uri(); 1133 else { 1134 String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1135 if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) { 1136 lexer.token(":"); 1137 if (!prefixes.containsKey(t)) 1138 throw new FHIRFormatError("unknown prefix "+t); 1139 uri = prefixes.get(t)+lexer.word(); 1140 } else if (t.equals("a")) 1141 uri = prefixes.get("rdfs")+"type"; 1142 else 1143 throw lexer.error("unexpected token"); 1144 } 1145 1146 boolean inlist = false; 1147 if (lexer.peek(LexerTokenType.TOKEN, "(")) { 1148 inlist = true; 1149 lexer.token("("); 1150 } 1151 1152 boolean rpt = false; 1153 do { 1154 if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1155 lexer.token("["); 1156 result.addPredicate(uri, parseComplex(lexer)); 1157 lexer.token("]"); 1158 } else if (lexer.peekType() == LexerTokenType.URI) { 1159 TTLURL u = new TTLURL(lexer.startLine, lexer.startCol); 1160 u.setUri(lexer.uri()); 1161 result.addPredicate(uri, u); 1162 } else if (lexer.peekType() == LexerTokenType.LITERAL) { 1163 TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol); 1164 u.value = lexer.literal(); 1165 if (lexer.peek(LexerTokenType.TOKEN, "^")) { 1166 lexer.token("^"); 1167 lexer.token("^"); 1168 if (lexer.peekType() == LexerTokenType.URI) { 1169 u.type = lexer.uri(); 1170 } else { 1171 String l = lexer.word(); 1172 lexer.token(":"); 1173 u.type = prefixes.get(l)+ lexer.word(); 1174 } 1175 } 1176 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1177 //lang tag - skip it 1178 lexer.token("@"); 1179 String lang = lexer.word(); 1180 if (!lang.matches(LANG_REGEX)) { 1181 throw new FHIRFormatError("Invalid Language tag "+lang); 1182 } 1183 } 1184 result.addPredicate(uri, u); 1185 } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) { 1186 int sl = lexer.startLine; 1187 int sc = lexer.startCol; 1188 String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1189 if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1190 TTLLiteral u = new TTLLiteral(sl, sc); 1191 u.value = pfx; 1192 result.addPredicate(uri, u); 1193 } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1194 TTLLiteral u = new TTLLiteral(sl, sc); 1195 u.value = pfx; 1196 result.addPredicate(uri, u); 1197 } else { 1198 if (!prefixes.containsKey(pfx)) 1199 throw new FHIRFormatError("Unknown prefix "+(pfx == null ? "''" : pfx)); 1200 TTLURL u = new TTLURL(sl, sc); 1201 lexer.token(":"); 1202 u.setUri(prefixes.get(pfx)+lexer.word()); 1203 result.addPredicate(uri, u); 1204 } 1205 } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) { 1206 throw new FHIRFormatError("unexpected token "+lexer.token); 1207 } 1208 1209 if (inlist) 1210 rpt = !lexer.peek(LexerTokenType.TOKEN, ")"); 1211 else { 1212 rpt = lexer.peek(LexerTokenType.TOKEN, ","); 1213 if (rpt) 1214 lexer.readNext(false); 1215 } 1216 } while (rpt); 1217 if (inlist) 1218 lexer.token(")"); 1219 1220 if (lexer.peek(LexerTokenType.TOKEN, ";")) { 1221 while ((lexer.peek(LexerTokenType.TOKEN, ";"))) 1222 lexer.token(";"); 1223 done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]"); 1224 } else { 1225 done = true; 1226 } 1227 } 1228 return result; 1229 } 1230 1231 public Map<TTLURL, TTLComplex> getObjects() { 1232 return objects; 1233 } 1234 1235 public TTLComplex getObject(String url) { 1236 for (TTLURL t : objects.keySet()) { 1237 if (t.getUri().equals(url)) 1238 return objects.get(t); 1239 } 1240 return null; 1241 } 1242 1243 // public void parseFragment(Lexer lexer) throws Exception { 1244 // lexer.next(); // read [ 1245 // Complex obj = new Complex(); 1246 // while (!lexer.peek().equals("]")) { 1247 // String predicate = lexer.next(); 1248 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1249 // obj.predicate(predicate, lexer.next()); 1250 // } else if (lexer.peek().equals("[")) { 1251 // obj.predicate(predicate, importComplex(lexer)); 1252 // } else 1253 // throw new Exception("Not done yet"); 1254 // if (lexer.peek().equals(";")) 1255 // lexer.next(); 1256 // } 1257 // lexer.next(); // read ] 1258 // //return obj; 1259 // } 1260 // 1261 // public void importTtl(Section sct, String ttl) throws Exception { 1262 // if (!Utilities.noString(ttl)) { 1263 // // System.out.println("import ttl: "+ttl); 1264 // Lexer lexer = new Lexer(ttl); 1265 // String subject = null; 1266 // String predicate = null; 1267 // while (!lexer.done()) { 1268 // if (subject == null) 1269 // subject = lexer.next(); 1270 // if (predicate == null) 1271 // predicate = lexer.next(); 1272 // if (lexer.peekType() == null) { 1273 // throw new Error("Unexpected end of input parsing turtle"); 1274 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1275 // sct.triple(subject, predicate, lexer.next()); 1276 // } else if (lexer.peek() == null) { 1277 // throw new Error("Unexected - turtle lexer found no token"); 1278 // } else if (lexer.peek().equals("[")) { 1279 // sct.triple(subject, predicate, importComplex(lexer)); 1280 // } else 1281 // throw new Exception("Not done yet"); 1282 // String n = lexer.next(); 1283 // if (Utilities.noString(n)) 1284 // break; 1285 // if (n.equals(".")) { 1286 // subject = null; 1287 // predicate = null; 1288 // } else if (n.equals(";")) { 1289 // predicate = null; 1290 // } else if (!n.equals(",")) 1291 // throw new Exception("Unexpected token "+n); 1292 // } 1293 // } 1294 //} 1295 1296 // private Complex importComplex(Lexer lexer) throws Exception { 1297 // lexer.next(); // read [ 1298 // Complex obj = new Complex(); 1299 // while (!lexer.peek().equals("]")) { 1300 // String predicate = lexer.next(); 1301 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1302 // obj.predicate(predicate, lexer.next()); 1303 // } else if (lexer.peek().equals("[")) { 1304 // obj.predicate(predicate, importComplex(lexer)); 1305 // } else 1306 // throw new Exception("Not done yet"); 1307 // if (lexer.peek().equals(";")) 1308 // lexer.next(); 1309 // } 1310 // lexer.next(); // read ] 1311 // return obj; 1312 // } 1313 1314}