001package org.hl7.fhir.r4.utils.formats; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032import java.io.IOException; 033import java.io.OutputStream; 034import java.io.OutputStreamWriter; 035import java.io.UnsupportedEncodingException; 036import java.util.ArrayList; 037import java.util.Collections; 038import java.util.HashMap; 039import java.util.HashSet; 040import java.util.List; 041import java.util.Map; 042import java.util.Set; 043import java.util.UUID; 044 045import org.hl7.fhir.exceptions.FHIRFormatError; 046import org.hl7.fhir.utilities.Utilities; 047 048public class Turtle { 049 050 public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE"; 051 052 public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\=" 053 + GOOD_IRI_CHAR + "])+"; 054 public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?"; 055 056 // Object model 057 public abstract class Triple { 058 private String uri; 059 } 060 061 public class StringType extends Triple { 062 private String value; 063 064 public StringType(String value) { 065 super(); 066 this.value = value; 067 } 068 } 069 070 public class Complex extends Triple { 071 protected List<Predicate> predicates = new ArrayList<Predicate>(); 072 073 public Complex predicate(String predicate, String object) { 074 predicateSet.add(predicate); 075 objectSet.add(object); 076 return predicate(predicate, new StringType(object)); 077 } 078 079 public Complex linkedPredicate(String predicate, String object, String link) { 080 predicateSet.add(predicate); 081 objectSet.add(object); 082 return linkedPredicate(predicate, new StringType(object), link); 083 } 084 085 public Complex predicate(String predicate, Triple object) { 086 Predicate p = getPredicate(predicate); 087 if (p == null) { 088 p = new Predicate(); 089 p.predicate = predicate; 090 predicateSet.add(predicate); 091 predicates.add(p); 092 } 093 if (object instanceof StringType) 094 objectSet.add(((StringType) object).value); 095 p.objects.add(object); 096 return this; 097 } 098 099 protected Predicate getPredicate(String predicate) { 100 for (Predicate p : predicates) 101 if (p.predicate.equals(predicate)) 102 return p; 103 return null; 104 } 105 106 public Complex linkedPredicate(String predicate, Triple object, String link) { 107 Predicate p = getPredicate(predicate); 108 if (p == null) { 109 p = new Predicate(); 110 p.predicate = predicate; 111 p.link = link; 112 predicateSet.add(predicate); 113 predicates.add(p); 114 } 115 if (object instanceof StringType) 116 objectSet.add(((StringType) object).value); 117 p.objects.add(object); 118 return this; 119 } 120 121 public Complex predicate(String predicate) { 122 predicateSet.add(predicate); 123 Complex c = complex(); 124 predicate(predicate, c); 125 return c; 126 } 127 128 public Complex linkedPredicate(String predicate, String link) { 129 predicateSet.add(predicate); 130 Complex c = complex(); 131 linkedPredicate(predicate, c, link); 132 return c; 133 } 134 135 public void prefix(String code, String url) { 136 Turtle.this.prefix(code, url); 137 } 138 } 139 140 private class Predicate { 141 protected String predicate; 142 protected String link; 143 protected List<Triple> objects = new ArrayList<Turtle.Triple>(); 144 protected String comment; 145 146 public String getPredicate() { 147 return predicate; 148 } 149 150 public String makelink() { 151 if (link == null) 152 return predicate; 153 else 154 return "<a href=\"" + link + "\">" + Utilities.escapeXml(predicate) + "</a>"; 155 } 156 157 public List<Triple> getObjects() { 158 return objects; 159 } 160 161 public String getComment() { 162 return comment; 163 } 164 } 165 166 public class Subject extends Complex { 167 private String id; 168 169 public Predicate predicate(String predicate, Triple object, String comment) { 170 Predicate p = getPredicate(predicate); 171 if (p == null) { 172 p = new Predicate(); 173 p.predicate = predicate; 174 predicateSet.add(predicate); 175 predicates.add(p); 176 p.comment = comment; 177 } 178 if (object instanceof StringType) 179 objectSet.add(((StringType) object).value); 180 p.objects.add(object); 181 return p; 182 } 183 184 public void comment(String comment) { 185 if (!Utilities.noString(comment)) { 186 predicate("rdfs:comment", literal(comment)); 187 predicate("dcterms:description", literal(comment)); 188 } 189 } 190 191 public void label(String label) { 192 if (!Utilities.noString(label)) { 193 predicate("rdfs:label", literal(label)); 194 predicate("dc:title", literal(label)); 195 } 196 } 197 198 } 199 200 public class Section { 201 private String name; 202 private List<Subject> subjects = new ArrayList<Subject>(); 203 204 public Subject triple(String subject, String predicate, String object, String comment) { 205 return triple(subject, predicate, new StringType(object), comment); 206 } 207 208 public Subject triple(String subject, String predicate, String object) { 209 return triple(subject, predicate, new StringType(object)); 210 } 211 212 public Subject triple(String subject, String predicate, Triple object) { 213 return triple(subject, predicate, object, null); 214 } 215 216 public Subject triple(String subject, String predicate, Triple object, String comment) { 217 Subject s = subject(subject); 218 s.predicate(predicate, object, comment); 219 return s; 220 } 221 222 public void comment(String subject, String comment) { 223 triple(subject, "rdfs:comment", literal(comment)); 224 triple(subject, "dcterms:description", literal(comment)); 225 } 226 227 public void label(String subject, String comment) { 228 triple(subject, "rdfs:label", literal(comment)); 229 triple(subject, "dc:title", literal(comment)); 230 } 231 232 public Subject subject(String subject) { 233 for (Subject ss : subjects) 234 if (ss.id.equals(subject)) 235 return ss; 236 Subject s = new Subject(); 237 s.id = subject; 238 subjects.add(s); 239 return s; 240 } 241 242 public boolean hasSubject(String subject) { 243 for (Subject ss : subjects) 244 if (ss.id.equals(subject)) 245 return true; 246 return false; 247 } 248 } 249 250 private List<Section> sections = new ArrayList<Section>(); 251 protected Set<String> subjectSet = new HashSet<String>(); 252 protected Set<String> predicateSet = new HashSet<String>(); 253 protected Set<String> objectSet = new HashSet<String>(); 254 protected Map<String, String> prefixes = new HashMap<String, String>(); 255 256 public void prefix(String code, String url) { 257 prefixes.put(code, url); 258 } 259 260 protected boolean hasSection(String sn) { 261 for (Section s : sections) 262 if (s.name.equals(sn)) 263 return true; 264 return false; 265 266 } 267 268 public Section section(String sn) { 269 if (hasSection(sn)) 270 throw new Error("Duplicate section name " + sn); 271 Section s = new Section(); 272 s.name = sn; 273 sections.add(s); 274 return s; 275 } 276 277 protected String matches(String url, String prefixUri, String prefix) { 278 if (url.startsWith(prefixUri)) { 279 prefixes.put(prefix, prefixUri); 280 return prefix + ":" + escape(url.substring(prefixUri.length()), false); 281 } 282 return null; 283 } 284 285 protected Complex complex() { 286 return new Complex(); 287 } 288 289 private void checkPrefix(Triple object) { 290 if (object instanceof StringType) 291 checkPrefix(((StringType) object).value); 292 else { 293 Complex obj = (Complex) object; 294 for (Predicate po : obj.predicates) { 295 checkPrefix(po.getPredicate()); 296 for (Triple o : po.getObjects()) 297 checkPrefix(o); 298 } 299 } 300 } 301 302 protected void checkPrefix(String pname) { 303 if (pname.startsWith("(")) 304 return; 305 if (pname.startsWith("\"")) 306 return; 307 if (pname.startsWith("<")) 308 return; 309 310 if (pname.contains(":")) { 311 String prefix = pname.substring(0, pname.indexOf(":")); 312 if (!prefixes.containsKey(prefix) && !prefix.equals("http") && !prefix.equals("urn")) 313 throw new Error("undefined prefix " + prefix); 314 } 315 } 316 317 protected StringType literal(String s) { 318 return new StringType("\"" + escape(s, true) + "\""); 319 } 320 321 protected StringType literalTyped(String s, String t) { 322 return new StringType("\"" + escape(s, true) + "\"^^xs:" + t); 323 } 324 325 public static String escape(String s, boolean string) { 326 if (s == null) 327 return ""; 328 329 StringBuilder b = new StringBuilder(); 330 for (char c : s.toCharArray()) { 331 if (c == '\r') 332 b.append("\\r"); 333 else if (c == '\n') 334 b.append("\\n"); 335 else if (c == '"') 336 b.append("\\\""); 337 else if (c == '\\') 338 b.append("\\\\"); 339 else if (c == '/' && !string) 340 b.append("\\/"); 341 else 342 b.append(c); 343 } 344 return b.toString(); 345 } 346 347 protected String pctEncode(String s) { 348 if (s == null) 349 return ""; 350 351 StringBuilder b = new StringBuilder(); 352 for (char c : s.toCharArray()) { 353 if (c >= 'A' && c <= 'Z') 354 b.append(c); 355 else if (c >= 'a' && c <= 'z') 356 b.append(c); 357 else if (c >= '0' && c <= '9') 358 b.append(c); 359 else if (c == '.') 360 b.append(c); 361 else 362 b.append("%" + Integer.toHexString(c)); 363 } 364 return b.toString(); 365 } 366 367 protected List<String> sorted(Set<String> keys) { 368 List<String> names = new ArrayList<String>(); 369 names.addAll(keys); 370 Collections.sort(names); 371 return names; 372 } 373 374 public void commit(OutputStream destination, boolean header) throws IOException { 375 LineOutputStreamWriter writer = new LineOutputStreamWriter(destination); 376 commitPrefixes(writer, header); 377 for (Section s : sections) { 378 commitSection(writer, s); 379 } 380 writer.ln("# -------------------------------------------------------------------------------------"); 381 writer.ln(); 382 writer.flush(); 383 writer.close(); 384 } 385 386 public String asHtml() throws Exception { 387 StringBuilder b = new StringBuilder(); 388 b.append("<pre class=\"rdf\">\r\n"); 389 commitPrefixes(b); 390 for (Section s : sections) { 391 commitSection(b, s); 392 } 393 b.append("</pre>\r\n"); 394 b.append("\r\n"); 395 return b.toString(); 396 } 397 398 private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException { 399 if (header) { 400 writer.ln("# FHIR Sub-definitions"); 401 writer.write("# This is work in progress, and may change rapidly \r\n"); 402 writer.ln(); 403 writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 404 writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n"); 405 writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n"); 406 writer.write("# appropriate\" means that the predicates are a faithful representation \r\n"); 407 writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n"); 408 writer.ln(); 409 writer.write("# Where the community agrees on additional predicate statements (such \r\n"); 410 writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n"); 411 writer.write("# predicates \r\n"); 412 writer.ln(); 413 writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n"); 414 writer.ln(); 415 writer 416 .write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n"); 417 writer.ln(); 418 } 419 for (String p : sorted(prefixes.keySet())) 420 writer.ln("@prefix " + p + ": <" + prefixes.get(p) + "> ."); 421 writer.ln(); 422 if (header) { 423 writer.ln("# Predicates used in this file:"); 424 for (String s : sorted(predicateSet)) 425 writer.ln(" # " + s); 426 writer.ln(); 427 } 428 } 429 430 private void commitPrefixes(StringBuilder b) throws Exception { 431 for (String p : sorted(prefixes.keySet())) 432 b.append("@prefix " + p + ": <" + prefixes.get(p) + "> .\r\n"); 433 b.append("\r\n"); 434 } 435 436 // private String lastSubject = null; 437 // private String lastComment = ""; 438 439 private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException { 440 writer.ln("# - " + section.name + " " + Utilities.padLeft("", '-', 75 - section.name.length())); 441 writer.ln(); 442 for (Subject sbj : section.subjects) { 443 if (Utilities.noString(sbj.id)) { 444 writer.write("["); 445 } else { 446 writer.write(sbj.id); 447 writer.write(" "); 448 } 449 int i = 0; 450 451 for (Predicate p : sbj.predicates) { 452 writer.write(p.getPredicate()); 453 writer.write(" "); 454 boolean first = true; 455 for (Triple o : p.getObjects()) { 456 if (first) 457 first = false; 458 else 459 writer.write(", "); 460 if (o instanceof StringType) 461 writer.write(((StringType) o).value); 462 else { 463 writer.write("["); 464 if (write((Complex) o, writer, 4)) 465 writer.write("\r\n ]"); 466 else 467 writer.write("]"); 468 } 469 } 470 String comment = p.comment == null ? "" : " # " + p.comment; 471 i++; 472 if (i < sbj.predicates.size()) 473 writer.write(";" + comment + "\r\n "); 474 else { 475 if (Utilities.noString(sbj.id)) 476 writer.write("]"); 477 writer.write(" ." + comment + "\r\n\r\n"); 478 } 479 } 480 } 481 } 482 483 private void commitSection(StringBuilder b, Section section) throws Exception { 484 b.append("# - " + section.name + " " + Utilities.padLeft("", '-', 75 - section.name.length()) + "\r\n"); 485 b.append("\r\n"); 486 for (Subject sbj : section.subjects) { 487 b.append(Utilities.escapeXml(sbj.id)); 488 b.append(" "); 489 int i = 0; 490 491 for (Predicate p : sbj.predicates) { 492 b.append(p.makelink()); 493 b.append(" "); 494 boolean first = true; 495 for (Triple o : p.getObjects()) { 496 if (first) 497 first = false; 498 else 499 b.append(", "); 500 if (o instanceof StringType) 501 b.append(Utilities.escapeXml(((StringType) o).value)); 502 else { 503 b.append("["); 504 if (write((Complex) o, b, 4)) 505 b.append("\r\n ]"); 506 else 507 b.append("]"); 508 } 509 } 510 String comment = p.comment == null ? "" : " # " + p.comment; 511 i++; 512 if (i < sbj.predicates.size()) 513 b.append(";" + Utilities.escapeXml(comment) + "\r\n "); 514 else 515 b.append("." + Utilities.escapeXml(comment) + "\r\n\r\n"); 516 } 517 } 518 } 519 520 protected class LineOutputStreamWriter extends OutputStreamWriter { 521 private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException { 522 super(out, "UTF-8"); 523 } 524 525 private void ln() throws IOException { 526 write("\r\n"); 527 } 528 529 private void ln(String s) throws IOException { 530 write(s); 531 write("\r\n"); 532 } 533 } 534 535 public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException { 536 if (complex.predicates.isEmpty()) 537 return false; 538 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size() == 1 539 && complex.predicates.get(0).getObjects().get(0) instanceof StringType 540 && Utilities.noString(complex.predicates.get(0).comment)) { 541 writer.write(" " + complex.predicates.get(0).predicate + " " 542 + ((StringType) complex.predicates.get(0).getObjects().get(0)).value); 543 return false; 544 } 545 String left = Utilities.padLeft("", ' ', indent); 546 int i = 0; 547 for (Predicate po : complex.predicates) { 548 writer.write("\r\n"); 549 boolean first = true; 550 for (Triple o : po.getObjects()) { 551 if (first) { 552 first = false; 553 writer.write(left + " " + po.getPredicate() + " "); 554 } else 555 writer.write(", "); 556 if (o instanceof StringType) 557 writer.write(((StringType) o).value); 558 else { 559 writer.write("["); 560 if (write((Complex) o, writer, indent + 2)) 561 writer.write("\r\n" + left + " ]"); 562 else 563 writer.write(" ]"); 564 } 565 } 566 i++; 567 if (i < complex.predicates.size()) 568 writer.write(";"); 569 if (!Utilities.noString(po.comment)) 570 writer.write(" # " + escape(po.comment, false)); 571 } 572 return true; 573 } 574 575 public boolean write(Complex complex, StringBuilder b, int indent) throws Exception { 576 if (complex.predicates.isEmpty()) 577 return false; 578 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size() == 1 579 && complex.predicates.get(0).getObjects().get(0) instanceof StringType 580 && Utilities.noString(complex.predicates.get(0).comment)) { 581 b.append(" " + complex.predicates.get(0).makelink() + " " 582 + Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value)); 583 return false; 584 } 585 String left = Utilities.padLeft("", ' ', indent); 586 int i = 0; 587 for (Predicate po : complex.predicates) { 588 b.append("\r\n"); 589 boolean first = true; 590 for (Triple o : po.getObjects()) { 591 if (first) { 592 first = false; 593 b.append(left + " " + po.makelink() + " "); 594 } else 595 b.append(", "); 596 if (o instanceof StringType) 597 b.append(Utilities.escapeXml(((StringType) o).value)); 598 else { 599 b.append("["); 600 if (write((Complex) o, b, indent + 2)) 601 b.append(left + " ]"); 602 else 603 b.append(" ]"); 604 } 605 } 606 i++; 607 if (i < complex.predicates.size()) 608 b.append(";"); 609 if (!Utilities.noString(po.comment)) 610 b.append(" # " + Utilities.escapeXml(escape(po.comment, false))); 611 } 612 return true; 613 } 614 615 public abstract class TTLObject { 616 protected int line; 617 protected int col; 618 619 abstract public boolean hasValue(String value); 620 621 public int getLine() { 622 return line; 623 } 624 625 public int getCol() { 626 return col; 627 } 628 629 } 630 631 public class TTLLiteral extends TTLObject { 632 633 private String value; 634 private String type; 635 636 protected TTLLiteral(int line, int col) { 637 this.line = line; 638 this.col = col; 639 } 640 641 @Override 642 public boolean hasValue(String value) { 643 return value.equals(this.value); 644 } 645 646 public String getValue() { 647 return value; 648 } 649 650 public String getType() { 651 return type; 652 } 653 654 } 655 656 public class TTLURL extends TTLObject { 657 private String uri; 658 659 protected TTLURL(int line, int col) { 660 this.line = line; 661 this.col = col; 662 } 663 664 public String getUri() { 665 return uri; 666 } 667 668 public void setUri(String uri) throws FHIRFormatError { 669 if (!uri.matches(IRI_URL)) 670 throw new FHIRFormatError("Illegal URI " + uri); 671 this.uri = uri; 672 } 673 674 @Override 675 public boolean hasValue(String value) { 676 return value.equals(this.uri); 677 } 678 } 679 680 public class TTLList extends TTLObject { 681 private List<TTLObject> list = new ArrayList<Turtle.TTLObject>(); 682 683 public TTLList(TTLObject obj) { 684 super(); 685 list.add(obj); 686 } 687 688 @Override 689 public boolean hasValue(String value) { 690 for (TTLObject obj : list) 691 if (obj.hasValue(value)) 692 return true; 693 return false; 694 } 695 696 public List<TTLObject> getList() { 697 return list; 698 } 699 700 } 701 702 public class TTLComplex extends TTLObject { 703 private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>(); 704 705 protected TTLComplex(int line, int col) { 706 this.line = line; 707 this.col = col; 708 } 709 710 public Map<String, TTLObject> getPredicates() { 711 return predicates; 712 } 713 714 @Override 715 public boolean hasValue(String value) { 716 return false; 717 } 718 719 public void addPredicate(String uri, TTLObject obj) { 720 if (!predicates.containsKey(uri)) 721 predicates.put(uri, obj); 722 else { 723 TTLObject eo = predicates.get(uri); 724 TTLList list = null; 725 if (eo instanceof TTLList) 726 list = (TTLList) eo; 727 else { 728 list = new TTLList(eo); 729 predicates.put(uri, list); 730 } 731 list.list.add(obj); 732 } 733 } 734 735 public void addPredicates(Map<String, TTLObject> values) { 736 for (String s : values.keySet()) { 737 addPredicate(s, values.get(s)); 738 } 739 } 740 } 741 742 private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>(); 743 744 private Object base; 745 746 public enum LexerTokenType { 747 TOKEN, // [, ], :, @ 748 WORD, // a word 749 URI, // a URI <> 750 LITERAL // "..." 751 } 752 753 public class Lexer { 754 755 private String source; 756 private LexerTokenType type; 757 private int cursor, line, col, startLine, startCol; 758 private String token; 759 760 public Lexer(String source) throws FHIRFormatError { 761 this.source = source; 762 cursor = 0; 763 line = 1; 764 col = 1; 765 readNext(false); 766 } 767 768 private void skipWhitespace() { 769 while (cursor < source.length()) { 770 char ch = source.charAt(cursor); 771 if (Character.isWhitespace(ch)) 772 grab(); 773 else if (ch == '#') { 774 ch = grab(); 775 while (cursor < source.length()) { 776 ch = grab(); 777 if (ch == '\r' || ch == '\n') { 778 break; 779 } 780 } 781 } else 782 break; 783 } 784 } 785 786 private char grab() { 787 char c = source.charAt(cursor); 788 if (c == '\n') { 789 line++; 790 col = 1; 791 } else 792 col++; 793 794 cursor++; 795 return c; 796 } 797 798 private void readNext(boolean postColon) throws FHIRFormatError { 799 token = null; 800 type = null; 801 skipWhitespace(); 802 if (cursor >= source.length()) 803 return; 804 startLine = line; 805 startCol = col; 806 char ch = grab(); 807 StringBuilder b = new StringBuilder(); 808 switch (ch) { 809 case '@': 810 case '.': 811 case ':': 812 case ';': 813 case '^': 814 case ',': 815 case ']': 816 case '[': 817 case '(': 818 case ')': 819 type = LexerTokenType.TOKEN; 820 b.append(ch); 821 token = b.toString(); 822 return; 823 case '<': 824 while (cursor < source.length()) { 825 ch = grab(); 826 if (ch == '>') 827 break; 828 b.append(ch); 829 } 830 type = LexerTokenType.URI; 831 token = unescape(b.toString(), true); 832 return; 833 case '"': 834 b.append(ch); 835 String end = "\""; 836 while (cursor < source.length()) { 837 ch = grab(); 838 if (b.length() == 2 && ch != '"' && b.equals("\"\"")) { 839 cursor--; 840 break; 841 } 842 b.append(ch); 843 if (ch == '"') 844 if (b.toString().equals("\"\"\"")) 845 end = "\"\"\""; 846 else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\" + end)) 847 break; 848 } 849 type = LexerTokenType.LITERAL; 850 token = unescape(b.toString().substring(end.length(), b.length() - end.length()), false); 851 return; 852 case '\'': 853 b.append(ch); 854 end = "'"; 855 while (cursor < source.length()) { 856 ch = grab(); 857 if (b.equals("''") && ch != '\'') { 858 cursor--; 859 break; 860 } 861 b.append(ch); 862 if (b.toString().equals("'''")) 863 end = "'''"; 864 else if (!b.toString().equals("''") && b.toString().endsWith(end)) 865 break; 866 } 867 type = LexerTokenType.LITERAL; 868 token = unescape(b.toString().substring(end.length(), b.length() - end.length()), false); 869 return; 870 default: 871 if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') 872 || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) { 873 b.append(ch); 874 while (cursor < source.length()) { 875 ch = grab(); 876 // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 877 // 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', 878 // '-', '+', '.', '\\', '#')) 879 if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') 880 || ((ch == ':') && !postColon)) 881 break; 882 b.append(ch); 883 } 884 type = LexerTokenType.WORD; 885 token = b.toString(); 886 cursor--; 887 return; 888 } else 889 throw error("unexpected lexer char " + ch); 890 } 891 } 892 893 private String unescape(String s, boolean isUri) throws FHIRFormatError { 894 StringBuilder b = new StringBuilder(); 895 int i = 0; 896 while (i < s.length()) { 897 char ch = s.charAt(i); 898 if (ch == '\\' && i < s.length() - 1) { 899 i++; 900 switch (s.charAt(i)) { 901 case 't': 902 b.append('\t'); 903 break; 904 case 'r': 905 b.append('\r'); 906 break; 907 case 'n': 908 b.append('\n'); 909 break; 910 case 'f': 911 b.append('\f'); 912 break; 913 case '\'': 914 b.append('\''); 915 break; 916 case '\"': 917 b.append('\"'); 918 break; 919 case '\\': 920 b.append('\\'); 921 break; 922 case '/': 923 b.append('\\'); 924 break; 925 case 'U': 926 case 'u': 927 i++; 928 int l = 4; 929 int uc = Integer.parseInt(s.substring(i, i + l), 16); 930 if (uc < (isUri ? 33 : 32)) { 931 l = 8; 932 uc = Integer.parseInt(s.substring(i, i + 8), 16); 933 } 934 if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) 935 throw new FHIRFormatError("Illegal unicode character"); 936 b.append(Character.toString(uc)); 937 i = i + l; 938 break; 939 default: 940 throw new FHIRFormatError("Unknown character escape \\" + s.charAt(i)); 941 } 942 } else { 943 b.append(ch); 944 } 945 i++; 946 } 947 return b.toString(); 948 } 949 950 public boolean done() { 951 return type == null; 952 } 953 954 public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError { 955 if (type != null && this.type != type) 956 throw error("Unexpected type. Found " + this.type.toString() + " looking for a " + type.toString()); 957 String res = token; 958 readNext(postColon); 959 return res; 960 } 961 962 public String peek() throws Exception { 963 return token; 964 } 965 966 public LexerTokenType peekType() { 967 return type; 968 } 969 970 public void token(String token) throws FHIRFormatError { 971 if (!token.equals(this.token)) 972 throw error("Unexpected word " + this.token + " looking for " + token); 973 next(LexerTokenType.TOKEN, token.equals(":")); 974 } 975 976 public void word(String word) throws Exception { 977 if (!word.equals(this.token)) 978 throw error("Unexpected word " + this.token + " looking for " + word); 979 next(LexerTokenType.WORD, false); 980 } 981 982 public String word() throws FHIRFormatError { 983 String t = token; 984 next(LexerTokenType.WORD, false); 985 return t; 986 } 987 988 public String uri() throws FHIRFormatError { 989 if (this.type != LexerTokenType.URI) 990 throw error("Unexpected type. Found " + this.type.toString() + " looking for a URI"); 991 String t = token; 992 next(LexerTokenType.URI, false); 993 return t; 994 } 995 996 public String literal() throws FHIRFormatError { 997 if (this.type != LexerTokenType.LITERAL) 998 throw error("Unexpected type. Found " + this.type.toString() + " looking for a Literal"); 999 String t = token; 1000 next(LexerTokenType.LITERAL, false); 1001 return t; 1002 } 1003 1004 public boolean peek(LexerTokenType type, String token) { 1005 return this.type == type && this.token.equals(token); 1006 } 1007 1008 public FHIRFormatError error(String message) { 1009 return new FHIRFormatError("Syntax Error parsing Turtle on line " + Integer.toString(line) + " col " 1010 + Integer.toString(col) + ": " + message); 1011 } 1012 1013 } 1014 // 1015 // public void importTtl(Section sct, String ttl) throws Exception { 1016 // if (!Utilities.noString(ttl)) { 1017 // // System.out.println("import ttl: "+ttl); 1018 // Lexer lexer = new Lexer(ttl); 1019 // String subject = null; 1020 // String predicate = null; 1021 // while (!lexer.done()) { 1022 // if (subject == null) 1023 // subject = lexer.next(); 1024 // if (predicate == null) 1025 // predicate = lexer.next(); 1026 // if (lexer.peekType() == null) { 1027 // throw new Error("Unexpected end of input parsing turtle"); 1028 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1029 // sct.triple(subject, predicate, lexer.next()); 1030 // } else if (lexer.peek() == null) { 1031 // throw new Error("Unexected - turtle lexer found no token"); 1032 // } else if (lexer.peek().equals("[")) { 1033 // sct.triple(subject, predicate, importComplex(lexer)); 1034 // } else 1035 // throw new Exception("Not done yet"); 1036 // String n = lexer.next(); 1037 // if (Utilities.noString(n)) 1038 // break; 1039 // if (n.equals(".")) { 1040 // subject = null; 1041 // predicate = null; 1042 // } else if (n.equals(";")) { 1043 // predicate = null; 1044 // } else if (!n.equals(",")) 1045 // throw new Exception("Unexpected token "+n); 1046 // } 1047 // } 1048 // } 1049 1050 public void parse(String source) throws FHIRFormatError { 1051 prefixes.clear(); 1052 prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#"); 1053 parse(new Lexer(source)); 1054 } 1055 1056 private void parse(Lexer lexer) throws FHIRFormatError { 1057 boolean doPrefixes = true; 1058 while (!lexer.done()) { 1059 if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") 1060 || lexer.peek(LexerTokenType.WORD, "BASE"))) { 1061 boolean sparqlStyle = false; 1062 boolean base = false; 1063 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1064 lexer.token("@"); 1065 String p = lexer.word(); 1066 if (p.equals("base")) 1067 base = true; 1068 else if (!p.equals("prefix")) 1069 throw new FHIRFormatError("Unexpected token " + p); 1070 } else { 1071 sparqlStyle = true; 1072 String p = lexer.word(); 1073 if (p.equals("BASE")) 1074 base = true; 1075 else if (!p.equals("PREFIX")) 1076 throw new FHIRFormatError("Unexpected token " + p); 1077 } 1078 String prefix = null; 1079 if (!base) { 1080 prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null; 1081 lexer.token(":"); 1082 } 1083 String url = lexer.next(LexerTokenType.URI, false); 1084 if (!sparqlStyle) 1085 lexer.token("."); 1086 if (!base) 1087 prefix(prefix, url); 1088 else if (this.base == null) 1089 this.base = url; 1090 else 1091 throw new FHIRFormatError("Duplicate @base"); 1092 } else if (lexer.peekType() == LexerTokenType.URI) { 1093 doPrefixes = false; 1094 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1095 uri.setUri(lexer.uri()); 1096 TTLComplex complex = parseComplex(lexer); 1097 objects.put(uri, complex); 1098 lexer.token("."); 1099 } else if (lexer.peekType() == LexerTokenType.WORD) { 1100 doPrefixes = false; 1101 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1102 String pfx = lexer.word(); 1103 if (!prefixes.containsKey(pfx)) 1104 throw new FHIRFormatError("Unknown prefix " + pfx); 1105 lexer.token(":"); 1106 uri.setUri(prefixes.get(pfx) + lexer.word()); 1107 TTLComplex complex = parseComplex(lexer); 1108 objects.put(uri, complex); 1109 lexer.token("."); 1110 } else if (lexer.peek(LexerTokenType.TOKEN, ":")) { 1111 doPrefixes = false; 1112 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1113 lexer.token(":"); 1114 if (!prefixes.containsKey(null)) 1115 throw new FHIRFormatError("Unknown prefix ''"); 1116 uri.setUri(prefixes.get(null) + lexer.word()); 1117 TTLComplex complex = parseComplex(lexer); 1118 objects.put(uri, complex); 1119 lexer.token("."); 1120 } else if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1121 doPrefixes = false; 1122 lexer.token("["); 1123 TTLComplex bnode = parseComplex(lexer); 1124 lexer.token("]"); 1125 TTLComplex complex = null; 1126 if (!lexer.peek(LexerTokenType.TOKEN, ".")) { 1127 complex = parseComplex(lexer); 1128 // at this point, we collapse bnode and complex, and give bnode a fictional 1129 // identity 1130 bnode.addPredicates(complex.predicates); 1131 } 1132 1133 objects.put(anonymousId(), bnode); 1134 lexer.token("."); 1135 } else 1136 throw lexer.error("Unknown token " + lexer.token); 1137 } 1138 } 1139 1140 private TTLURL anonymousId() throws FHIRFormatError { 1141 TTLURL url = new TTLURL(-1, -1); 1142 url.setUri("urn:uuid:" + UUID.randomUUID().toString().toLowerCase()); 1143 return url; 1144 } 1145 1146 private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError { 1147 TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol); 1148 1149 boolean done = lexer.peek(LexerTokenType.TOKEN, "]"); 1150 while (!done) { 1151 String uri = null; 1152 if (lexer.peekType() == LexerTokenType.URI) 1153 uri = lexer.uri(); 1154 else { 1155 String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1156 if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) { 1157 lexer.token(":"); 1158 if (!prefixes.containsKey(t)) 1159 throw new FHIRFormatError("unknown prefix " + t); 1160 uri = prefixes.get(t) + lexer.word(); 1161 } else if (t.equals("a")) 1162 uri = prefixes.get("rdfs") + "type"; 1163 else 1164 throw lexer.error("unexpected token"); 1165 } 1166 1167 boolean inlist = false; 1168 if (lexer.peek(LexerTokenType.TOKEN, "(")) { 1169 inlist = true; 1170 lexer.token("("); 1171 } 1172 1173 boolean rpt = false; 1174 do { 1175 if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1176 lexer.token("["); 1177 result.addPredicate(uri, parseComplex(lexer)); 1178 lexer.token("]"); 1179 } else if (lexer.peekType() == LexerTokenType.URI) { 1180 TTLURL u = new TTLURL(lexer.startLine, lexer.startCol); 1181 u.setUri(lexer.uri()); 1182 result.addPredicate(uri, u); 1183 } else if (lexer.peekType() == LexerTokenType.LITERAL) { 1184 TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol); 1185 u.value = lexer.literal(); 1186 if (lexer.peek(LexerTokenType.TOKEN, "^")) { 1187 lexer.token("^"); 1188 lexer.token("^"); 1189 if (lexer.peekType() == LexerTokenType.URI) { 1190 u.type = lexer.uri(); 1191 } else { 1192 String l = lexer.word(); 1193 lexer.token(":"); 1194 u.type = prefixes.get(l) + lexer.word(); 1195 } 1196 } 1197 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1198 // lang tag - skip it 1199 lexer.token("@"); 1200 String lang = lexer.word(); 1201 if (!lang.matches(LANG_REGEX)) { 1202 throw new FHIRFormatError("Invalid Language tag " + lang); 1203 } 1204 } 1205 result.addPredicate(uri, u); 1206 } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) { 1207 int sl = lexer.startLine; 1208 int sc = lexer.startCol; 1209 String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1210 if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1211 TTLLiteral u = new TTLLiteral(sl, sc); 1212 u.value = pfx; 1213 result.addPredicate(uri, u); 1214 } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1215 TTLLiteral u = new TTLLiteral(sl, sc); 1216 u.value = pfx; 1217 result.addPredicate(uri, u); 1218 } else { 1219 if (!prefixes.containsKey(pfx)) 1220 throw new FHIRFormatError("Unknown prefix " + (pfx == null ? "''" : pfx)); 1221 TTLURL u = new TTLURL(sl, sc); 1222 lexer.token(":"); 1223 u.setUri(prefixes.get(pfx) + lexer.word()); 1224 result.addPredicate(uri, u); 1225 } 1226 } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) { 1227 throw new FHIRFormatError("unexpected token " + lexer.token); 1228 } 1229 1230 if (inlist) 1231 rpt = !lexer.peek(LexerTokenType.TOKEN, ")"); 1232 else { 1233 rpt = lexer.peek(LexerTokenType.TOKEN, ","); 1234 if (rpt) 1235 lexer.readNext(false); 1236 } 1237 } while (rpt); 1238 if (inlist) 1239 lexer.token(")"); 1240 1241 if (lexer.peek(LexerTokenType.TOKEN, ";")) { 1242 while ((lexer.peek(LexerTokenType.TOKEN, ";"))) 1243 lexer.token(";"); 1244 done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]"); 1245 } else { 1246 done = true; 1247 } 1248 } 1249 return result; 1250 } 1251 1252 public Map<TTLURL, TTLComplex> getObjects() { 1253 return objects; 1254 } 1255 1256 public TTLComplex getObject(String url) { 1257 for (TTLURL t : objects.keySet()) { 1258 if (t.getUri().equals(url)) 1259 return objects.get(t); 1260 } 1261 return null; 1262 } 1263 1264 // public void parseFragment(Lexer lexer) throws Exception { 1265 // lexer.next(); // read [ 1266 // Complex obj = new Complex(); 1267 // while (!lexer.peek().equals("]")) { 1268 // String predicate = lexer.next(); 1269 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == 1270 // LexerTokenType.LITERAL) { 1271 // obj.predicate(predicate, lexer.next()); 1272 // } else if (lexer.peek().equals("[")) { 1273 // obj.predicate(predicate, importComplex(lexer)); 1274 // } else 1275 // throw new Exception("Not done yet"); 1276 // if (lexer.peek().equals(";")) 1277 // lexer.next(); 1278 // } 1279 // lexer.next(); // read ] 1280 // //return obj; 1281 // } 1282 // 1283 // public void importTtl(Section sct, String ttl) throws Exception { 1284 // if (!Utilities.noString(ttl)) { 1285 // // System.out.println("import ttl: "+ttl); 1286 // Lexer lexer = new Lexer(ttl); 1287 // String subject = null; 1288 // String predicate = null; 1289 // while (!lexer.done()) { 1290 // if (subject == null) 1291 // subject = lexer.next(); 1292 // if (predicate == null) 1293 // predicate = lexer.next(); 1294 // if (lexer.peekType() == null) { 1295 // throw new Error("Unexpected end of input parsing turtle"); 1296 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1297 // sct.triple(subject, predicate, lexer.next()); 1298 // } else if (lexer.peek() == null) { 1299 // throw new Error("Unexected - turtle lexer found no token"); 1300 // } else if (lexer.peek().equals("[")) { 1301 // sct.triple(subject, predicate, importComplex(lexer)); 1302 // } else 1303 // throw new Exception("Not done yet"); 1304 // String n = lexer.next(); 1305 // if (Utilities.noString(n)) 1306 // break; 1307 // if (n.equals(".")) { 1308 // subject = null; 1309 // predicate = null; 1310 // } else if (n.equals(";")) { 1311 // predicate = null; 1312 // } else if (!n.equals(",")) 1313 // throw new Exception("Unexpected token "+n); 1314 // } 1315 // } 1316 // } 1317 1318 // private Complex importComplex(Lexer lexer) throws Exception { 1319 // lexer.next(); // read [ 1320 // Complex obj = new Complex(); 1321 // while (!lexer.peek().equals("]")) { 1322 // String predicate = lexer.next(); 1323 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == 1324 // LexerTokenType.LITERAL) { 1325 // obj.predicate(predicate, lexer.next()); 1326 // } else if (lexer.peek().equals("[")) { 1327 // obj.predicate(predicate, importComplex(lexer)); 1328 // } else 1329 // throw new Exception("Not done yet"); 1330 // if (lexer.peek().equals(";")) 1331 // lexer.next(); 1332 // } 1333 // lexer.next(); // read ] 1334 // return obj; 1335 // } 1336 1337}