
001package org.hl7.fhir.r4.utils.formats; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032import java.io.IOException; 033import java.io.OutputStream; 034import java.io.OutputStreamWriter; 035import java.io.UnsupportedEncodingException; 036import java.util.ArrayList; 037import java.util.Collections; 038import java.util.HashMap; 039import java.util.HashSet; 040import java.util.List; 041import java.util.Map; 042import java.util.Set; 043import java.util.UUID; 044 045import org.hl7.fhir.exceptions.FHIRFormatError; 046import org.hl7.fhir.utilities.Utilities; 047 048@Deprecated 049public class Turtle { 050 051 public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE"; 052 053 public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\=" 054 + GOOD_IRI_CHAR + "])+"; 055 public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?"; 056 057 // Object model 058 public abstract class Triple { 059 private String uri; 060 } 061 062 public class StringType extends Triple { 063 private String value; 064 065 public StringType(String value) { 066 super(); 067 this.value = value; 068 } 069 } 070 071 public class Complex extends Triple { 072 protected List<Predicate> predicates = new ArrayList<Predicate>(); 073 074 public Complex predicate(String predicate, String object) { 075 predicateSet.add(predicate); 076 objectSet.add(object); 077 return predicate(predicate, new StringType(object)); 078 } 079 080 public Complex linkedPredicate(String predicate, String object, String link) { 081 predicateSet.add(predicate); 082 objectSet.add(object); 083 return linkedPredicate(predicate, new StringType(object), link); 084 } 085 086 public Complex predicate(String predicate, Triple object) { 087 Predicate p = getPredicate(predicate); 088 if (p == null) { 089 p = new Predicate(); 090 p.predicate = predicate; 091 predicateSet.add(predicate); 092 predicates.add(p); 093 } 094 if (object instanceof StringType) 095 objectSet.add(((StringType) object).value); 096 p.objects.add(object); 097 return this; 098 } 099 100 protected Predicate getPredicate(String predicate) { 101 for (Predicate p : predicates) 102 if (p.predicate.equals(predicate)) 103 return p; 104 return null; 105 } 106 107 public Complex linkedPredicate(String predicate, Triple object, String link) { 108 Predicate p = getPredicate(predicate); 109 if (p == null) { 110 p = new Predicate(); 111 p.predicate = predicate; 112 p.link = link; 113 predicateSet.add(predicate); 114 predicates.add(p); 115 } 116 if (object instanceof StringType) 117 objectSet.add(((StringType) object).value); 118 p.objects.add(object); 119 return this; 120 } 121 122 public Complex predicate(String predicate) { 123 predicateSet.add(predicate); 124 Complex c = complex(); 125 predicate(predicate, c); 126 return c; 127 } 128 129 public Complex linkedPredicate(String predicate, String link) { 130 predicateSet.add(predicate); 131 Complex c = complex(); 132 linkedPredicate(predicate, c, link); 133 return c; 134 } 135 136 public void prefix(String code, String url) { 137 Turtle.this.prefix(code, url); 138 } 139 } 140 141 private class Predicate { 142 protected String predicate; 143 protected String link; 144 protected List<Triple> objects = new ArrayList<Turtle.Triple>(); 145 protected String comment; 146 147 public String getPredicate() { 148 return predicate; 149 } 150 151 public String makelink() { 152 if (link == null) 153 return predicate; 154 else 155 return "<a href=\"" + link + "\">" + Utilities.escapeXml(predicate) + "</a>"; 156 } 157 158 public List<Triple> getObjects() { 159 return objects; 160 } 161 162 public String getComment() { 163 return comment; 164 } 165 } 166 167 public class Subject extends Complex { 168 private String id; 169 170 public Predicate predicate(String predicate, Triple object, String comment) { 171 Predicate p = getPredicate(predicate); 172 if (p == null) { 173 p = new Predicate(); 174 p.predicate = predicate; 175 predicateSet.add(predicate); 176 predicates.add(p); 177 p.comment = comment; 178 } 179 if (object instanceof StringType) 180 objectSet.add(((StringType) object).value); 181 p.objects.add(object); 182 return p; 183 } 184 185 public void comment(String comment) { 186 if (!Utilities.noString(comment)) { 187 predicate("rdfs:comment", literal(comment)); 188 predicate("dcterms:description", literal(comment)); 189 } 190 } 191 192 public void label(String label) { 193 if (!Utilities.noString(label)) { 194 predicate("rdfs:label", literal(label)); 195 predicate("dc:title", literal(label)); 196 } 197 } 198 199 } 200 201 public class Section { 202 private String name; 203 private List<Subject> subjects = new ArrayList<Subject>(); 204 205 public Subject triple(String subject, String predicate, String object, String comment) { 206 return triple(subject, predicate, new StringType(object), comment); 207 } 208 209 public Subject triple(String subject, String predicate, String object) { 210 return triple(subject, predicate, new StringType(object)); 211 } 212 213 public Subject triple(String subject, String predicate, Triple object) { 214 return triple(subject, predicate, object, null); 215 } 216 217 public Subject triple(String subject, String predicate, Triple object, String comment) { 218 Subject s = subject(subject); 219 s.predicate(predicate, object, comment); 220 return s; 221 } 222 223 public void comment(String subject, String comment) { 224 triple(subject, "rdfs:comment", literal(comment)); 225 triple(subject, "dcterms:description", literal(comment)); 226 } 227 228 public void label(String subject, String comment) { 229 triple(subject, "rdfs:label", literal(comment)); 230 triple(subject, "dc:title", literal(comment)); 231 } 232 233 public Subject subject(String subject) { 234 for (Subject ss : subjects) 235 if (ss.id.equals(subject)) 236 return ss; 237 Subject s = new Subject(); 238 s.id = subject; 239 subjects.add(s); 240 return s; 241 } 242 243 public boolean hasSubject(String subject) { 244 for (Subject ss : subjects) 245 if (ss.id.equals(subject)) 246 return true; 247 return false; 248 } 249 } 250 251 private List<Section> sections = new ArrayList<Section>(); 252 protected Set<String> subjectSet = new HashSet<String>(); 253 protected Set<String> predicateSet = new HashSet<String>(); 254 protected Set<String> objectSet = new HashSet<String>(); 255 protected Map<String, String> prefixes = new HashMap<String, String>(); 256 257 public void prefix(String code, String url) { 258 prefixes.put(code, url); 259 } 260 261 protected boolean hasSection(String sn) { 262 for (Section s : sections) 263 if (s.name.equals(sn)) 264 return true; 265 return false; 266 267 } 268 269 public Section section(String sn) { 270 if (hasSection(sn)) 271 throw new Error("Duplicate section name " + sn); 272 Section s = new Section(); 273 s.name = sn; 274 sections.add(s); 275 return s; 276 } 277 278 protected String matches(String url, String prefixUri, String prefix) { 279 if (url.startsWith(prefixUri)) { 280 prefixes.put(prefix, prefixUri); 281 return prefix + ":" + escape(url.substring(prefixUri.length()), false); 282 } 283 return null; 284 } 285 286 protected Complex complex() { 287 return new Complex(); 288 } 289 290 private void checkPrefix(Triple object) { 291 if (object instanceof StringType) 292 checkPrefix(((StringType) object).value); 293 else { 294 Complex obj = (Complex) object; 295 for (Predicate po : obj.predicates) { 296 checkPrefix(po.getPredicate()); 297 for (Triple o : po.getObjects()) 298 checkPrefix(o); 299 } 300 } 301 } 302 303 protected void checkPrefix(String pname) { 304 if (pname.startsWith("(")) 305 return; 306 if (pname.startsWith("\"")) 307 return; 308 if (pname.startsWith("<")) 309 return; 310 311 if (pname.contains(":")) { 312 String prefix = pname.substring(0, pname.indexOf(":")); 313 if (!prefixes.containsKey(prefix) && !prefix.equals("http") && !prefix.equals("urn")) 314 throw new Error("undefined prefix " + prefix); 315 } 316 } 317 318 protected StringType literal(String s) { 319 return new StringType("\"" + escape(s, true) + "\""); 320 } 321 322 protected StringType literalTyped(String s, String t) { 323 return new StringType("\"" + escape(s, true) + "\"^^xs:" + t); 324 } 325 326 public static String escape(String s, boolean string) { 327 if (s == null) 328 return ""; 329 330 StringBuilder b = new StringBuilder(); 331 for (char c : s.toCharArray()) { 332 if (c == '\r') 333 b.append("\\r"); 334 else if (c == '\n') 335 b.append("\\n"); 336 else if (c == '"') 337 b.append("\\\""); 338 else if (c == '\\') 339 b.append("\\\\"); 340 else if (c == '/' && !string) 341 b.append("\\/"); 342 else 343 b.append(c); 344 } 345 return b.toString(); 346 } 347 348 protected String pctEncode(String s) { 349 if (s == null) 350 return ""; 351 352 StringBuilder b = new StringBuilder(); 353 for (char c : s.toCharArray()) { 354 if (c >= 'A' && c <= 'Z') 355 b.append(c); 356 else if (c >= 'a' && c <= 'z') 357 b.append(c); 358 else if (c >= '0' && c <= '9') 359 b.append(c); 360 else if (c == '.') 361 b.append(c); 362 else 363 b.append("%" + Integer.toHexString(c)); 364 } 365 return b.toString(); 366 } 367 368 protected List<String> sorted(Set<String> keys) { 369 List<String> names = new ArrayList<String>(); 370 names.addAll(keys); 371 Collections.sort(names); 372 return names; 373 } 374 375 public void commit(OutputStream destination, boolean header) throws IOException { 376 LineOutputStreamWriter writer = new LineOutputStreamWriter(destination); 377 commitPrefixes(writer, header); 378 for (Section s : sections) { 379 commitSection(writer, s); 380 } 381 writer.ln("# -------------------------------------------------------------------------------------"); 382 writer.ln(); 383 writer.flush(); 384 writer.close(); 385 } 386 387 public String asHtml() throws Exception { 388 StringBuilder b = new StringBuilder(); 389 b.append("<pre class=\"rdf\">\r\n"); 390 commitPrefixes(b); 391 for (Section s : sections) { 392 commitSection(b, s); 393 } 394 b.append("</pre>\r\n"); 395 b.append("\r\n"); 396 return b.toString(); 397 } 398 399 private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException { 400 if (header) { 401 writer.ln("# FHIR Sub-definitions"); 402 writer.write("# This is work in progress, and may change rapidly \r\n"); 403 writer.ln(); 404 writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 405 writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n"); 406 writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n"); 407 writer.write("# appropriate\" means that the predicates are a faithful representation \r\n"); 408 writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n"); 409 writer.ln(); 410 writer.write("# Where the community agrees on additional predicate statements (such \r\n"); 411 writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n"); 412 writer.write("# predicates \r\n"); 413 writer.ln(); 414 writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n"); 415 writer.ln(); 416 writer 417 .write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n"); 418 writer.ln(); 419 } 420 for (String p : sorted(prefixes.keySet())) 421 writer.ln("@prefix " + p + ": <" + prefixes.get(p) + "> ."); 422 writer.ln(); 423 if (header) { 424 writer.ln("# Predicates used in this file:"); 425 for (String s : sorted(predicateSet)) 426 writer.ln(" # " + s); 427 writer.ln(); 428 } 429 } 430 431 private void commitPrefixes(StringBuilder b) throws Exception { 432 for (String p : sorted(prefixes.keySet())) 433 b.append("@prefix " + p + ": <" + prefixes.get(p) + "> .\r\n"); 434 b.append("\r\n"); 435 } 436 437 // private String lastSubject = null; 438 // private String lastComment = ""; 439 440 private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException { 441 writer.ln("# - " + section.name + " " + Utilities.padLeft("", '-', 75 - section.name.length())); 442 writer.ln(); 443 for (Subject sbj : section.subjects) { 444 if (Utilities.noString(sbj.id)) { 445 writer.write("["); 446 } else { 447 writer.write(sbj.id); 448 writer.write(" "); 449 } 450 int i = 0; 451 452 for (Predicate p : sbj.predicates) { 453 writer.write(p.getPredicate()); 454 writer.write(" "); 455 boolean first = true; 456 for (Triple o : p.getObjects()) { 457 if (first) 458 first = false; 459 else 460 writer.write(", "); 461 if (o instanceof StringType) 462 writer.write(((StringType) o).value); 463 else { 464 writer.write("["); 465 if (write((Complex) o, writer, 4)) 466 writer.write("\r\n ]"); 467 else 468 writer.write("]"); 469 } 470 } 471 String comment = p.comment == null ? "" : " # " + p.comment; 472 i++; 473 if (i < sbj.predicates.size()) 474 writer.write(";" + comment + "\r\n "); 475 else { 476 if (Utilities.noString(sbj.id)) 477 writer.write("]"); 478 writer.write(" ." + comment + "\r\n\r\n"); 479 } 480 } 481 } 482 } 483 484 private void commitSection(StringBuilder b, Section section) throws Exception { 485 b.append("# - " + section.name + " " + Utilities.padLeft("", '-', 75 - section.name.length()) + "\r\n"); 486 b.append("\r\n"); 487 for (Subject sbj : section.subjects) { 488 b.append(Utilities.escapeXml(sbj.id)); 489 b.append(" "); 490 int i = 0; 491 492 for (Predicate p : sbj.predicates) { 493 b.append(p.makelink()); 494 b.append(" "); 495 boolean first = true; 496 for (Triple o : p.getObjects()) { 497 if (first) 498 first = false; 499 else 500 b.append(", "); 501 if (o instanceof StringType) 502 b.append(Utilities.escapeXml(((StringType) o).value)); 503 else { 504 b.append("["); 505 if (write((Complex) o, b, 4)) 506 b.append("\r\n ]"); 507 else 508 b.append("]"); 509 } 510 } 511 String comment = p.comment == null ? "" : " # " + p.comment; 512 i++; 513 if (i < sbj.predicates.size()) 514 b.append(";" + Utilities.escapeXml(comment) + "\r\n "); 515 else 516 b.append("." + Utilities.escapeXml(comment) + "\r\n\r\n"); 517 } 518 } 519 } 520 521 protected class LineOutputStreamWriter extends OutputStreamWriter { 522 private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException { 523 super(out, "UTF-8"); 524 } 525 526 private void ln() throws IOException { 527 write("\r\n"); 528 } 529 530 private void ln(String s) throws IOException { 531 write(s); 532 write("\r\n"); 533 } 534 } 535 536 public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException { 537 if (complex.predicates.isEmpty()) 538 return false; 539 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size() == 1 540 && complex.predicates.get(0).getObjects().get(0) instanceof StringType 541 && Utilities.noString(complex.predicates.get(0).comment)) { 542 writer.write(" " + complex.predicates.get(0).predicate + " " 543 + ((StringType) complex.predicates.get(0).getObjects().get(0)).value); 544 return false; 545 } 546 String left = Utilities.padLeft("", ' ', indent); 547 int i = 0; 548 for (Predicate po : complex.predicates) { 549 writer.write("\r\n"); 550 boolean first = true; 551 for (Triple o : po.getObjects()) { 552 if (first) { 553 first = false; 554 writer.write(left + " " + po.getPredicate() + " "); 555 } else 556 writer.write(", "); 557 if (o instanceof StringType) 558 writer.write(((StringType) o).value); 559 else { 560 writer.write("["); 561 if (write((Complex) o, writer, indent + 2)) 562 writer.write("\r\n" + left + " ]"); 563 else 564 writer.write(" ]"); 565 } 566 } 567 i++; 568 if (i < complex.predicates.size()) 569 writer.write(";"); 570 if (!Utilities.noString(po.comment)) 571 writer.write(" # " + escape(po.comment, false)); 572 } 573 return true; 574 } 575 576 public boolean write(Complex complex, StringBuilder b, int indent) throws Exception { 577 if (complex.predicates.isEmpty()) 578 return false; 579 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size() == 1 580 && complex.predicates.get(0).getObjects().get(0) instanceof StringType 581 && Utilities.noString(complex.predicates.get(0).comment)) { 582 b.append(" " + complex.predicates.get(0).makelink() + " " 583 + Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value)); 584 return false; 585 } 586 String left = Utilities.padLeft("", ' ', indent); 587 int i = 0; 588 for (Predicate po : complex.predicates) { 589 b.append("\r\n"); 590 boolean first = true; 591 for (Triple o : po.getObjects()) { 592 if (first) { 593 first = false; 594 b.append(left + " " + po.makelink() + " "); 595 } else 596 b.append(", "); 597 if (o instanceof StringType) 598 b.append(Utilities.escapeXml(((StringType) o).value)); 599 else { 600 b.append("["); 601 if (write((Complex) o, b, indent + 2)) 602 b.append(left + " ]"); 603 else 604 b.append(" ]"); 605 } 606 } 607 i++; 608 if (i < complex.predicates.size()) 609 b.append(";"); 610 if (!Utilities.noString(po.comment)) 611 b.append(" # " + Utilities.escapeXml(escape(po.comment, false))); 612 } 613 return true; 614 } 615 616 public abstract class TTLObject { 617 protected int line; 618 protected int col; 619 620 abstract public boolean hasValue(String value); 621 622 public int getLine() { 623 return line; 624 } 625 626 public int getCol() { 627 return col; 628 } 629 630 } 631 632 public class TTLLiteral extends TTLObject { 633 634 private String value; 635 private String type; 636 637 protected TTLLiteral(int line, int col) { 638 this.line = line; 639 this.col = col; 640 } 641 642 @Override 643 public boolean hasValue(String value) { 644 return value.equals(this.value); 645 } 646 647 public String getValue() { 648 return value; 649 } 650 651 public String getType() { 652 return type; 653 } 654 655 } 656 657 public class TTLURL extends TTLObject { 658 private String uri; 659 660 protected TTLURL(int line, int col) { 661 this.line = line; 662 this.col = col; 663 } 664 665 public String getUri() { 666 return uri; 667 } 668 669 public void setUri(String uri) throws FHIRFormatError { 670 if (!uri.matches(IRI_URL)) 671 throw new FHIRFormatError("Illegal URI " + uri); 672 this.uri = uri; 673 } 674 675 @Override 676 public boolean hasValue(String value) { 677 return value.equals(this.uri); 678 } 679 } 680 681 public class TTLList extends TTLObject { 682 private List<TTLObject> list = new ArrayList<Turtle.TTLObject>(); 683 684 public TTLList(TTLObject obj) { 685 super(); 686 list.add(obj); 687 } 688 689 @Override 690 public boolean hasValue(String value) { 691 for (TTLObject obj : list) 692 if (obj.hasValue(value)) 693 return true; 694 return false; 695 } 696 697 public List<TTLObject> getList() { 698 return list; 699 } 700 701 } 702 703 public class TTLComplex extends TTLObject { 704 private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>(); 705 706 protected TTLComplex(int line, int col) { 707 this.line = line; 708 this.col = col; 709 } 710 711 public Map<String, TTLObject> getPredicates() { 712 return predicates; 713 } 714 715 @Override 716 public boolean hasValue(String value) { 717 return false; 718 } 719 720 public void addPredicate(String uri, TTLObject obj) { 721 if (!predicates.containsKey(uri)) 722 predicates.put(uri, obj); 723 else { 724 TTLObject eo = predicates.get(uri); 725 TTLList list = null; 726 if (eo instanceof TTLList) 727 list = (TTLList) eo; 728 else { 729 list = new TTLList(eo); 730 predicates.put(uri, list); 731 } 732 list.list.add(obj); 733 } 734 } 735 736 public void addPredicates(Map<String, TTLObject> values) { 737 for (String s : values.keySet()) { 738 addPredicate(s, values.get(s)); 739 } 740 } 741 } 742 743 private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>(); 744 745 private Object base; 746 747 public enum LexerTokenType { 748 TOKEN, // [, ], :, @ 749 WORD, // a word 750 URI, // a URI <> 751 LITERAL // "..." 752 } 753 754 public class Lexer { 755 756 private String source; 757 private LexerTokenType type; 758 private int cursor, line, col, startLine, startCol; 759 private String token; 760 761 public Lexer(String source) throws FHIRFormatError { 762 this.source = source; 763 cursor = 0; 764 line = 1; 765 col = 1; 766 readNext(false); 767 } 768 769 private void skipWhitespace() { 770 while (cursor < source.length()) { 771 char ch = source.charAt(cursor); 772 if (Character.isWhitespace(ch)) 773 grab(); 774 else if (ch == '#') { 775 ch = grab(); 776 while (cursor < source.length()) { 777 ch = grab(); 778 if (ch == '\r' || ch == '\n') { 779 break; 780 } 781 } 782 } else 783 break; 784 } 785 } 786 787 private char grab() { 788 char c = source.charAt(cursor); 789 if (c == '\n') { 790 line++; 791 col = 1; 792 } else 793 col++; 794 795 cursor++; 796 return c; 797 } 798 799 private void readNext(boolean postColon) throws FHIRFormatError { 800 token = null; 801 type = null; 802 skipWhitespace(); 803 if (cursor >= source.length()) 804 return; 805 startLine = line; 806 startCol = col; 807 char ch = grab(); 808 StringBuilder b = new StringBuilder(); 809 switch (ch) { 810 case '@': 811 case '.': 812 case ':': 813 case ';': 814 case '^': 815 case ',': 816 case ']': 817 case '[': 818 case '(': 819 case ')': 820 type = LexerTokenType.TOKEN; 821 b.append(ch); 822 token = b.toString(); 823 return; 824 case '<': 825 while (cursor < source.length()) { 826 ch = grab(); 827 if (ch == '>') 828 break; 829 b.append(ch); 830 } 831 type = LexerTokenType.URI; 832 token = unescape(b.toString(), true); 833 return; 834 case '"': 835 b.append(ch); 836 String end = "\""; 837 while (cursor < source.length()) { 838 ch = grab(); 839 if (b.length() == 2 && ch != '"' && b.equals("\"\"")) { 840 cursor--; 841 break; 842 } 843 b.append(ch); 844 if (ch == '"') 845 if (b.toString().equals("\"\"\"")) 846 end = "\"\"\""; 847 else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\" + end)) 848 break; 849 } 850 type = LexerTokenType.LITERAL; 851 token = unescape(b.toString().substring(end.length(), b.length() - end.length()), false); 852 return; 853 case '\'': 854 b.append(ch); 855 end = "'"; 856 while (cursor < source.length()) { 857 ch = grab(); 858 if (b.equals("''") && ch != '\'') { 859 cursor--; 860 break; 861 } 862 b.append(ch); 863 if (b.toString().equals("'''")) 864 end = "'''"; 865 else if (!b.toString().equals("''") && b.toString().endsWith(end)) 866 break; 867 } 868 type = LexerTokenType.LITERAL; 869 token = unescape(b.toString().substring(end.length(), b.length() - end.length()), false); 870 return; 871 default: 872 if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') 873 || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) { 874 b.append(ch); 875 while (cursor < source.length()) { 876 ch = grab(); 877 // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 878 // 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', 879 // '-', '+', '.', '\\', '#')) 880 if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') 881 || ((ch == ':') && !postColon)) 882 break; 883 b.append(ch); 884 } 885 type = LexerTokenType.WORD; 886 token = b.toString(); 887 cursor--; 888 return; 889 } else 890 throw error("unexpected lexer char " + ch); 891 } 892 } 893 894 private String unescape(String s, boolean isUri) throws FHIRFormatError { 895 StringBuilder b = new StringBuilder(); 896 int i = 0; 897 while (i < s.length()) { 898 char ch = s.charAt(i); 899 if (ch == '\\' && i < s.length() - 1) { 900 i++; 901 switch (s.charAt(i)) { 902 case 't': 903 b.append('\t'); 904 break; 905 case 'r': 906 b.append('\r'); 907 break; 908 case 'n': 909 b.append('\n'); 910 break; 911 case 'f': 912 b.append('\f'); 913 break; 914 case '\'': 915 b.append('\''); 916 break; 917 case '\"': 918 b.append('\"'); 919 break; 920 case '\\': 921 b.append('\\'); 922 break; 923 case '/': 924 b.append('\\'); 925 break; 926 case 'U': 927 case 'u': 928 i++; 929 int l = 4; 930 int uc = Integer.parseInt(s.substring(i, i + l), 16); 931 if (uc < (isUri ? 33 : 32)) { 932 l = 8; 933 uc = Integer.parseInt(s.substring(i, i + 8), 16); 934 } 935 if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) 936 throw new FHIRFormatError("Illegal unicode character"); 937 b.append(Character.toString(uc)); 938 i = i + l; 939 break; 940 default: 941 throw new FHIRFormatError("Unknown character escape \\" + s.charAt(i)); 942 } 943 } else { 944 b.append(ch); 945 } 946 i++; 947 } 948 return b.toString(); 949 } 950 951 public boolean done() { 952 return type == null; 953 } 954 955 public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError { 956 if (type != null && this.type != type) 957 throw error("Unexpected type. Found " + this.type.toString() + " looking for a " + type.toString()); 958 String res = token; 959 readNext(postColon); 960 return res; 961 } 962 963 public String peek() throws Exception { 964 return token; 965 } 966 967 public LexerTokenType peekType() { 968 return type; 969 } 970 971 public void token(String token) throws FHIRFormatError { 972 if (!token.equals(this.token)) 973 throw error("Unexpected word " + this.token + " looking for " + token); 974 next(LexerTokenType.TOKEN, token.equals(":")); 975 } 976 977 public void word(String word) throws Exception { 978 if (!word.equals(this.token)) 979 throw error("Unexpected word " + this.token + " looking for " + word); 980 next(LexerTokenType.WORD, false); 981 } 982 983 public String word() throws FHIRFormatError { 984 String t = token; 985 next(LexerTokenType.WORD, false); 986 return t; 987 } 988 989 public String uri() throws FHIRFormatError { 990 if (this.type != LexerTokenType.URI) 991 throw error("Unexpected type. Found " + this.type.toString() + " looking for a URI"); 992 String t = token; 993 next(LexerTokenType.URI, false); 994 return t; 995 } 996 997 public String literal() throws FHIRFormatError { 998 if (this.type != LexerTokenType.LITERAL) 999 throw error("Unexpected type. Found " + this.type.toString() + " looking for a Literal"); 1000 String t = token; 1001 next(LexerTokenType.LITERAL, false); 1002 return t; 1003 } 1004 1005 public boolean peek(LexerTokenType type, String token) { 1006 return this.type == type && this.token.equals(token); 1007 } 1008 1009 public FHIRFormatError error(String message) { 1010 return new FHIRFormatError("Syntax Error parsing Turtle on line " + Integer.toString(line) + " col " 1011 + Integer.toString(col) + ": " + message); 1012 } 1013 1014 } 1015 // 1016 // public void importTtl(Section sct, String ttl) throws Exception { 1017 // if (!Utilities.noString(ttl)) { 1018 // // System.out.println("import ttl: "+ttl); 1019 // Lexer lexer = new Lexer(ttl); 1020 // String subject = null; 1021 // String predicate = null; 1022 // while (!lexer.done()) { 1023 // if (subject == null) 1024 // subject = lexer.next(); 1025 // if (predicate == null) 1026 // predicate = lexer.next(); 1027 // if (lexer.peekType() == null) { 1028 // throw new Error("Unexpected end of input parsing turtle"); 1029 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1030 // sct.triple(subject, predicate, lexer.next()); 1031 // } else if (lexer.peek() == null) { 1032 // throw new Error("Unexected - turtle lexer found no token"); 1033 // } else if (lexer.peek().equals("[")) { 1034 // sct.triple(subject, predicate, importComplex(lexer)); 1035 // } else 1036 // throw new Exception("Not done yet"); 1037 // String n = lexer.next(); 1038 // if (Utilities.noString(n)) 1039 // break; 1040 // if (n.equals(".")) { 1041 // subject = null; 1042 // predicate = null; 1043 // } else if (n.equals(";")) { 1044 // predicate = null; 1045 // } else if (!n.equals(",")) 1046 // throw new Exception("Unexpected token "+n); 1047 // } 1048 // } 1049 // } 1050 1051 public void parse(String source) throws FHIRFormatError { 1052 prefixes.clear(); 1053 prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#"); 1054 parse(new Lexer(source)); 1055 } 1056 1057 private void parse(Lexer lexer) throws FHIRFormatError { 1058 boolean doPrefixes = true; 1059 while (!lexer.done()) { 1060 if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") 1061 || lexer.peek(LexerTokenType.WORD, "BASE"))) { 1062 boolean sparqlStyle = false; 1063 boolean base = false; 1064 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1065 lexer.token("@"); 1066 String p = lexer.word(); 1067 if (p.equals("base")) 1068 base = true; 1069 else if (!p.equals("prefix")) 1070 throw new FHIRFormatError("Unexpected token " + p); 1071 } else { 1072 sparqlStyle = true; 1073 String p = lexer.word(); 1074 if (p.equals("BASE")) 1075 base = true; 1076 else if (!p.equals("PREFIX")) 1077 throw new FHIRFormatError("Unexpected token " + p); 1078 } 1079 String prefix = null; 1080 if (!base) { 1081 prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null; 1082 lexer.token(":"); 1083 } 1084 String url = lexer.next(LexerTokenType.URI, false); 1085 if (!sparqlStyle) 1086 lexer.token("."); 1087 if (!base) 1088 prefix(prefix, url); 1089 else if (this.base == null) 1090 this.base = url; 1091 else 1092 throw new FHIRFormatError("Duplicate @base"); 1093 } else if (lexer.peekType() == LexerTokenType.URI) { 1094 doPrefixes = false; 1095 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1096 uri.setUri(lexer.uri()); 1097 TTLComplex complex = parseComplex(lexer); 1098 objects.put(uri, complex); 1099 lexer.token("."); 1100 } else if (lexer.peekType() == LexerTokenType.WORD) { 1101 doPrefixes = false; 1102 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1103 String pfx = lexer.word(); 1104 if (!prefixes.containsKey(pfx)) 1105 throw new FHIRFormatError("Unknown prefix " + pfx); 1106 lexer.token(":"); 1107 uri.setUri(prefixes.get(pfx) + lexer.word()); 1108 TTLComplex complex = parseComplex(lexer); 1109 objects.put(uri, complex); 1110 lexer.token("."); 1111 } else if (lexer.peek(LexerTokenType.TOKEN, ":")) { 1112 doPrefixes = false; 1113 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1114 lexer.token(":"); 1115 if (!prefixes.containsKey(null)) 1116 throw new FHIRFormatError("Unknown prefix ''"); 1117 uri.setUri(prefixes.get(null) + lexer.word()); 1118 TTLComplex complex = parseComplex(lexer); 1119 objects.put(uri, complex); 1120 lexer.token("."); 1121 } else if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1122 doPrefixes = false; 1123 lexer.token("["); 1124 TTLComplex bnode = parseComplex(lexer); 1125 lexer.token("]"); 1126 TTLComplex complex = null; 1127 if (!lexer.peek(LexerTokenType.TOKEN, ".")) { 1128 complex = parseComplex(lexer); 1129 // at this point, we collapse bnode and complex, and give bnode a fictional 1130 // identity 1131 bnode.addPredicates(complex.predicates); 1132 } 1133 1134 objects.put(anonymousId(), bnode); 1135 lexer.token("."); 1136 } else 1137 throw lexer.error("Unknown token " + lexer.token); 1138 } 1139 } 1140 1141 private TTLURL anonymousId() throws FHIRFormatError { 1142 TTLURL url = new TTLURL(-1, -1); 1143 url.setUri("urn:uuid:" + UUID.randomUUID().toString().toLowerCase()); 1144 return url; 1145 } 1146 1147 private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError { 1148 TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol); 1149 1150 boolean done = lexer.peek(LexerTokenType.TOKEN, "]"); 1151 while (!done) { 1152 String uri = null; 1153 if (lexer.peekType() == LexerTokenType.URI) 1154 uri = lexer.uri(); 1155 else { 1156 String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1157 if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) { 1158 lexer.token(":"); 1159 if (!prefixes.containsKey(t)) 1160 throw new FHIRFormatError("unknown prefix " + t); 1161 uri = prefixes.get(t) + lexer.word(); 1162 } else if (t.equals("a")) 1163 uri = prefixes.get("rdfs") + "type"; 1164 else 1165 throw lexer.error("unexpected token"); 1166 } 1167 1168 boolean inlist = false; 1169 if (lexer.peek(LexerTokenType.TOKEN, "(")) { 1170 inlist = true; 1171 lexer.token("("); 1172 } 1173 1174 boolean rpt = false; 1175 do { 1176 if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1177 lexer.token("["); 1178 result.addPredicate(uri, parseComplex(lexer)); 1179 lexer.token("]"); 1180 } else if (lexer.peekType() == LexerTokenType.URI) { 1181 TTLURL u = new TTLURL(lexer.startLine, lexer.startCol); 1182 u.setUri(lexer.uri()); 1183 result.addPredicate(uri, u); 1184 } else if (lexer.peekType() == LexerTokenType.LITERAL) { 1185 TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol); 1186 u.value = lexer.literal(); 1187 if (lexer.peek(LexerTokenType.TOKEN, "^")) { 1188 lexer.token("^"); 1189 lexer.token("^"); 1190 if (lexer.peekType() == LexerTokenType.URI) { 1191 u.type = lexer.uri(); 1192 } else { 1193 String l = lexer.word(); 1194 lexer.token(":"); 1195 u.type = prefixes.get(l) + lexer.word(); 1196 } 1197 } 1198 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1199 // lang tag - skip it 1200 lexer.token("@"); 1201 String lang = lexer.word(); 1202 if (!lang.matches(LANG_REGEX)) { 1203 throw new FHIRFormatError("Invalid Language tag " + lang); 1204 } 1205 } 1206 result.addPredicate(uri, u); 1207 } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) { 1208 int sl = lexer.startLine; 1209 int sc = lexer.startCol; 1210 String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1211 if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1212 TTLLiteral u = new TTLLiteral(sl, sc); 1213 u.value = pfx; 1214 result.addPredicate(uri, u); 1215 } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1216 TTLLiteral u = new TTLLiteral(sl, sc); 1217 u.value = pfx; 1218 result.addPredicate(uri, u); 1219 } else { 1220 if (!prefixes.containsKey(pfx)) 1221 throw new FHIRFormatError("Unknown prefix " + (pfx == null ? "''" : pfx)); 1222 TTLURL u = new TTLURL(sl, sc); 1223 lexer.token(":"); 1224 u.setUri(prefixes.get(pfx) + lexer.word()); 1225 result.addPredicate(uri, u); 1226 } 1227 } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) { 1228 throw new FHIRFormatError("unexpected token " + lexer.token); 1229 } 1230 1231 if (inlist) 1232 rpt = !lexer.peek(LexerTokenType.TOKEN, ")"); 1233 else { 1234 rpt = lexer.peek(LexerTokenType.TOKEN, ","); 1235 if (rpt) 1236 lexer.readNext(false); 1237 } 1238 } while (rpt); 1239 if (inlist) 1240 lexer.token(")"); 1241 1242 if (lexer.peek(LexerTokenType.TOKEN, ";")) { 1243 while ((lexer.peek(LexerTokenType.TOKEN, ";"))) 1244 lexer.token(";"); 1245 done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]"); 1246 } else { 1247 done = true; 1248 } 1249 } 1250 return result; 1251 } 1252 1253 public Map<TTLURL, TTLComplex> getObjects() { 1254 return objects; 1255 } 1256 1257 public TTLComplex getObject(String url) { 1258 for (TTLURL t : objects.keySet()) { 1259 if (t.getUri().equals(url)) 1260 return objects.get(t); 1261 } 1262 return null; 1263 } 1264 1265 // public void parseFragment(Lexer lexer) throws Exception { 1266 // lexer.next(); // read [ 1267 // Complex obj = new Complex(); 1268 // while (!lexer.peek().equals("]")) { 1269 // String predicate = lexer.next(); 1270 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == 1271 // LexerTokenType.LITERAL) { 1272 // obj.predicate(predicate, lexer.next()); 1273 // } else if (lexer.peek().equals("[")) { 1274 // obj.predicate(predicate, importComplex(lexer)); 1275 // } else 1276 // throw new Exception("Not done yet"); 1277 // if (lexer.peek().equals(";")) 1278 // lexer.next(); 1279 // } 1280 // lexer.next(); // read ] 1281 // //return obj; 1282 // } 1283 // 1284 // public void importTtl(Section sct, String ttl) throws Exception { 1285 // if (!Utilities.noString(ttl)) { 1286 // // System.out.println("import ttl: "+ttl); 1287 // Lexer lexer = new Lexer(ttl); 1288 // String subject = null; 1289 // String predicate = null; 1290 // while (!lexer.done()) { 1291 // if (subject == null) 1292 // subject = lexer.next(); 1293 // if (predicate == null) 1294 // predicate = lexer.next(); 1295 // if (lexer.peekType() == null) { 1296 // throw new Error("Unexpected end of input parsing turtle"); 1297 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1298 // sct.triple(subject, predicate, lexer.next()); 1299 // } else if (lexer.peek() == null) { 1300 // throw new Error("Unexected - turtle lexer found no token"); 1301 // } else if (lexer.peek().equals("[")) { 1302 // sct.triple(subject, predicate, importComplex(lexer)); 1303 // } else 1304 // throw new Exception("Not done yet"); 1305 // String n = lexer.next(); 1306 // if (Utilities.noString(n)) 1307 // break; 1308 // if (n.equals(".")) { 1309 // subject = null; 1310 // predicate = null; 1311 // } else if (n.equals(";")) { 1312 // predicate = null; 1313 // } else if (!n.equals(",")) 1314 // throw new Exception("Unexpected token "+n); 1315 // } 1316 // } 1317 // } 1318 1319 // private Complex importComplex(Lexer lexer) throws Exception { 1320 // lexer.next(); // read [ 1321 // Complex obj = new Complex(); 1322 // while (!lexer.peek().equals("]")) { 1323 // String predicate = lexer.next(); 1324 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == 1325 // LexerTokenType.LITERAL) { 1326 // obj.predicate(predicate, lexer.next()); 1327 // } else if (lexer.peek().equals("[")) { 1328 // obj.predicate(predicate, importComplex(lexer)); 1329 // } else 1330 // throw new Exception("Not done yet"); 1331 // if (lexer.peek().equals(";")) 1332 // lexer.next(); 1333 // } 1334 // lexer.next(); // read ] 1335 // return obj; 1336 // } 1337 1338}