
001package org.hl7.fhir.dstu3.utils.formats; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import java.io.IOException; 035import java.io.OutputStream; 036import java.io.OutputStreamWriter; 037import java.io.UnsupportedEncodingException; 038import java.util.ArrayList; 039import java.util.Collections; 040import java.util.HashMap; 041import java.util.HashSet; 042import java.util.List; 043import java.util.Map; 044import java.util.Set; 045import java.util.UUID; 046 047import org.hl7.fhir.exceptions.FHIRFormatError; 048import org.hl7.fhir.utilities.Utilities; 049 050@Deprecated 051public class Turtle { 052 053 public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE"; 054 055 public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; 056 public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?"; 057 058 // Object model 059 public abstract class Triple { 060 private String uri; 061 } 062 063 public class StringType extends Triple { 064 private String value; 065 066 public StringType(String value) { 067 super(); 068 this.value = value; 069 } 070 } 071 072 public class Complex extends Triple { 073 protected List<Predicate> predicates = new ArrayList<Predicate>(); 074 075 public Complex predicate(String predicate, String object) { 076 predicateSet.add(predicate); 077 objectSet.add(object); 078 return predicate(predicate, new StringType(object)); 079 } 080 081 public Complex linkedPredicate(String predicate, String object, String link) { 082 predicateSet.add(predicate); 083 objectSet.add(object); 084 return linkedPredicate(predicate, new StringType(object), link); 085 } 086 087 public Complex predicate(String predicate, Triple object) { 088 Predicate p = getPredicate(predicate); 089 if (p == null) { 090 p = new Predicate(); 091 p.predicate = predicate; 092 predicateSet.add(predicate); 093 predicates.add(p); 094 } 095 if (object instanceof StringType) 096 objectSet.add(((StringType) object).value); 097 p.objects.add(object); 098 return this; 099 } 100 101 protected Predicate getPredicate(String predicate) { 102 for (Predicate p : predicates) 103 if (p.predicate.equals(predicate)) 104 return p; 105 return null; 106 } 107 108 public Complex linkedPredicate(String predicate, Triple object, String link) { 109 Predicate p = getPredicate(predicate); 110 if (p == null) { 111 p = new Predicate(); 112 p.predicate = predicate; 113 p.link = link; 114 predicateSet.add(predicate); 115 predicates.add(p); 116 } 117 if (object instanceof StringType) 118 objectSet.add(((StringType) object).value); 119 p.objects.add(object); 120 return this; 121 } 122 123 public Complex predicate(String predicate) { 124 predicateSet.add(predicate); 125 Complex c = complex(); 126 predicate(predicate, c); 127 return c; 128 } 129 130 public Complex linkedPredicate(String predicate, String link) { 131 predicateSet.add(predicate); 132 Complex c = complex(); 133 linkedPredicate(predicate, c, link); 134 return c; 135 } 136 137 public void prefix(String code, String url) { 138 Turtle.this.prefix(code, url); 139 } 140 } 141 142 private class Predicate { 143 protected String predicate; 144 protected String link; 145 protected List<Triple> objects = new ArrayList<Turtle.Triple>(); 146 protected String comment; 147 148 public String getPredicate() { 149 return predicate; 150 } 151 public String makelink() { 152 if (link == null) 153 return predicate; 154 else 155 return "<a href=\""+link+"\">"+Utilities.escapeXml(predicate)+"</a>"; 156 } 157 158 public List<Triple> getObjects() { 159 return objects; 160 } 161 public String getComment() { 162 return comment; 163 } 164 } 165 166 public class Subject extends Complex { 167 private String id; 168 169 public Predicate predicate(String predicate, Triple object, String comment) { 170 Predicate p = getPredicate(predicate); 171 if (p == null) { 172 p = new Predicate(); 173 p.predicate = predicate; 174 predicateSet.add(predicate); 175 predicates.add(p); 176 p.comment = comment; 177 } 178 if (object instanceof StringType) 179 objectSet.add(((StringType) object).value); 180 p.objects.add(object); 181 return p; 182 } 183 184 public void comment(String comment) { 185 if (!Utilities.noString(comment)) { 186 predicate("rdfs:comment", literal(comment)); 187 predicate("dcterms:description", literal(comment)); 188 } 189 } 190 191 public void label(String label) { 192 if (!Utilities.noString(label)) { 193 predicate("rdfs:label", literal(label)); 194 predicate("dc:title", literal(label)); 195 } 196 } 197 198 } 199 200 public class Section { 201 private String name; 202 private List<Subject> subjects = new ArrayList<Subject>(); 203 204 public Subject triple(String subject, String predicate, String object, String comment) { 205 return triple(subject, predicate, new StringType(object), comment); 206 } 207 208 public Subject triple(String subject, String predicate, String object) { 209 return triple(subject, predicate, new StringType(object)); 210 } 211 212 public Subject triple(String subject, String predicate, Triple object) { 213 return triple(subject, predicate, object, null); 214 } 215 216 public Subject triple(String subject, String predicate, Triple object, String comment) { 217 Subject s = subject(subject); 218 s.predicate(predicate, object, comment); 219 return s; 220 } 221 222 public void comment(String subject, String comment) { 223 triple(subject, "rdfs:comment", literal(comment)); 224 triple(subject, "dcterms:description", literal(comment)); 225 } 226 227 public void label(String subject, String comment) { 228 triple(subject, "rdfs:label", literal(comment)); 229 triple(subject, "dc:title", literal(comment)); 230 } 231 232 public Subject subject(String subject) { 233 for (Subject ss : subjects) 234 if (ss.id.equals(subject)) 235 return ss; 236 Subject s = new Subject(); 237 s.id = subject; 238 subjects.add(s); 239 return s; 240 } 241 242 public boolean hasSubject(String subject) { 243 for (Subject ss : subjects) 244 if (ss.id.equals(subject)) 245 return true; 246 return false; 247 } 248 } 249 250 private List<Section> sections = new ArrayList<Section>(); 251 protected Set<String> subjectSet = new HashSet<String>(); 252 protected Set<String> predicateSet = new HashSet<String>(); 253 protected Set<String> objectSet = new HashSet<String>(); 254 protected Map<String, String> prefixes = new HashMap<String, String>(); 255 256 public void prefix(String code, String url) { 257 prefixes.put(code, url); 258 } 259 260 protected boolean hasSection(String sn) { 261 for (Section s : sections) 262 if (s.name.equals(sn)) 263 return true; 264 return false; 265 266 } 267 268 public Section section(String sn) { 269 if (hasSection(sn)) 270 throw new Error("Duplicate section name "+sn); 271 Section s = new Section(); 272 s.name = sn; 273 sections.add(s); 274 return s; 275 } 276 277 protected String matches(String url, String prefixUri, String prefix) { 278 if (url.startsWith(prefixUri)) { 279 prefixes.put(prefix, prefixUri); 280 return prefix+":"+escape(url.substring(prefixUri.length()), false); 281 } 282 return null; 283 } 284 285 protected Complex complex() { 286 return new Complex(); 287 } 288 289 private void checkPrefix(Triple object) { 290 if (object instanceof StringType) 291 checkPrefix(((StringType) object).value); 292 else { 293 Complex obj = (Complex) object; 294 for (Predicate po : obj.predicates) { 295 checkPrefix(po.getPredicate()); 296 for (Triple o : po.getObjects()) 297 checkPrefix(o); 298 } 299 } 300 } 301 302 protected void checkPrefix(String pname) { 303 if (pname.startsWith("(")) 304 return; 305 if (pname.startsWith("\"")) 306 return; 307 if (pname.startsWith("<")) 308 return; 309 310 if (pname.contains(":")) { 311 String prefix = pname.substring(0, pname.indexOf(":")); 312 if (!prefixes.containsKey(prefix) && !prefix.equals("http")&& !prefix.equals("urn")) 313 throw new Error("undefined prefix "+prefix); 314 } 315 } 316 317 protected StringType literal(String s) { 318 return new StringType("\""+escape(s, true)+"\""); 319 } 320 321 protected StringType literalTyped(String s, String t) { 322 return new StringType("\""+escape(s, true)+"\"^^xs:"+t); 323 } 324 325 public static String escape(String s, boolean string) { 326 if (s == null) 327 return ""; 328 329 StringBuilder b = new StringBuilder(); 330 for (char c : s.toCharArray()) { 331 if (c == '\r') 332 b.append("\\r"); 333 else if (c == '\n') 334 b.append("\\n"); 335 else if (c == '"') 336 b.append("\\\""); 337 else if (c == '\\') 338 b.append("\\\\"); 339 else if (c == '/' && !string) 340 b.append("\\/"); 341 else 342 b.append(c); 343 } 344 return b.toString(); 345 } 346 347 protected String pctEncode(String s) { 348 if (s == null) 349 return ""; 350 351 StringBuilder b = new StringBuilder(); 352 for (char c : s.toCharArray()) { 353 if (c >= 'A' && c <= 'Z') 354 b.append(c); 355 else if (c >= 'a' && c <= 'z') 356 b.append(c); 357 else if (c >= '0' && c <= '9') 358 b.append(c); 359 else if (c == '.') 360 b.append(c); 361 else 362 b.append("%"+Integer.toHexString(c)); 363 } 364 return b.toString(); 365 } 366 367 protected List<String> sorted(Set<String> keys) { 368 List<String> names = new ArrayList<String>(); 369 names.addAll(keys); 370 Collections.sort(names); 371 return names; 372 } 373 374 public void commit(OutputStream destination, boolean header) throws IOException { 375 LineOutputStreamWriter writer = new LineOutputStreamWriter(destination); 376 commitPrefixes(writer, header); 377 for (Section s : sections) { 378 commitSection(writer, s); 379 } 380 writer.ln("# -------------------------------------------------------------------------------------"); 381 writer.ln(); 382 writer.flush(); 383 writer.close(); 384 } 385 386 public String asHtml() throws Exception { 387 StringBuilder b = new StringBuilder(); 388 b.append("<pre class=\"rdf\">\r\n"); 389 commitPrefixes(b); 390 for (Section s : sections) { 391 commitSection(b, s); 392 } 393 b.append("</pre>\r\n"); 394 b.append("\r\n"); 395 return b.toString(); 396 } 397 398 private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException { 399 if (header) { 400 writer.ln("# FHIR Sub-definitions"); 401 writer.write("# This is work in progress, and may change rapidly \r\n"); 402 writer.ln(); 403 writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 404 writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n"); 405 writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n"); 406 writer.write("# appropriate\" means that the predicates are a faithful representation \r\n"); 407 writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n"); 408 writer.ln(); 409 writer.write("# Where the community agrees on additional predicate statements (such \r\n"); 410 writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n"); 411 writer.write("# predicates \r\n"); 412 writer.ln(); 413 writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n"); 414 writer.ln(); 415 writer.write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n"); 416 writer.ln(); 417 } 418 for (String p : sorted(prefixes.keySet())) 419 writer.ln("@prefix "+p+": <"+prefixes.get(p)+"> ."); 420 writer.ln(); 421 if (header) { 422 writer.ln("# Predicates used in this file:"); 423 for (String s : sorted(predicateSet)) 424 writer.ln(" # "+s); 425 writer.ln(); 426 } 427 } 428 429 private void commitPrefixes(StringBuilder b) throws Exception { 430 for (String p : sorted(prefixes.keySet())) 431 b.append("@prefix "+p+": <"+prefixes.get(p)+"> .\r\n"); 432 b.append("\r\n"); 433 } 434 435 // private String lastSubject = null; 436 // private String lastComment = ""; 437 438 private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException { 439 writer.ln("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())); 440 writer.ln(); 441 for (Subject sbj : section.subjects) { 442 if (Utilities.noString(sbj.id)) { 443 writer.write("["); 444 } else { 445 writer.write(sbj.id); 446 writer.write(" "); 447 } 448 int i = 0; 449 450 for (Predicate p : sbj.predicates) { 451 writer.write(p.getPredicate()); 452 writer.write(" "); 453 boolean first = true; 454 for (Triple o : p.getObjects()) { 455 if (first) 456 first = false; 457 else 458 writer.write(", "); 459 if (o instanceof StringType) 460 writer.write(((StringType) o).value); 461 else { 462 writer.write("["); 463 if (write((Complex) o, writer, 4)) 464 writer.write("\r\n ]"); 465 else 466 writer.write("]"); 467 } 468 } 469 String comment = p.comment == null? "" : " # "+p.comment; 470 i++; 471 if (i < sbj.predicates.size()) 472 writer.write(";"+comment+"\r\n "); 473 else { 474 if (Utilities.noString(sbj.id)) 475 writer.write("]"); 476 writer.write(" ."+comment+"\r\n\r\n"); 477 } 478 } 479 } 480 } 481 482 private void commitSection(StringBuilder b, Section section) throws Exception { 483 b.append("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())+"\r\n"); 484 b.append("\r\n"); 485 for (Subject sbj : section.subjects) { 486 b.append(Utilities.escapeXml(sbj.id)); 487 b.append(" "); 488 int i = 0; 489 490 for (Predicate p : sbj.predicates) { 491 b.append(p.makelink()); 492 b.append(" "); 493 boolean first = true; 494 for (Triple o : p.getObjects()) { 495 if (first) 496 first = false; 497 else 498 b.append(", "); 499 if (o instanceof StringType) 500 b.append(Utilities.escapeXml(((StringType) o).value)); 501 else { 502 b.append("["); 503 if (write((Complex) o, b, 4)) 504 b.append("\r\n ]"); 505 else 506 b.append("]"); 507 } 508 } 509 String comment = p.comment == null? "" : " # "+p.comment; 510 i++; 511 if (i < sbj.predicates.size()) 512 b.append(";"+Utilities.escapeXml(comment)+"\r\n "); 513 else 514 b.append("."+Utilities.escapeXml(comment)+"\r\n\r\n"); 515 } 516 } 517 } 518 519 protected class LineOutputStreamWriter extends OutputStreamWriter { 520 private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException { 521 super(out, "UTF-8"); 522 } 523 524 private void ln() throws IOException { 525 write("\r\n"); 526 } 527 528 private void ln(String s) throws IOException { 529 write(s); 530 write("\r\n"); 531 } 532 } 533 534 public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException { 535 if (complex.predicates.isEmpty()) 536 return false; 537 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 538 writer.write(" "+complex.predicates.get(0).predicate+" "+((StringType) complex.predicates.get(0).getObjects().get(0)).value); 539 return false; 540 } 541 String left = Utilities.padLeft("", ' ', indent); 542 int i = 0; 543 for (Predicate po : complex.predicates) { 544 writer.write("\r\n"); 545 boolean first = true; 546 for (Triple o : po.getObjects()) { 547 if (first) { 548 first = false; 549 writer.write(left+" "+po.getPredicate()+" "); 550 } else 551 writer.write(", "); 552 if (o instanceof StringType) 553 writer.write(((StringType) o).value); 554 else { 555 writer.write("["); 556 if (write((Complex) o, writer, indent+2)) 557 writer.write("\r\n"+left+" ]"); 558 else 559 writer.write(" ]"); 560 } 561 } 562 i++; 563 if (i < complex.predicates.size()) 564 writer.write(";"); 565 if (!Utilities.noString(po.comment)) 566 writer.write(" # "+escape(po.comment, false)); 567 } 568 return true; 569 } 570 571 public boolean write(Complex complex, StringBuilder b, int indent) throws Exception { 572 if (complex.predicates.isEmpty()) 573 return false; 574 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 575 b.append(" "+complex.predicates.get(0).makelink()+" "+Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value)); 576 return false; 577 } 578 String left = Utilities.padLeft("", ' ', indent); 579 int i = 0; 580 for (Predicate po : complex.predicates) { 581 b.append("\r\n"); 582 boolean first = true; 583 for (Triple o : po.getObjects()) { 584 if (first) { 585 first = false; 586 b.append(left+" "+po.makelink()+" "); 587 } else 588 b.append(", "); 589 if (o instanceof StringType) 590 b.append(Utilities.escapeXml(((StringType) o).value)); 591 else { 592 b.append("["); 593 if (write((Complex) o, b, indent+2)) 594 b.append(left+" ]"); 595 else 596 b.append(" ]"); 597 } 598 } 599 i++; 600 if (i < complex.predicates.size()) 601 b.append(";"); 602 if (!Utilities.noString(po.comment)) 603 b.append(" # "+Utilities.escapeXml(escape(po.comment, false))); 604 } 605 return true; 606 } 607 608 609 public abstract class TTLObject { 610 protected int line; 611 protected int col; 612 613 abstract public boolean hasValue(String value); 614 615 public int getLine() { 616 return line; 617 } 618 619 public int getCol() { 620 return col; 621 } 622 623 624 } 625 626 627 public class TTLLiteral extends TTLObject { 628 629 private String value; 630 private String type; 631 protected TTLLiteral(int line, int col) { 632 this.line = line; 633 this.col = col; 634 } 635 @Override 636 public boolean hasValue(String value) { 637 return value.equals(this.value); 638 } 639 public String getValue() { 640 return value; 641 } 642 public String getType() { 643 return type; 644 } 645 646 } 647 648 public class TTLURL extends TTLObject { 649 private String uri; 650 651 protected TTLURL(int line, int col) { 652 this.line = line; 653 this.col = col; 654 } 655 656 public String getUri() { 657 return uri; 658 } 659 660 public void setUri(String uri) throws FHIRFormatError { 661 if (!uri.matches(IRI_URL)) 662 throw new FHIRFormatError("Illegal URI "+uri); 663 this.uri = uri; 664 } 665 666 @Override 667 public boolean hasValue(String value) { 668 return value.equals(this.uri); 669 } 670 } 671 672 public class TTLList extends TTLObject { 673 private List<TTLObject> list = new ArrayList<Turtle.TTLObject>(); 674 675 public TTLList(TTLObject obj) { 676 super(); 677 list.add(obj); 678 } 679 680 @Override 681 public boolean hasValue(String value) { 682 for (TTLObject obj : list) 683 if (obj.hasValue(value)) 684 return true; 685 return false; 686 } 687 688 public List<TTLObject> getList() { 689 return list; 690 } 691 692 } 693 public class TTLComplex extends TTLObject { 694 private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>(); 695 protected TTLComplex(int line, int col) { 696 this.line = line; 697 this.col = col; 698 } 699 public Map<String, TTLObject> getPredicates() { 700 return predicates; 701 } 702 @Override 703 public boolean hasValue(String value) { 704 return false; 705 } 706 public void addPredicate(String uri, TTLObject obj) { 707 if (!predicates.containsKey(uri)) 708 predicates.put(uri, obj); 709 else { 710 TTLObject eo = predicates.get(uri); 711 TTLList list = null; 712 if (eo instanceof TTLList) 713 list = (TTLList) eo; 714 else { 715 list = new TTLList(eo); 716 predicates.put(uri, list); 717 } 718 list.list.add(obj); 719 } 720 } 721 public void addPredicates(Map<String, TTLObject> values) { 722 for (String s : values.keySet()) { 723 addPredicate(s, values.get(s)); 724 } 725 } 726 } 727 728 private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>(); 729 730 private Object base; 731 732 public enum LexerTokenType { 733 TOKEN, // [, ], :, @ 734 WORD, // a word 735 URI, // a URI <> 736 LITERAL // "..." 737 } 738 739 public class Lexer { 740 741 742 private String source; 743 private LexerTokenType type; 744 private int cursor, line, col, startLine, startCol; 745 private String token; 746 747 public Lexer(String source) throws FHIRFormatError { 748 this.source = source; 749 cursor = 0; 750 line = 1; 751 col = 1; 752 readNext(false); 753 } 754 755 private void skipWhitespace() { 756 while (cursor < source.length()) { 757 char ch = source.charAt(cursor); 758 if (Character.isWhitespace(ch)) 759 grab(); 760 else if (ch == '#') { 761 ch = grab(); 762 while (cursor < source.length()) { 763 ch = grab(); 764 if (ch == '\r' || ch == '\n') { 765 break; 766 } 767 } 768 } else 769 break; 770 } 771 } 772 773 private char grab() { 774 char c = source.charAt(cursor); 775 if (c == '\n') { 776 line++; 777 col = 1; 778 } else 779 col++; 780 781 cursor++; 782 return c; 783 } 784 785 private void readNext(boolean postColon) throws FHIRFormatError { 786 token = null; 787 type = null; 788 skipWhitespace(); 789 if (cursor >= source.length()) 790 return; 791 startLine = line; 792 startCol = col; 793 char ch = grab(); 794 StringBuilder b = new StringBuilder(); 795 switch (ch) { 796 case '@': 797 case '.': 798 case ':': 799 case ';': 800 case '^': 801 case ',': 802 case ']': 803 case '[': 804 case '(': 805 case ')': 806 type = LexerTokenType.TOKEN; 807 b.append(ch); 808 token = b.toString(); 809 return; 810 case '<': 811 while (cursor < source.length()) { 812 ch = grab(); 813 if (ch == '>') 814 break; 815 b.append(ch); 816 } 817 type = LexerTokenType.URI; 818 token = unescape(b.toString(), true); 819 return; 820 case '"': 821 b.append(ch); 822 String end = "\""; 823 while (cursor < source.length()) { 824 ch = grab(); 825 if (b.length() == 2 && ch != '"' && b.equals("\"\"")) { 826 cursor--; 827 break; 828 } 829 b.append(ch); 830 if (ch == '"') 831 if (b.toString().equals("\"\"\"")) 832 end = "\"\"\""; 833 else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\"+end)) 834 break; 835 } 836 type = LexerTokenType.LITERAL; 837 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 838 return; 839 case '\'': 840 b.append(ch); 841 end = "'"; 842 while (cursor < source.length()) { 843 ch = grab(); 844 if (b.equals("''") && ch != '\'') { 845 cursor--; 846 break; 847 } 848 b.append(ch); 849 if (b.toString().equals("'''")) 850 end = "'''"; 851 else if (!b.toString().equals("''") && b.toString().endsWith(end)) 852 break; 853 } 854 type = LexerTokenType.LITERAL; 855 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 856 return; 857 default: 858 if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) { 859 b.append(ch); 860 while (cursor < source.length()) { 861 ch = grab(); 862 // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', '-', '+', '.', '\\', '#')) 863 if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') || (( ch == ':') && !postColon)) 864 break; 865 b.append(ch); 866 } 867 type = LexerTokenType.WORD; 868 token = b.toString(); 869 cursor--; 870 return; 871 } else 872 throw error("unexpected lexer char "+ch); 873 } 874 } 875 876 private String unescape(String s, boolean isUri) throws FHIRFormatError { 877 StringBuilder b = new StringBuilder(); 878 int i = 0; 879 while (i < s.length()) { 880 char ch = s.charAt(i); 881 if (ch == '\\' && i < s.length()-1) { 882 i++; 883 switch (s.charAt(i)) { 884 case 't': 885 b.append('\t'); 886 break; 887 case 'r': 888 b.append('\r'); 889 break; 890 case 'n': 891 b.append('\n'); 892 break; 893 case 'f': 894 b.append('\f'); 895 break; 896 case '\'': 897 b.append('\''); 898 break; 899 case '\"': 900 b.append('\"'); 901 break; 902 case '\\': 903 b.append('\\'); 904 break; 905 case '/': 906 b.append('\\'); 907 break; 908 case 'U': 909 case 'u': 910 i++; 911 int l = 4; 912 int uc = Integer.parseInt(s.substring(i, i+l), 16); 913 if (uc < (isUri ? 33 : 32)) { 914 l = 8; 915 uc = Integer.parseInt(s.substring(i, i+8), 16); 916 } 917 if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) 918 throw new FHIRFormatError("Illegal unicode character"); 919 b.append((char) uc); 920 i = i + l; 921 break; 922 default: 923 throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i)); 924 } 925 } else { 926 b.append(ch); 927 } 928 i++; 929 } 930 return b.toString(); 931 } 932 933 public boolean done() { 934 return type == null; 935 } 936 937 public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError { 938 if (type != null && this.type != type) 939 throw error("Unexpected type. Found "+this.type.toString()+" looking for a "+type.toString()); 940 String res = token; 941 readNext(postColon); 942 return res; 943 } 944 945 public String peek() throws Exception { 946 return token; 947 } 948 949 public LexerTokenType peekType() { 950 return type; 951 } 952 953 public void token(String token) throws FHIRFormatError { 954 if (!token.equals(this.token)) 955 throw error("Unexpected word "+this.token+" looking for "+token); 956 next(LexerTokenType.TOKEN, token.equals(":")); 957 } 958 959 public void word(String word) throws Exception { 960 if (!word.equals(this.token)) 961 throw error("Unexpected word "+this.token+" looking for "+word); 962 next(LexerTokenType.WORD, false); 963 } 964 965 public String word() throws FHIRFormatError { 966 String t = token; 967 next(LexerTokenType.WORD, false); 968 return t; 969 } 970 971 public String uri() throws FHIRFormatError { 972 if (this.type != LexerTokenType.URI) 973 throw error("Unexpected type. Found "+this.type.toString()+" looking for a URI"); 974 String t = token; 975 next(LexerTokenType.URI, false); 976 return t; 977 } 978 979 public String literal() throws FHIRFormatError { 980 if (this.type != LexerTokenType.LITERAL) 981 throw error("Unexpected type. Found "+this.type.toString()+" looking for a Literal"); 982 String t = token; 983 next(LexerTokenType.LITERAL, false); 984 return t; 985 } 986 987 public boolean peek(LexerTokenType type, String token) { 988 return this.type == type && this.token.equals(token); 989 } 990 991 public FHIRFormatError error(String message) { 992 return new FHIRFormatError("Syntax Error parsing Turtle on line "+Integer.toString(line)+" col "+Integer.toString(col)+": "+message); 993 } 994 995 } 996 // 997 // public void importTtl(Section sct, String ttl) throws Exception { 998 // if (!Utilities.noString(ttl)) { 999 // // System.out.println("import ttl: "+ttl); 1000 // Lexer lexer = new Lexer(ttl); 1001 // String subject = null; 1002 // String predicate = null; 1003 // while (!lexer.done()) { 1004 // if (subject == null) 1005 // subject = lexer.next(); 1006 // if (predicate == null) 1007 // predicate = lexer.next(); 1008 // if (lexer.peekType() == null) { 1009 // throw new Error("Unexpected end of input parsing turtle"); 1010 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1011 // sct.triple(subject, predicate, lexer.next()); 1012 // } else if (lexer.peek() == null) { 1013 // throw new Error("Unexected - turtle lexer found no token"); 1014 // } else if (lexer.peek().equals("[")) { 1015 // sct.triple(subject, predicate, importComplex(lexer)); 1016 // } else 1017 // throw new Exception("Not done yet"); 1018 // String n = lexer.next(); 1019 // if (Utilities.noString(n)) 1020 // break; 1021 // if (n.equals(".")) { 1022 // subject = null; 1023 // predicate = null; 1024 // } else if (n.equals(";")) { 1025 // predicate = null; 1026 // } else if (!n.equals(",")) 1027 // throw new Exception("Unexpected token "+n); 1028 // } 1029 // } 1030 // } 1031 1032 public void parse(String source) throws FHIRFormatError { 1033 prefixes.clear(); 1034 prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#"); 1035 parse(new Lexer(source)); 1036 } 1037 1038 private void parse(Lexer lexer) throws FHIRFormatError { 1039 boolean doPrefixes = true; 1040 while (!lexer.done()) { 1041 if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") || lexer.peek(LexerTokenType.WORD, "BASE"))) { 1042 boolean sparqlStyle = false; 1043 boolean base = false; 1044 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1045 lexer.token("@"); 1046 String p = lexer.word(); 1047 if (p.equals("base")) 1048 base = true; 1049 else if (!p.equals("prefix")) 1050 throw new FHIRFormatError("Unexpected token "+p); 1051 } else { 1052 sparqlStyle = true; 1053 String p = lexer.word(); 1054 if (p.equals("BASE")) 1055 base = true; 1056 else if (!p.equals("PREFIX")) 1057 throw new FHIRFormatError("Unexpected token "+p); 1058 } 1059 String prefix = null; 1060 if (!base) { 1061 prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null; 1062 lexer.token(":"); 1063 } 1064 String url = lexer.next(LexerTokenType.URI, false); 1065 if (!sparqlStyle) 1066 lexer.token("."); 1067 if (!base) 1068 prefix(prefix, url); 1069 else if (this.base == null) 1070 this.base = url; 1071 else 1072 throw new FHIRFormatError("Duplicate @base"); 1073 } else if (lexer.peekType() == LexerTokenType.URI) { 1074 doPrefixes = false; 1075 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1076 uri.setUri(lexer.uri()); 1077 TTLComplex complex = parseComplex(lexer); 1078 objects.put(uri, complex); 1079 lexer.token("."); 1080 } else if (lexer.peekType() == LexerTokenType.WORD) { 1081 doPrefixes = false; 1082 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1083 String pfx = lexer.word(); 1084 if (!prefixes.containsKey(pfx)) 1085 throw new FHIRFormatError("Unknown prefix "+pfx); 1086 lexer.token(":"); 1087 uri.setUri(prefixes.get(pfx)+lexer.word()); 1088 TTLComplex complex = parseComplex(lexer); 1089 objects.put(uri, complex); 1090 lexer.token("."); 1091 } else if (lexer.peek(LexerTokenType.TOKEN, ":")) { 1092 doPrefixes = false; 1093 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1094 lexer.token(":"); 1095 if (!prefixes.containsKey(null)) 1096 throw new FHIRFormatError("Unknown prefix ''"); 1097 uri.setUri(prefixes.get(null)+lexer.word()); 1098 TTLComplex complex = parseComplex(lexer); 1099 objects.put(uri, complex); 1100 lexer.token("."); 1101 } else if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1102 doPrefixes = false; 1103 lexer.token("["); 1104 TTLComplex bnode = parseComplex(lexer); 1105 lexer.token("]"); 1106 TTLComplex complex = null; 1107 if (!lexer.peek(LexerTokenType.TOKEN, ".")) { 1108 complex = parseComplex(lexer); 1109 // at this point, we collapse bnode and complex, and give bnode a fictional identity 1110 bnode.addPredicates(complex.predicates); 1111 } 1112 1113 objects.put(anonymousId(), bnode); 1114 lexer.token("."); 1115 } else 1116 throw lexer.error("Unknown token "+lexer.token); 1117 } 1118 } 1119 1120 private TTLURL anonymousId() throws FHIRFormatError { 1121 TTLURL url = new TTLURL(-1, -1); 1122 url.setUri("urn:uuid:"+UUID.randomUUID().toString().toLowerCase()); 1123 return url; 1124 } 1125 1126 private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError { 1127 TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol); 1128 1129 boolean done = lexer.peek(LexerTokenType.TOKEN, "]"); 1130 while (!done) { 1131 String uri = null; 1132 if (lexer.peekType() == LexerTokenType.URI) 1133 uri = lexer.uri(); 1134 else { 1135 String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1136 if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) { 1137 lexer.token(":"); 1138 if (!prefixes.containsKey(t)) 1139 throw new FHIRFormatError("unknown prefix "+t); 1140 uri = prefixes.get(t)+lexer.word(); 1141 } else if (t.equals("a")) 1142 uri = prefixes.get("rdfs")+"type"; 1143 else 1144 throw lexer.error("unexpected token"); 1145 } 1146 1147 boolean inlist = false; 1148 if (lexer.peek(LexerTokenType.TOKEN, "(")) { 1149 inlist = true; 1150 lexer.token("("); 1151 } 1152 1153 boolean rpt = false; 1154 do { 1155 if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1156 lexer.token("["); 1157 result.addPredicate(uri, parseComplex(lexer)); 1158 lexer.token("]"); 1159 } else if (lexer.peekType() == LexerTokenType.URI) { 1160 TTLURL u = new TTLURL(lexer.startLine, lexer.startCol); 1161 u.setUri(lexer.uri()); 1162 result.addPredicate(uri, u); 1163 } else if (lexer.peekType() == LexerTokenType.LITERAL) { 1164 TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol); 1165 u.value = lexer.literal(); 1166 if (lexer.peek(LexerTokenType.TOKEN, "^")) { 1167 lexer.token("^"); 1168 lexer.token("^"); 1169 if (lexer.peekType() == LexerTokenType.URI) { 1170 u.type = lexer.uri(); 1171 } else { 1172 String l = lexer.word(); 1173 lexer.token(":"); 1174 u.type = prefixes.get(l)+ lexer.word(); 1175 } 1176 } 1177 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1178 //lang tag - skip it 1179 lexer.token("@"); 1180 String lang = lexer.word(); 1181 if (!lang.matches(LANG_REGEX)) { 1182 throw new FHIRFormatError("Invalid Language tag "+lang); 1183 } 1184 } 1185 result.addPredicate(uri, u); 1186 } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) { 1187 int sl = lexer.startLine; 1188 int sc = lexer.startCol; 1189 String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1190 if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1191 TTLLiteral u = new TTLLiteral(sl, sc); 1192 u.value = pfx; 1193 result.addPredicate(uri, u); 1194 } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1195 TTLLiteral u = new TTLLiteral(sl, sc); 1196 u.value = pfx; 1197 result.addPredicate(uri, u); 1198 } else { 1199 if (!prefixes.containsKey(pfx)) 1200 throw new FHIRFormatError("Unknown prefix "+(pfx == null ? "''" : pfx)); 1201 TTLURL u = new TTLURL(sl, sc); 1202 lexer.token(":"); 1203 u.setUri(prefixes.get(pfx)+lexer.word()); 1204 result.addPredicate(uri, u); 1205 } 1206 } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) { 1207 throw new FHIRFormatError("unexpected token "+lexer.token); 1208 } 1209 1210 if (inlist) 1211 rpt = !lexer.peek(LexerTokenType.TOKEN, ")"); 1212 else { 1213 rpt = lexer.peek(LexerTokenType.TOKEN, ","); 1214 if (rpt) 1215 lexer.readNext(false); 1216 } 1217 } while (rpt); 1218 if (inlist) 1219 lexer.token(")"); 1220 1221 if (lexer.peek(LexerTokenType.TOKEN, ";")) { 1222 while ((lexer.peek(LexerTokenType.TOKEN, ";"))) 1223 lexer.token(";"); 1224 done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]"); 1225 } else { 1226 done = true; 1227 } 1228 } 1229 return result; 1230 } 1231 1232 public Map<TTLURL, TTLComplex> getObjects() { 1233 return objects; 1234 } 1235 1236 public TTLComplex getObject(String url) { 1237 for (TTLURL t : objects.keySet()) { 1238 if (t.getUri().equals(url)) 1239 return objects.get(t); 1240 } 1241 return null; 1242 } 1243 1244 // public void parseFragment(Lexer lexer) throws Exception { 1245 // lexer.next(); // read [ 1246 // Complex obj = new Complex(); 1247 // while (!lexer.peek().equals("]")) { 1248 // String predicate = lexer.next(); 1249 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1250 // obj.predicate(predicate, lexer.next()); 1251 // } else if (lexer.peek().equals("[")) { 1252 // obj.predicate(predicate, importComplex(lexer)); 1253 // } else 1254 // throw new Exception("Not done yet"); 1255 // if (lexer.peek().equals(";")) 1256 // lexer.next(); 1257 // } 1258 // lexer.next(); // read ] 1259 // //return obj; 1260 // } 1261 // 1262 // public void importTtl(Section sct, String ttl) throws Exception { 1263 // if (!Utilities.noString(ttl)) { 1264 // // System.out.println("import ttl: "+ttl); 1265 // Lexer lexer = new Lexer(ttl); 1266 // String subject = null; 1267 // String predicate = null; 1268 // while (!lexer.done()) { 1269 // if (subject == null) 1270 // subject = lexer.next(); 1271 // if (predicate == null) 1272 // predicate = lexer.next(); 1273 // if (lexer.peekType() == null) { 1274 // throw new Error("Unexpected end of input parsing turtle"); 1275 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1276 // sct.triple(subject, predicate, lexer.next()); 1277 // } else if (lexer.peek() == null) { 1278 // throw new Error("Unexected - turtle lexer found no token"); 1279 // } else if (lexer.peek().equals("[")) { 1280 // sct.triple(subject, predicate, importComplex(lexer)); 1281 // } else 1282 // throw new Exception("Not done yet"); 1283 // String n = lexer.next(); 1284 // if (Utilities.noString(n)) 1285 // break; 1286 // if (n.equals(".")) { 1287 // subject = null; 1288 // predicate = null; 1289 // } else if (n.equals(";")) { 1290 // predicate = null; 1291 // } else if (!n.equals(",")) 1292 // throw new Exception("Unexpected token "+n); 1293 // } 1294 // } 1295 //} 1296 1297 // private Complex importComplex(Lexer lexer) throws Exception { 1298 // lexer.next(); // read [ 1299 // Complex obj = new Complex(); 1300 // while (!lexer.peek().equals("]")) { 1301 // String predicate = lexer.next(); 1302 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1303 // obj.predicate(predicate, lexer.next()); 1304 // } else if (lexer.peek().equals("[")) { 1305 // obj.predicate(predicate, importComplex(lexer)); 1306 // } else 1307 // throw new Exception("Not done yet"); 1308 // if (lexer.peek().equals(";")) 1309 // lexer.next(); 1310 // } 1311 // lexer.next(); // read ] 1312 // return obj; 1313 // } 1314 1315}