001package org.hl7.fhir.r4.formats;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
/**
 * Minimal hand-written tokenizer for a small subset of the Turtle (RDF) syntax.
 *
 * <p>The lexer always holds one "current" token, read eagerly: the constructor
 * reads the first token, and each call to {@link #next()} returns the current
 * token and reads the following one. {@link #peek()} and {@link #peekType()}
 * inspect the current token without consuming it.
 *
 * <p>Recognised input:
 * <ul>
 *   <li>double-quoted literals, with backslash escapes kept verbatim;</li>
 *   <li>the single-character delimiters {@code [ ] ; . ,};</li>
 *   <li>names that start with a letter and continue with letters, digits,
 *       {@code :}, {@code .} or backslash escapes.</li>
 * </ul>
 * Whitespace between tokens is skipped. A token that starts with any other
 * character (including {@code (}) is rejected with an exception.
 */
public class TurtleLexer {

  /** Classification of the token currently held by the lexer. */
  public enum TurtleTokenType {
    /** End of input: there is no current token. */
    NULL,
    /** A bare name such as {@code fhir:Patient.name}. */
    TOKEN,
    /** One of the single-character delimiters {@code [ ] ; . ,}. */
    SPECIAL,
    /** A double-quoted string; the token text includes the surrounding quotes. */
    LITERAL
  }

  private final String source;
  private int cursor;
  private String token;
  private TurtleTokenType type;

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source the Turtle text to tokenize
   * @throws Exception if the first token starts with an unsupported character
   */
  public TurtleLexer(String source) throws Exception {
    this.source = source;
    cursor = 0;
    readNext();
  }

  /**
   * Reads the token starting at the cursor into {@code token}/{@code type}.
   *
   * @throws Exception if the next non-whitespace character cannot start a
   *         supported token
   */
  private void readNext() throws Exception {
    // Skipping whitespace here (rather than after each token) also covers
    // whitespace at the very start of the input.
    skipWhitespace();
    if (cursor >= source.length()) {
      token = null;
      type = TurtleTokenType.NULL;
      return;
    }
    char c = source.charAt(cursor);
    if (c == '"') {
      readLiteral();
    } else if (c == '[' || c == ']' || c == ';' || c == '.' || c == ',') {
      readDelimiter();
    } else if (c == '(') {
      throw new Exception("not supported yet");
    } else if (Character.isLetter(c)) {
      readToken();
    } else {
      // Without this branch nothing would be consumed and the previous token
      // would be returned forever, so callers looping on done() never finish.
      throw new Exception("Unexpected character '" + c + "' at offset " + cursor);
    }
  }

  /** Advances the cursor past any run of whitespace. */
  private void skipWhitespace() {
    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
      cursor++;
    }
  }

  /**
   * Copies the character at the cursor into {@code b}; if it is a backslash,
   * the character it escapes (when there is one) is copied as well.
   *
   * @return {@code true} if a backslash plus an escaped character were copied
   */
  private boolean copyChar(StringBuilder b) {
    char c = source.charAt(cursor++);
    b.append(c);
    // The bounds check keeps a backslash at the very end of the input from
    // reading past the end of the source.
    if (c == '\\' && cursor < source.length()) {
      b.append(source.charAt(cursor++));
      return true;
    }
    return false;
  }

  /**
   * Reads a double-quoted literal. Escape sequences are kept as written, so an
   * escaped quote does not end the literal. If the input ends before the
   * closing quote, the literal is closed at end of input.
   */
  private void readLiteral() {
    StringBuilder b = new StringBuilder();
    b.append('"');
    cursor++; // skip opening quote
    while (cursor < source.length() && source.charAt(cursor) != '"') {
      copyChar(b);
    }
    b.append('"');
    cursor++; // skip closing quote
    token = b.toString();
    type = TurtleTokenType.LITERAL;
  }

  /** Reads the single delimiter character at the cursor. */
  private void readDelimiter() {
    token = String.valueOf(source.charAt(cursor));
    cursor++;
    type = TurtleTokenType.SPECIAL;
  }

  /**
   * Reads a name token. A dot may appear inside a name, but an unescaped dot at
   * the very end is the statement terminator: it is handed back to the input so
   * that it is returned as its own SPECIAL token.
   */
  private void readToken() {
    StringBuilder b = new StringBuilder();
    boolean lastWasEscaped = false;
    while (cursor < source.length() && isValidTokenChar(source.charAt(cursor))) {
      lastWasEscaped = copyChar(b);
    }
    token = b.toString();
    type = TurtleTokenType.TOKEN;
    // A final "\." is part of the name and must not be split off.
    if (!lastWasEscaped && token.endsWith(".")) {
      cursor--;
      token = token.substring(0, token.length() - 1);
    }
  }

  /** Returns whether {@code c} may appear inside a name token. */
  private boolean isValidTokenChar(char c) {
    return Character.isLetter(c) || Character.isDigit(c) || c == ':' || c == '\\' || c == '.';
  }

  /**
   * Returns whether the input is exhausted.
   *
   * @return {@code true} when there is no current token
   */
  public boolean done() {
    return type == TurtleTokenType.NULL;
  }

  /**
   * Returns the current token and advances to the following one.
   *
   * @return the current token text, or {@code null} at end of input
   * @throws Exception if the following token starts with an unsupported character
   */
  public String next() throws Exception {
    String res = token;
    readNext();
    return res;
  }

  /**
   * Returns the current token without consuming it.
   *
   * @return the current token text, or {@code null} at end of input
   * @throws Exception never thrown by this implementation; declared so existing
   *         callers keep compiling
   */
  public String peek() throws Exception {
    return token;
  }

  /**
   * Returns the type of the current token without consuming it.
   *
   * @return the current token type; {@link TurtleTokenType#NULL} at end of input
   */
  public TurtleTokenType peekType() {
    return type;
  }

}