001package org.hl7.fhir.r5.formats;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
/**
 * A small lexer with one token of lookahead over a Turtle-like text.
 *
 * <p>The lexer recognises three kinds of token:
 * <ul>
 *   <li>{@link TurtleTokenType#LITERAL}: a double-quoted string. The token text includes
 *       the surrounding quotes, and backslash escapes are copied through verbatim
 *       (they are not decoded).</li>
 *   <li>{@link TurtleTokenType#SPECIAL}: one of the single characters
 *       {@code [ ] ; . ,}.</li>
 *   <li>{@link TurtleTokenType#TOKEN}: a run that starts with a letter and continues with
 *       letters, digits, {@code ':'}, {@code '.'} or backslash escapes.</li>
 * </ul>
 *
 * <p>Whitespace between tokens is skipped. An opening parenthesis is rejected with
 * "not supported yet", and any other character that cannot start a token is reported
 * as an error rather than being silently ignored.
 */
public class TurtleLexer {

  /** Classification of the current (lookahead) token. */
  public enum TurtleTokenType {
    /** End of input: there is no current token. */
    NULL, 
    /** A name made of letters, digits, ':', '.' and backslash escapes. */
    TOKEN,
    /** A single delimiter character: one of {@code [ ] ; . ,}. */
    SPECIAL,
    /** A double-quoted string, quotes included. */
    LITERAL
  }

  private final String source;
  private int cursor; 
  private String token;
  private TurtleTokenType type;
  
  /**
   * Creates a lexer over {@code source} and reads the first token so that
   * {@link #peek()}, {@link #peekType()} and {@link #done()} are usable immediately.
   *
   * @param source the text to tokenise
   * @throws Exception if the first token cannot be read (see {@link #next()})
   */
  public TurtleLexer(String source) throws Exception {
    this.source = source;
    cursor = 0;
    readNext();
  }

  /**
   * Advances to the next token, updating {@code token} and {@code type}.
   *
   * @throws Exception on '(' ("not supported yet"), on a character that cannot start
   *         any token, or on a backslash that is the last character of the input
   */
  private void readNext() throws Exception {    
    // Skipping here (rather than only after each token) also covers whitespace at the
    // very start of the input.
    skipWhitespace();
    if (cursor >= source.length()) {
      token = null;
      type = TurtleTokenType.NULL;
      return;
    }
    char c = source.charAt(cursor);
    if (c == '"') {
      readLiteral();
    } else if (c == '[' || c == ']' || c == ';' || c == '.' || c == ',') {
      readDelimiter();
    } else if (c == '(') {
      throw new Exception("not supported yet");
    } else if (Character.isLetter(c)) {
      readToken();
    } else {
      // Without this branch the lexer would make no progress and callers looping on
      // done() would never terminate.
      throw new Exception("Unexpected character '" + c + "' at offset " + cursor);
    }
  }

  /** Moves the cursor past any run of whitespace. */
  private void skipWhitespace() {
    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
      cursor++;
    }
  }

  /**
   * Copies the backslash at the cursor into {@code b} and steps onto the escaped
   * character, which the caller then appends.
   *
   * @throws Exception if the backslash is the last character of the input
   */
  private void readEscapePrefix(StringBuilder b) throws Exception {
    if (cursor + 1 >= source.length()) {
      throw new Exception("Dangling '\\' at end of input (offset " + cursor + ")");
    }
    b.append(source.charAt(cursor));
    cursor++;
  }

  /**
   * Reads a double-quoted literal starting at the cursor. The resulting token is the
   * literal's content wrapped in double quotes; escapes are kept as written.
   * A literal with no closing quote is tolerated and runs to the end of the input.
   */
  private void readLiteral() throws Exception {
    StringBuilder b = new StringBuilder();
    cursor++; // skip opening "
    while (cursor < source.length() && source.charAt(cursor) != '"') {
      if (source.charAt(cursor) == '\\') {
        readEscapePrefix(b);
      } 
      b.append(source.charAt(cursor));
      cursor++;
    }
    token = "\""+b.toString()+"\"";
    type = TurtleTokenType.LITERAL;
    if (cursor < source.length()) {
      cursor++; // skip closing "
    }
  }

  /** Reads the single delimiter character at the cursor. */
  private void readDelimiter() {
    token = String.valueOf(source.charAt(cursor));
    type = TurtleTokenType.SPECIAL;
    cursor++;
  }

  /**
   * Reads a name token starting at the cursor. Because '.' is a valid name character,
   * a trailing '.' is handed back to the input so it is returned as its own
   * {@link TurtleTokenType#SPECIAL} token next.
   */
  private void readToken() throws Exception {
    StringBuilder b = new StringBuilder();
    while (cursor < source.length() && isValidTokenChar(source.charAt(cursor))) {
      if (source.charAt(cursor) == '\\') {
        readEscapePrefix(b);
      } 
      b.append(source.charAt(cursor));
      cursor++;
    }
    token = b.toString();
    type = TurtleTokenType.TOKEN;
    if (token.endsWith(".")) {
      cursor--; // re-read the '.' as a delimiter
      token = token.substring(0, token.length()-1);
    }
  }

  /** Returns whether {@code c} may appear inside a name token. */
  private boolean isValidTokenChar(char c) {
    return Character.isLetter(c) || Character.isDigit(c) || c == ':' || c == '\\' || c == '.';
  }

  /**
   * @return {@code true} once the input is exhausted and there is no current token
   */
  public boolean done() {
    return type == TurtleTokenType.NULL;
  }

  /**
   * Returns the current token and advances to the following one.
   *
   * @return the text of the current token, or {@code null} at end of input
   * @throws Exception if the following token cannot be read
   */
  public String next() throws Exception {
    String res = token;
    readNext();
    return res;
  }

  /**
   * @return the text of the current token without consuming it, or {@code null} at
   *         end of input
   */
  public String peek() throws Exception {
    return token;
  }

  /**
   * @return the type of the current token; {@link TurtleTokenType#NULL} at end of input
   */
  public TurtleTokenType peekType() {
    return type;
  }
  
  
}