001package org.hl7.fhir.r4.formats; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032public class TurtleLexer { 033 034 public enum TurtleTokenType { 035 NULL, TOKEN, SPECIAL, LITERAL 036 } 037 038 private String source; 039 private int cursor; 040 private String token; 041 private TurtleTokenType type; 042 043 public TurtleLexer(String source) throws Exception { 044 this.source = source; 045 cursor = 0; 046 readNext(); 047 } 048 049 private void readNext() throws Exception { 050 if (cursor >= source.length()) { 051 token = null; 052 type = TurtleTokenType.NULL; 053 } else if (source.charAt(cursor) == '"') 054 readLiteral(); 055 else if (source.charAt(cursor) == '[' || source.charAt(cursor) == ']') 056 readDelimiter(); 057 else if (source.charAt(cursor) == '(') 058 throw new Exception("not supported yet"); 059 else if (source.charAt(cursor) == ';' || source.charAt(cursor) == '.' || source.charAt(cursor) == ',') 060 readDelimiter(); 061 else if (Character.isLetter(source.charAt(cursor))) 062 readToken(); 063 064 } 065 066 private void readLiteral() { 067 StringBuilder b = new StringBuilder(); 068 cursor++; // skip " 069 while (cursor < source.length() && source.charAt(cursor) != '"') { 070 if (source.charAt(cursor) == '\\') { 071 b.append(source.charAt(cursor)); 072 cursor++; 073 } 074 b.append(source.charAt(cursor)); 075 cursor++; 076 } 077 token = "\"" + b.toString() + "\""; 078 type = TurtleTokenType.LITERAL; 079 cursor++; // skip " 080 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 081 cursor++; 082 } 083 084 private void readDelimiter() { 085 StringBuilder b = new StringBuilder(); 086 b.append(source.charAt(cursor)); 087 cursor++; 088 token = b.toString(); 089 type = TurtleTokenType.SPECIAL; 090 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 091 cursor++; 092 } 093 094 private void readToken() { 095 StringBuilder b = new StringBuilder(); 096 while (cursor < source.length() && isValidTokenChar(source.charAt(cursor))) { 097 if (source.charAt(cursor) == '\\') { 098 b.append(source.charAt(cursor)); 099 cursor++; 100 } 101 b.append(source.charAt(cursor)); 102 cursor++; 103 } 104 token = b.toString(); 105 type = TurtleTokenType.TOKEN; 106 if (token.endsWith(".")) { 107 cursor--; 108 token = token.substring(0, token.length() - 1); 109 } 110 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 111 cursor++; 112 } 113 114 private boolean isValidTokenChar(char c) { 115 return Character.isLetter(c) || Character.isDigit(c) || c == ':' || c == '\\' || c == '.'; 116 } 117 118 public boolean done() { 119 return type == TurtleTokenType.NULL; 120 } 121 122 public String next() throws Exception { 123 String res = token; 124 readNext(); 125 return res; 126 } 127 128 public String peek() throws Exception { 129 return token; 130 } 131 132 public TurtleTokenType peekType() { 133 return type; 134 } 135 136}