001package org.hl7.fhir.r5.formats; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 
/* End of the HL7 license header. */

/**
 * A small lexer with one token of lookahead over Turtle-like text.
 *
 * <p>Only a subset of the syntax is recognised:
 * <ul>
 *   <li>double-quoted literals ({@link TurtleTokenType#LITERAL}); the returned text keeps the
 *       surrounding quotes and any backslash escapes exactly as written;</li>
 *   <li>the single-character delimiters {@code [ ] ; . ,} ({@link TurtleTokenType#SPECIAL});</li>
 *   <li>names that start with a letter and continue with letters, digits, {@code ':'},
 *       {@code '.'} or backslash escapes ({@link TurtleTokenType#TOKEN}).</li>
 * </ul>
 * Collections ({@code '('}) are rejected with "not supported yet", and any other character
 * that cannot start a token is rejected with an exception that names the character and its offset.
 */
public class TurtleLexer {

  /** Classification of the current lookahead token. {@code NULL} marks end of input. */
  public enum TurtleTokenType {
    NULL,
    TOKEN, SPECIAL, LITERAL
  }

  private final String source;
  private int cursor;
  private String token;
  private TurtleTokenType type;

  /**
   * Creates a lexer over {@code source} and reads the first token into the lookahead.
   *
   * @param source the text to tokenise
   * @throws NullPointerException if {@code source} is {@code null}
   * @throws Exception if the first token starts with an unsupported character
   */
  public TurtleLexer(String source) throws Exception {
    if (source == null) {
      throw new NullPointerException("source");
    }
    this.source = source;
    cursor = 0;
    readNext();
  }

  /**
   * Advances the lookahead to the next token, or to the end-of-input marker.
   *
   * @throws Exception if the next character cannot start a supported token
   */
  private void readNext() throws Exception {
    // Skipping here (rather than only after each token) also covers leading whitespace.
    skipWhitespace();
    if (cursor >= source.length()) {
      token = null;
      type = TurtleTokenType.NULL;
      return;
    }
    char c = source.charAt(cursor);
    if (c == '"') {
      readLiteral();
    } else if (c == '[' || c == ']' || c == ';' || c == '.' || c == ',') {
      readDelimiter();
    } else if (c == '(') {
      throw new Exception("not supported yet");
    } else if (Character.isLetter(c)) {
      readToken();
    } else {
      // Without this branch the cursor would never move and callers would loop forever.
      throw new Exception("Unexpected character '" + c + "' at offset " + cursor);
    }
  }

  /** Moves the cursor past any run of whitespace. */
  private void skipWhitespace() {
    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
      cursor++;
    }
  }

  /**
   * Appends the character at the cursor to {@code b}; if it is a backslash and more input
   * follows, the escaped character is appended as well so it is never treated as a terminator.
   */
  private void appendWithEscape(StringBuilder b) {
    char c = source.charAt(cursor++);
    b.append(c);
    // Guard against a backslash that is the very last character of the input.
    if (c == '\\' && cursor < source.length()) {
      b.append(source.charAt(cursor++));
    }
  }

  /**
   * Reads a double-quoted literal. The token text includes both quotes; a literal that is
   * not terminated before end of input is accepted and closed implicitly.
   */
  private void readLiteral() {
    StringBuilder b = new StringBuilder();
    cursor++; // skip opening "
    while (cursor < source.length() && source.charAt(cursor) != '"') {
      appendWithEscape(b);
    }
    token = "\"" + b.toString() + "\"";
    type = TurtleTokenType.LITERAL;
    cursor++; // skip closing "
    skipWhitespace();
  }

  /** Reads a single-character delimiter. */
  private void readDelimiter() {
    token = String.valueOf(source.charAt(cursor));
    cursor++;
    type = TurtleTokenType.SPECIAL;
    skipWhitespace();
  }

  /**
   * Reads a name token. Because {@code '.'} is allowed inside names, a name that ends with
   * a dot gives that dot back so it is lexed separately as a SPECIAL token.
   */
  private void readToken() {
    StringBuilder b = new StringBuilder();
    while (cursor < source.length() && isValidTokenChar(source.charAt(cursor))) {
      appendWithEscape(b);
    }
    token = b.toString();
    type = TurtleTokenType.TOKEN;
    if (token.endsWith(".")) {
      cursor--;
      token = token.substring(0, token.length() - 1);
    }
    skipWhitespace();
  }

  /** Returns whether {@code c} may appear inside a name token. */
  private boolean isValidTokenChar(char c) {
    return Character.isLetter(c) || Character.isDigit(c) || c == ':' || c == '\\' || c == '.';
  }

  /**
   * @return {@code true} once the lookahead is the end-of-input marker
   */
  public boolean done() {
    return type == TurtleTokenType.NULL;
  }

  /**
   * Returns the current token and advances to the following one.
   *
   * @return the text of the current token, or {@code null} at end of input
   * @throws Exception if the following token starts with an unsupported character
   */
  public String next() throws Exception {
    String res = token;
    readNext();
    return res;
  }

  /**
   * Returns the current token without consuming it.
   *
   * @return the text of the current token, or {@code null} at end of input
   * @throws Exception declared for interface compatibility; this implementation does not throw
   */
  public String peek() throws Exception {
    return token;
  }

  /**
   * @return the type of the current token
   */
  public TurtleTokenType peekType() {
    return type;
  }

}