001package org.hl7.fhir.dstu2.utils; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032import org.hl7.fhir.dstu2.model.ExpressionNode; 033import org.hl7.fhir.dstu2.model.ExpressionNode.SourceLocation; 034import org.hl7.fhir.exceptions.FHIRException; 035import org.hl7.fhir.utilities.Utilities; 036 037// shared lexer for concrete syntaxes 038// - FluentPath 039// - Mapping language 040 041public class FHIRLexer { 042 public class FHIRLexerException extends FHIRException { 043 044 public FHIRLexerException() { 045 super(); 046 } 047 048 public FHIRLexerException(String message, Throwable cause) { 049 super(message, cause); 050 } 051 052 public FHIRLexerException(String message) { 053 super(message); 054 } 055 056 public FHIRLexerException(Throwable cause) { 057 super(cause); 058 } 059 060 } 061 062 private String path; 063 private int cursor; 064 private int currentStart; 065 private String current; 066 private SourceLocation currentLocation; 067 private SourceLocation currentStartLocation; 068 private int id; 069 070 public FHIRLexer(String source) throws FHIRLexerException { 071 this.path = source; 072 currentLocation = new SourceLocation(1, 1); 073 next(); 074 } 075 076 public String getCurrent() { 077 return current; 078 } 079 080 public SourceLocation getCurrentLocation() { 081 return currentLocation; 082 } 083 084 public boolean isConstant(boolean incDoubleQuotes) { 085 return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' 086 || current.charAt(0) == '%' || current.charAt(0) == '-' || current.charAt(0) == '+' 087 || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || current.equals("true") || current.equals("false") 088 || current.equals("{}"); 089 } 090 091 public boolean isStringConstant() { 092 return current.charAt(0) == '\'' || current.charAt(0) == '"'; 093 } 094 095 public String take() throws FHIRLexerException { 096 String s = current; 097 next(); 098 return s; 099 } 100 101 public boolean isToken() { 102 if (Utilities.noString(current)) 103 return false; 104 105 if (current.startsWith("$")) 106 return true; 107 108 if (current.equals("*") || current.equals("**")) 109 return true; 110 111 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') 112 || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 113 for (int i = 1; i < current.length(); i++) 114 if (!((current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') 115 || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') 116 || (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 117 return false; 118 return true; 119 } 120 return false; 121 } 122 123 public FHIRLexerException error(String msg) { 124 return error(msg, currentLocation.toString()); 125 } 126 127 public FHIRLexerException error(String msg, String location) { 128 return new FHIRLexerException("Error in " + path + " at " + location + ": " + msg); 129 } 130 131 public void next() throws FHIRLexerException { 132 current = null; 133 boolean last13 = false; 134 while (cursor < path.length() && Character.isWhitespace(path.charAt(cursor))) { 135 if (path.charAt(cursor) == '\r') { 136 currentLocation.setLine(currentLocation.getLine() + 1); 137 currentLocation.setColumn(1); 138 last13 = true; 139 } else if (!last13 && (path.charAt(cursor) == '\n')) { 140 currentLocation.setLine(currentLocation.getLine() + 1); 141 currentLocation.setColumn(1); 142 last13 = false; 143 } else { 144 last13 = false; 145 currentLocation.setColumn(currentLocation.getColumn() + 1); 146 } 147 cursor++; 148 } 149 currentStart = cursor; 150 currentStartLocation = currentLocation; 151 if (cursor < path.length()) { 152 char ch = path.charAt(cursor); 153 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 154 cursor++; 155 if (cursor < path.length() 156 && (path.charAt(cursor) == '=' || path.charAt(cursor) == '~' || path.charAt(cursor) == '-')) 157 cursor++; 158 current = path.substring(currentStart, cursor); 159 } else if (ch == '.') { 160 cursor++; 161 if (cursor < path.length() && (path.charAt(cursor) == '.')) 162 cursor++; 163 current = path.substring(currentStart, cursor); 164 } else if (ch >= '0' && ch <= '9') { 165 cursor++; 166 boolean dotted = false; 167 while (cursor < path.length() && ((path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') 168 || (path.charAt(cursor) == '.') && !dotted)) { 169 if (path.charAt(cursor) == '.') 170 dotted = true; 171 cursor++; 172 } 173 if (path.charAt(cursor - 1) == '.') 174 cursor--; 175 current = path.substring(currentStart, cursor); 176 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 177 while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') 178 || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') 179 || (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == '_')) 180 cursor++; 181 current = path.substring(currentStart, cursor); 182 } else if (ch == '%') { 183 cursor++; 184 if (cursor < path.length() && (path.charAt(cursor) == '"')) { 185 cursor++; 186 while (cursor < path.length() && (path.charAt(cursor) != '"')) 187 cursor++; 188 cursor++; 189 } else 190 while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') 191 || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') 192 || (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == ':' 193 || path.charAt(cursor) == '-')) 194 cursor++; 195 current = path.substring(currentStart, cursor); 196 } else if (ch == '/') { 197 cursor++; 198 if (cursor < path.length() && (path.charAt(cursor) == '/')) { 199 cursor++; 200 while (cursor < path.length() && !((path.charAt(cursor) == '\r') || path.charAt(cursor) == '\n')) 201 cursor++; 202 } 203 current = path.substring(currentStart, cursor); 204 } else if (ch == '$') { 205 cursor++; 206 while (cursor < path.length() && (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z')) 207 cursor++; 208 current = path.substring(currentStart, cursor); 209 } else if (ch == '{') { 210 cursor++; 211 ch = path.charAt(cursor); 212 if (ch == '}') 213 cursor++; 214 current = path.substring(currentStart, cursor); 215 } else if (ch == '"') { 216 cursor++; 217 boolean escape = false; 218 while (cursor < path.length() && (escape || path.charAt(cursor) != '"')) { 219 if (escape) 220 escape = false; 221 else 222 escape = (path.charAt(cursor) == '\\'); 223 cursor++; 224 } 225 if (cursor == path.length()) 226 throw error("Unterminated string"); 227 cursor++; 228 current = "\"" + path.substring(currentStart + 1, cursor - 1) + "\""; 229 } else if (ch == '\'') { 230 cursor++; 231 char ech = ch; 232 boolean escape = false; 233 while (cursor < path.length() && (escape || path.charAt(cursor) != ech)) { 234 if (escape) 235 escape = false; 236 else 237 escape = (path.charAt(cursor) == '\\'); 238 cursor++; 239 } 240 if (cursor == path.length()) 241 throw error("Unterminated string"); 242 cursor++; 243 current = path.substring(currentStart, cursor); 244 if (ech == '\'') 245 current = "\'" + current.substring(1, current.length() - 1) + "\'"; 246 } else if (ch == '@') { 247 cursor++; 248 while (cursor < path.length() && isDateChar(path.charAt(cursor))) 249 cursor++; 250 current = path.substring(currentStart, cursor); 251 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 252 cursor++; 253 current = path.substring(currentStart, cursor); 254 } 255 } 256 } 257 258 private boolean isDateChar(char ch) { 259 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch); 260 } 261 262 public boolean isOp() { 263 return ExpressionNode.Operation.fromCode(current) != null; 264 } 265 266 public boolean done() { 267 return currentStart >= path.length(); 268 } 269 270 public int nextId() { 271 id++; 272 return id; 273 } 274 275 public SourceLocation getCurrentStartLocation() { 276 return currentStartLocation; 277 } 278 279 // special case use 280 public void setCurrent(String current) { 281 this.current = current; 282 } 283 284 public boolean hasComment() { 285 return !done() && current.startsWith("//"); 286 } 287 288 public boolean hasToken(String kw) { 289 return !done() && kw.equals(current); 290 } 291 292 public void token(String kw) throws FHIRLexerException { 293 if (!kw.equals(current)) 294 throw error("Found \"" + current + "\" expecting \"" + kw + "\""); 295 next(); 296 } 297 298 public String readConstant(String desc) throws FHIRLexerException { 299 if (!isStringConstant()) 300 throw error("Found " + current + " expecting \"[" + desc + "]\""); 301 302 return processConstant(take()); 303 } 304 305 public String processConstant(String s) throws FHIRLexerException { 306 StringBuilder b = new StringBuilder(); 307 int i = 1; 308 while (i < s.length() - 1) { 309 char ch = s.charAt(i); 310 if (ch == '\\') { 311 i++; 312 switch (s.charAt(i)) { 313 case 't': 314 b.append('\t'); 315 break; 316 case 'r': 317 b.append('\r'); 318 break; 319 case 'n': 320 b.append('\n'); 321 break; 322 case 'f': 323 b.append('\f'); 324 break; 325 case '\'': 326 b.append('\''); 327 break; 328 case '\\': 329 b.append('\\'); 330 break; 331 case '/': 332 b.append('\\'); 333 break; 334 case 'u': 335 i++; 336 int uc = Integer.parseInt(s.substring(i, i + 4), 16); 337 b.append((char) uc); 338 i = i + 4; 339 break; 340 default: 341 throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i)); 342 } 343 } else { 344 b.append(ch); 345 i++; 346 } 347 } 348 return b.toString(); 349 350 } 351 352 public void skipToken(String token) throws FHIRLexerException { 353 if (getCurrent().equals(token)) 354 next(); 355 356 } 357 358 public String takeDottedToken() throws FHIRLexerException { 359 StringBuilder b = new StringBuilder(); 360 b.append(take()); 361 while (!done() && getCurrent().equals(".")) { 362 b.append(take()); 363 b.append(take()); 364 } 365 return b.toString(); 366 } 367 368 void skipComments() throws FHIRLexerException { 369 while (!done() && hasComment()) 370 next(); 371 } 372 373}