001package org.hl7.fhir.r4.fhirpath; 002 003import org.hl7.fhir.exceptions.FHIRException; 004import org.hl7.fhir.utilities.SourceLocation; 005import org.hl7.fhir.utilities.Utilities; 006 007// shared lexer for concrete syntaxes 008// - FluentPath 009// - Mapping language 010 011public class FHIRLexer { 012 public class FHIRLexerException extends FHIRException { 013 014 public FHIRLexerException() { 015 super(); 016 } 017 018 public FHIRLexerException(String message, Throwable cause) { 019 super(message, cause); 020 } 021 022 public FHIRLexerException(String message) { 023 super(message); 024 } 025 026 public FHIRLexerException(Throwable cause) { 027 super(cause); 028 } 029 030 } 031 032 private String source; 033 private int cursor; 034 private int currentStart; 035 private String current; 036 private SourceLocation currentLocation; 037 private SourceLocation currentStartLocation; 038 private int id; 039 private String name; 040 041 public FHIRLexer(String source, String name) throws FHIRLexerException { 042 this.source = source; 043 this.name = name == null ? "??" : name; 044 currentLocation = new SourceLocation(1, 1); 045 next(); 046 } 047 048 public FHIRLexer(String source, int i) throws FHIRLexerException { 049 this.source = source; 050 this.cursor = i; 051 currentLocation = new SourceLocation(1, 1); 052 next(); 053 } 054 055 public String getCurrent() { 056 return current; 057 } 058 059 public SourceLocation getCurrentLocation() { 060 return currentLocation; 061 } 062 063 public boolean isConstant() { 064 return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' 065 || current.charAt(0) == '%' || current.charAt(0) == '-' || current.charAt(0) == '+' 066 || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || current.equals("true") || current.equals("false") 067 || current.equals("{}"); 068 } 069 070 public boolean isFixedName() { 071 return current != null && (current.charAt(0) == '`'); 072 } 073 074 public boolean isStringConstant() { 075 return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`'; 076 } 077 078 public String take() throws FHIRLexerException { 079 String s = current; 080 next(); 081 return s; 082 } 083 084 public int takeInt() throws FHIRLexerException { 085 String s = current; 086 if (!Utilities.isInteger(s)) 087 throw error("Found " + current + " expecting an integer"); 088 next(); 089 return Integer.parseInt(s); 090 } 091 092 public boolean isToken() { 093 if (Utilities.noString(current)) 094 return false; 095 096 if (current.startsWith("$")) 097 return true; 098 099 if (current.equals("*") || current.equals("**")) 100 return true; 101 102 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') 103 || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 104 for (int i = 1; i < current.length(); i++) 105 if (!((current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') 106 || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') 107 || (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 108 return false; 109 return true; 110 } 111 return false; 112 } 113 114 public FHIRLexerException error(String msg) { 115 return error(msg, currentLocation.toString()); 116 } 117 118 public FHIRLexerException error(String msg, String location) { 119 return new FHIRLexerException("Error in " + name + " at " + location + ": " + msg); 120 } 121 122 public void next() throws FHIRLexerException { 123 skipWhitespaceAndComments(); 124 current = null; 125 currentStart = cursor; 126 currentStartLocation = currentLocation; 127 if (cursor < source.length()) { 128 char ch = source.charAt(cursor); 129 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 130 cursor++; 131 if (cursor < source.length() 132 && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') 133 || (ch == '-' && source.charAt(cursor) == '>')) 134 cursor++; 135 current = source.substring(currentStart, cursor); 136 } else if (ch == '.') { 137 cursor++; 138 if (cursor < source.length() && (source.charAt(cursor) == '.')) 139 cursor++; 140 current = source.substring(currentStart, cursor); 141 } else if (ch >= '0' && ch <= '9') { 142 cursor++; 143 boolean dotted = false; 144 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') 145 || (source.charAt(cursor) == '.') && !dotted)) { 146 if (source.charAt(cursor) == '.') 147 dotted = true; 148 cursor++; 149 } 150 if (source.charAt(cursor - 1) == '.') 151 cursor--; 152 current = source.substring(currentStart, cursor); 153 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 154 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') 155 || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') 156 || (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 157 cursor++; 158 current = source.substring(currentStart, cursor); 159 } else if (ch == '%') { 160 cursor++; 161 if (cursor < source.length() && (source.charAt(cursor) == '`')) { 162 cursor++; 163 while (cursor < source.length() && (source.charAt(cursor) != '`')) 164 cursor++; 165 cursor++; 166 } else 167 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') 168 || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') 169 || (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' 170 || source.charAt(cursor) == '-')) 171 cursor++; 172 current = source.substring(currentStart, cursor); 173 } else if (ch == '/') { 174 cursor++; 175 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 176 // this is en error - should already have been skipped 177 error("This shoudn't happen?"); 178 } 179 current = source.substring(currentStart, cursor); 180 } else if (ch == '$') { 181 cursor++; 182 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 183 cursor++; 184 current = source.substring(currentStart, cursor); 185 } else if (ch == '{') { 186 cursor++; 187 ch = source.charAt(cursor); 188 if (ch == '}') 189 cursor++; 190 current = source.substring(currentStart, cursor); 191 } else if (ch == '"') { 192 cursor++; 193 boolean escape = false; 194 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 195 if (escape) 196 escape = false; 197 else 198 escape = (source.charAt(cursor) == '\\'); 199 cursor++; 200 } 201 if (cursor == source.length()) 202 throw error("Unterminated string"); 203 cursor++; 204 current = "\"" + source.substring(currentStart + 1, cursor - 1) + "\""; 205 } else if (ch == '`') { 206 cursor++; 207 boolean escape = false; 208 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 209 if (escape) 210 escape = false; 211 else 212 escape = (source.charAt(cursor) == '\\'); 213 cursor++; 214 } 215 if (cursor == source.length()) 216 throw error("Unterminated string"); 217 cursor++; 218 current = "`" + source.substring(currentStart + 1, cursor - 1) + "`"; 219 } else if (ch == '\'') { 220 cursor++; 221 char ech = ch; 222 boolean escape = false; 223 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 224 if (escape) 225 escape = false; 226 else 227 escape = (source.charAt(cursor) == '\\'); 228 cursor++; 229 } 230 if (cursor == source.length()) 231 throw error("Unterminated string"); 232 cursor++; 233 current = source.substring(currentStart, cursor); 234 if (ech == '\'') 235 current = "\'" + current.substring(1, current.length() - 1) + "\'"; 236 } else if (ch == '`') { 237 cursor++; 238 boolean escape = false; 239 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 240 if (escape) 241 escape = false; 242 else 243 escape = (source.charAt(cursor) == '\\'); 244 cursor++; 245 } 246 if (cursor == source.length()) 247 throw error("Unterminated string"); 248 cursor++; 249 current = "`" + source.substring(currentStart + 1, cursor - 1) + "`"; 250 } else if (ch == '@') { 251 int start = cursor; 252 cursor++; 253 while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) 254 cursor++; 255 current = source.substring(currentStart, cursor); 256 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 257 cursor++; 258 current = source.substring(currentStart, cursor); 259 } 260 } 261 } 262 263 private void skipWhitespaceAndComments() { 264 boolean last13 = false; 265 boolean done = false; 266 while (cursor < source.length() && !done) { 267 if (cursor < source.length() - 1 && "//".equals(source.substring(cursor, cursor + 2))) { 268 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 269 cursor++; 270 } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor + 2))) { 271 while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor + 2))) { 272 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 273 cursor++; 274 } 275 if (cursor >= source.length() - 1) { 276 error("Unfinished comment"); 277 } else { 278 cursor = cursor + 2; 279 } 280 } else if (Character.isWhitespace(source.charAt(cursor))) { 281 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 282 cursor++; 283 } else { 284 done = true; 285 } 286 } 287 } 288 289 private boolean isDateChar(char ch, int start) { 290 int eot = source.charAt(start + 1) == 'T' ? 10 : 20; 291 292 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) 293 || (cursor - start == eot && ch == '.' && cursor < source.length() - 1 294 && Character.isDigit(source.charAt(cursor + 1))); 295 } 296 297 public boolean isOp() { 298 return ExpressionNode.Operation.fromCode(current) != null; 299 } 300 301 public boolean done() { 302 return currentStart >= source.length(); 303 } 304 305 public int nextId() { 306 id++; 307 return id; 308 } 309 310 public SourceLocation getCurrentStartLocation() { 311 return currentStartLocation; 312 } 313 314 // special case use 315 public void setCurrent(String current) { 316 this.current = current; 317 } 318 319 public boolean hasComment() { 320 return !done() && current.startsWith("//"); 321 } 322 323 public boolean hasToken(String kw) { 324 return !done() && kw.equals(current); 325 } 326 327 public boolean hasToken(String... names) { 328 if (done()) 329 return false; 330 for (String s : names) 331 if (s.equals(current)) 332 return true; 333 return false; 334 } 335 336 public void token(String kw) throws FHIRLexerException { 337 if (!kw.equals(current)) 338 throw error("Found \"" + current + "\" expecting \"" + kw + "\""); 339 next(); 340 } 341 342 public String readConstant(String desc) throws FHIRLexerException { 343 if (!isStringConstant()) 344 throw error("Found " + current + " expecting \"[" + desc + "]\""); 345 346 return processConstant(take()); 347 } 348 349 public String readFixedName(String desc) throws FHIRLexerException { 350 if (!isFixedName()) 351 throw error("Found " + current + " expecting \"[" + desc + "]\""); 352 353 return processFixedName(take()); 354 } 355 356 public String processConstant(String s) throws FHIRLexerException { 357 StringBuilder b = new StringBuilder(); 358 int i = 1; 359 while (i < s.length() - 1) { 360 char ch = s.charAt(i); 361 if (ch == '\\') { 362 i++; 363 switch (s.charAt(i)) { 364 case 't': 365 b.append('\t'); 366 break; 367 case 'r': 368 b.append('\r'); 369 break; 370 case 'n': 371 b.append('\n'); 372 break; 373 case 'f': 374 b.append('\f'); 375 break; 376 case '\'': 377 b.append('\''); 378 break; 379 case '"': 380 b.append('"'); 381 break; 382 case '`': 383 b.append('`'); 384 break; 385 case '\\': 386 b.append('\\'); 387 break; 388 case '/': 389 b.append('/'); 390 break; 391 case 'u': 392 i++; 393 int uc = Integer.parseInt(s.substring(i, i + 4), 16); 394 b.append(Character.toString(uc)); 395 i = i + 4; 396 break; 397 default: 398 throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i)); 399 } 400 } else { 401 b.append(ch); 402 i++; 403 } 404 } 405 return b.toString(); 406 } 407 408 public String processFixedName(String s) throws FHIRLexerException { 409 StringBuilder b = new StringBuilder(); 410 int i = 1; 411 while (i < s.length() - 1) { 412 char ch = s.charAt(i); 413 if (ch == '\\') { 414 i++; 415 switch (s.charAt(i)) { 416 case 't': 417 b.append('\t'); 418 break; 419 case 'r': 420 b.append('\r'); 421 break; 422 case 'n': 423 b.append('\n'); 424 break; 425 case 'f': 426 b.append('\f'); 427 break; 428 case '\'': 429 b.append('\''); 430 break; 431 case '"': 432 b.append('"'); 433 break; 434 case '\\': 435 b.append('\\'); 436 break; 437 case '/': 438 b.append('/'); 439 break; 440 case 'u': 441 i++; 442 int uc = Integer.parseInt(s.substring(i, i + 4), 16); 443 b.append(Character.toString(uc)); 444 i = i + 4; 445 break; 446 default: 447 throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i)); 448 } 449 } else { 450 b.append(ch); 451 i++; 452 } 453 } 454 return b.toString(); 455 } 456 457 public void skipToken(String token) throws FHIRLexerException { 458 if (getCurrent().equals(token)) 459 next(); 460 461 } 462 463 public String takeDottedToken() throws FHIRLexerException { 464 StringBuilder b = new StringBuilder(); 465 b.append(take()); 466 while (!done() && getCurrent().equals(".")) { 467 b.append(take()); 468 b.append(take()); 469 } 470 return b.toString(); 471 } 472 473 public void skipComments() throws FHIRLexerException { 474 while (!done() && hasComment()) 475 next(); 476 } 477 478 public int getCurrentStart() { 479 return currentStart; 480 } 481 482}