
package org.hl7.fhir.r5.fhirpath;

import java.util.ArrayList;
import java.util.List;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
import org.hl7.fhir.utilities.SourceLocation;
import org.hl7.fhir.utilities.Utilities;

// shared lexer for concrete syntaxes
// - FluentPath
// - Mapping language

/**
 * Hand-written tokenizer. The lexer always holds one "current" token; {@link #next()} skips
 * whitespace and comments and then reads the following token from the source text.
 * Comments skipped immediately before the current token are collected and can be retrieved
 * with {@link #getComments()} and related methods.
 */
public class FHIRLexer {

  /**
   * Exception raised for lexical errors; optionally carries the {@link SourceLocation} at
   * which the problem was detected.
   */
  public class FHIRLexerException extends FHIRException {

    private SourceLocation location;

    public FHIRLexerException(String message) {
      super(message);
    }

    public FHIRLexerException(String message, Throwable cause) {
      super(message, cause);
    }

    public FHIRLexerException(String message, SourceLocation location) {
      super(message);
      this.location = location;
    }

    /** @return the location passed at construction time, or null if none was given */
    public SourceLocation getLocation() {
      return location;
    }

  }

  private String source;
  private int cursor;                   // index of the next unread character in source
  private int currentStart;             // index in source at which the current token starts
  private String current;               // the current token; null once the input is exhausted
  private List<String> comments = new ArrayList<>();
  private SourceLocation currentLocation;
  private SourceLocation currentStartLocation;
  private int id;
  private String name;
  // in liquid mode, || terminates the expression and hands the parser back to the host
  // (what this class itself does: when set, next() reads "|" or "||" as a single token)
  private boolean liquidMode;
  private SourceLocation commentLocation;
  private boolean metadataFormat;       // when set, a run of three slashes is a token rather than a line comment
  private boolean allowDoubleQuotes;    // when set, double-quoted text is read as one token

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source text to tokenize; null is treated as empty
   * @param name   label for the source; null is replaced by "??"
   */
  public FHIRLexer(String source, String name) throws FHIRLexerException {
    this(source, name, 0, false, false);
  }

  /**
   * Creates a lexer over {@code source} that starts reading at character index {@code i}.
   *
   * @param source text to tokenize; null is treated as empty
   * @param i      index of the first character to read
   */
  public FHIRLexer(String source, int i) throws FHIRLexerException {
    this(source, null, i, false, false);
  }

  /**
   * Creates a lexer over {@code source} that starts reading at character index {@code i}.
   *
   * @param source            text to tokenize; null is treated as empty
   * @param i                 index of the first character to read
   * @param allowDoubleQuotes whether double-quoted text is read as a single token
   */
  public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException {
    this(source, null, i, false, allowDoubleQuotes);
  }

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source            text to tokenize; null is treated as empty
   * @param name              label for the source; null is replaced by "??"
   * @param metadataFormat    whether a run of three slashes is a token instead of a comment
   * @param allowDoubleQuotes whether double-quoted text is read as a single token
   */
  public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
    this(source, name, 0, metadataFormat, allowDoubleQuotes);
  }

  // Single initialisation path shared by all public constructors, so that null handling
  // and the initial location are identical no matter which overload is used.
  private FHIRLexer(String source, String name, int start, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
    this.source = source == null ? "" : Utilities.stripBOM(source);
    this.name = name == null ? "??" : name;
    this.cursor = start;
    this.metadataFormat = metadataFormat;
    this.allowDoubleQuotes = allowDoubleQuotes;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /** @return the current token, or null when the input is exhausted */
  public String getCurrent() {
    return current;
  }

  public SourceLocation getCurrentLocation() {
    return currentLocation;
  }

  public boolean isConstant() {
    return FHIRPathConstant.isFHIRPathConstant(current);
  }

  public boolean isFixedName() {
    return FHIRPathConstant.isFHIRPathFixedName(current);
  }

  public boolean isStringConstant() {
    return FHIRPathConstant.isFHIRPathStringConstant(current);
  }

  /**
   * Returns the current token and advances to the next one.
   */
  public String take() throws FHIRLexerException {
    String s = current;
    next();
    return s;
  }

  /**
   * Returns the current token as an int and advances to the next one.
   *
   * @throws FHIRLexerException if {@code Utilities.isInteger} rejects the current token
   */
  public int takeInt() throws FHIRLexerException {
    String s = current;
    if (!Utilities.isInteger(s))
      throw error("Found "+current+" expecting an integer");
    next();
    return Integer.parseInt(s);
  }

  /**
   * Tests whether the current token is name-like: it starts with "$", is "*" or "**", or is
   * an ASCII letter followed only by ASCII letters, digits and underscores (the same
   * character set {@link #next()} uses when it reads an identifier).
   */
  public boolean isToken() {
    if (Utilities.noString(current))
      return false;

    if (current.startsWith("$"))
      return true;

    if (current.equals("*") || current.equals("**"))
      return true;

    if (!isAsciiLetter(current.charAt(0)))
      return false;

    // every remaining character has to be checked, not just the second one
    for (int i = 1; i < current.length(); i++) {
      char c = current.charAt(i);
      if (!(isAsciiLetter(c) || isAsciiDigit(c) || c == '_'))
        return false;
    }
    return true;
  }

  /**
   * Builds (does not throw) an exception for {@code msg} positioned at the current location.
   */
  public FHIRLexerException error(String msg) {
    return error(msg, currentLocation.toString(), currentLocation);
  }

  /**
   * Builds (does not throw) an exception whose message is prefixed with {@code location}.
   */
  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
    return new FHIRLexerException("Error @"+location+": "+msg, loc);
  }

  /**
   * Skips whitespace and comments, then reads the next token into the current token.
   * When the input is exhausted the current token becomes null.
   *
   * @throws FHIRLexerException for an unterminated quoted token or block comment
   */
  public void next() throws FHIRLexerException {
    skipWhitespaceAndComments();
    current = null;
    currentStart = cursor;
    currentStartLocation = currentLocation.copy();
    if (cursor < source.length()) {
      char ch = source.charAt(cursor);
      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') {
        // operator, optionally extended by one more character
        cursor++;
        if (cursor < source.length()) {
          char second = source.charAt(cursor);
          if (second == '=' || second == '~' || second == '-' || (ch == '-' && second == '>'))
            cursor++;
        }
      } else if (ch == '.') {
        cursor++;
        if (cursor < source.length() && source.charAt(cursor) == '.')
          cursor++;
      } else if (isAsciiDigit(ch)) {
        // digits with at most one embedded '.'
        cursor++;
        boolean dotted = false;
        while (cursor < source.length() && (isAsciiDigit(source.charAt(cursor)) || (source.charAt(cursor) == '.' && !dotted))) {
          if (source.charAt(cursor) == '.')
            dotted = true;
          cursor++;
        }
        // a trailing '.' is not part of the number; leave it for the next token
        if (source.charAt(cursor - 1) == '.')
          cursor--;
      } else if (isAsciiLetter(ch)) {
        while (cursor < source.length() && isIdentifierPart(source.charAt(cursor)))
          cursor++;
      } else if (ch == '%') {
        cursor++;
        if (cursor < source.length() && source.charAt(cursor) == '`') {
          // backtick-delimited name after '%'; no escape handling in this form
          cursor++;
          while (cursor < source.length() && source.charAt(cursor) != '`')
            cursor++;
          if (cursor == source.length())
            throw error("Unterminated string");
          cursor++;
        } else {
          while (cursor < source.length() && isPercentNamePart(source.charAt(cursor)))
            cursor++;
        }
      } else if (ch == '/') {
        cursor++;
        if (cursor < source.length() && source.charAt(cursor) == '/') {
          // we've run into metadata: skipWhitespaceAndComments() only leaves a double slash
          // in place when a third slash follows, so all three are consumed as one token
          cursor += 2;
        }
      } else if (ch == '$') {
        cursor++;
        while (cursor < source.length() && source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')
          cursor++;
      } else if (ch == '{') {
        // "{}" is a single token; a lone "{" is a token on its own
        cursor++;
        if (cursor < source.length() && source.charAt(cursor) == '}')
          cursor++;
      } else if ((ch == '"' && allowDoubleQuotes) || ch == '`' || ch == '\'') {
        // quoted token; the token text keeps its delimiters and its escapes undecoded
        scanDelimited(ch);
      } else if (ch == '|' && liquidMode) {
        cursor++;
        if (cursor < source.length() && source.charAt(cursor) == '|')
          cursor++;
      } else if (ch == '@') {
        int start = cursor;
        cursor++;
        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
          cursor++;
      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
        cursor++;
      }
      current = source.substring(currentStart, cursor);
    }
    currentLocation.incColumn(cursor - currentStart);
  }

  // Advances the cursor from an opening delimiter to just past the matching closing one.
  // A backslash makes the following character part of the content, even if it is the delimiter.
  private void scanDelimited(char delimiter) throws FHIRLexerException {
    cursor++;
    boolean escape = false;
    while (cursor < source.length() && (escape || source.charAt(cursor) != delimiter)) {
      escape = !escape && source.charAt(cursor) == '\\';
      cursor++;
    }
    if (cursor == source.length())
      throw error("Unterminated string");
    cursor++;
  }

  private static boolean isAsciiLetter(char c) {
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
  }

  private static boolean isAsciiDigit(char c) {
    return c >= '0' && c <= '9';
  }

  // characters next() accepts after the first letter of an identifier
  private static boolean isIdentifierPart(char c) {
    return isAsciiLetter(c) || isAsciiDigit(c) || c == '_';
  }

  // characters next() accepts in an unquoted name following '%'
  private static boolean isPercentNamePart(char c) {
    return isAsciiLetter(c) || isAsciiDigit(c) || c == ':' || c == '-' || c == '_';
  }

  // Moves the cursor past whitespace, line comments and block comments. The text of each
  // comment (delimiters removed, trimmed) is collected in 'comments', and 'commentLocation'
  // records where the first of them started. Both are reset on every call.
  private void skipWhitespaceAndComments() throws FHIRLexerException {
    comments.clear();
    commentLocation = null;
    boolean last13 = false;
    boolean done = false;
    while (cursor < source.length() && !done) {
      if (source.startsWith("//", cursor) && !isMetadataStart()) {
        if (commentLocation == null) {
          commentLocation = currentLocation.copy();
        }
        int start = cursor + 2;
        while (cursor < source.length() && !(source.charAt(cursor) == '\r' || source.charAt(cursor) == '\n')) {
          cursor++;
          currentLocation.incColumn();
        }
        comments.add(source.substring(start, cursor).trim());
      } else if (source.startsWith("/*", cursor)) {
        if (commentLocation == null) {
          commentLocation = currentLocation.copy();
        }
        int start = cursor + 2;
        // the terminator is only looked for once the two opening characters are behind us,
        // so the star of the opener can never double as the star of the terminator
        while (cursor < source.length() - 1 && (cursor < start || !source.startsWith("*/", cursor))) {
          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
          cursor++;
          currentLocation.incColumn();
        }
        if (cursor >= source.length() - 1) {
          throw error("Unfinished comment");
        }
        comments.add(source.substring(start, cursor).trim());
        cursor = cursor + 2;
        currentLocation.incColumn(2);
      } else if (Utilities.isWhitespace(source.charAt(cursor))) {
        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
        cursor++;
        // checkChar increments the position
        // currentLocation.incColumn();
      } else {
        done = true;
      }
    }
  }

  // true when metadata format is on and the cursor sits on three consecutive slashes
  private boolean isMetadataStart() {
    return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3));
  }

  // Decides whether ch (the character at the cursor) continues a token that began with '@'
  // at index 'start'. A '.' is only accepted at one specific offset from 'start' (10 when the
  // character right after '@' is 'T', otherwise 20) and only when a digit follows it.
  // NOTE(review): presumably that offset is where fractional seconds begin - confirm.
  private boolean isDateChar(char ch,int start) {
    int eot = source.charAt(start+1) == 'T' ? 10 : 20;

    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
  }

  public boolean isOp() {
    return ExpressionNode.Operation.fromCode(current) != null;
  }

  /** @return true once the start of the current token is at or beyond the end of the source */
  public boolean done() {
    return currentStart >= source.length();
  }

  /** @return the next value of a counter that starts at 1 and increases by one per call */
  public int nextId() {
    id++;
    return id;
  }

  public SourceLocation getCurrentStartLocation() {
    return currentStartLocation;
  }

  // special case use
  public void setCurrent(String current) {
    this.current = current;
  }

  public boolean hasComment() {
    return !done() && current.startsWith("//");
  }

  /** @return true if comments were collected in front of the current token and not yet consumed */
  public boolean hasComments() {
    return !comments.isEmpty();
  }


  /** @return the live list of collected comments (not a copy) */
  public List<String> getComments() {
    return comments;
  }

  /**
   * Joins all collected comments with CR LF, clears the collection and returns the result.
   */
  public String getAllComments() {
    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
    b.addAll(comments);
    comments.clear();
    return b.toString();
  }

  /**
   * Removes and returns the first collected comment.
   *
   * @return the comment text, or null if there is none
   */
  public String getFirstComment() {
    return hasComments() ? comments.remove(0) : null;
  }

  /** @return true if input remains and the current token equals {@code kw} */
  public boolean hasToken(String kw) {
    return !done() && kw.equals(current);
  }

  /** @return true if input remains and the current token equals any of {@code names} */
  public boolean hasToken(String... names) {
    if (done())
      return false;
    for (String s : names)
      if (s.equals(current))
        return true;
    return false;
  }

  /**
   * Consumes the current token, which must equal {@code kw}.
   *
   * @throws FHIRLexerException if the current token is anything else
   */
  public void token(String kw) throws FHIRLexerException {
    if (!kw.equals(current))
      throw error("Found \""+current+"\" expecting \""+kw+"\"");
    next();
  }

  /**
   * Consumes the current token, which must be a string constant, and returns its decoded
   * content.
   *
   * @param desc description of what was expected, used in the error message
   */
  public String readConstant(String desc) throws FHIRLexerException {
    if (!isStringConstant())
      throw error("Found "+current+" expecting \"["+desc+"]\"");

    return processConstant(take());
  }

  /**
   * Consumes the current token, which must be a fixed name, and returns its decoded content.
   *
   * @param desc description of what was expected, used in the error message
   */
  public String readFixedName(String desc) throws FHIRLexerException {
    if (!isFixedName())
      throw error("Found "+current+" expecting \"["+desc+"]\"");

    return processFixedName(take());
  }

  /**
   * Strips the first and last character of {@code s} (the delimiters) and decodes the
   * backslash escapes in between.
   *
   * @throws FHIRLexerException on an unknown escape or a malformed unicode escape
   */
  public String processConstant(String s) throws FHIRLexerException {
    return unescape(s);
  }

  /**
   * Strips the first and last character of {@code s} (the delimiters) and decodes the
   * backslash escapes in between.
   *
   * @throws FHIRLexerException on an unknown escape or a malformed unicode escape
   */
  public String processFixedName(String s) throws FHIRLexerException {
    return unescape(s);
  }

  // Shared escape decoder for delimited tokens. Recognised after a backslash: t r n f,
  // single quote, double quote, backtick, backslash, slash, and 'u' plus four hex digits.
  private String unescape(String s) throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    int i = 1;
    while (i < s.length() - 1) {
      char ch = s.charAt(i);
      if (ch != '\\') {
        b.append(ch);
        i++;
        continue;
      }
      i++; // now on the character that selects the escape
      char code = s.charAt(i);
      switch (code) {
      case 't':
        b.append('\t');
        break;
      case 'r':
        b.append('\r');
        break;
      case 'n':
        b.append('\n');
        break;
      case 'f':
        b.append('\f');
        break;
      case '\'':
        b.append('\'');
        break;
      case '"':
        b.append('"');
        break;
      case '`':
        b.append('`');
        break;
      case '\\':
        b.append('\\');
        break;
      case '/':
        b.append('/');
        break;
      case 'u':
        b.append(decodeUnicodeEscape(s, i + 1));
        i = i + 4; // now on the last hex digit
        break;
      default:
        throw new FHIRLexerException("Unknown FHIRPath character escape \\"+code, currentLocation);
      }
      i++; // step past the escape selector (or the last hex digit)
    }
    return b.toString();
  }

  // Reads exactly four hexadecimal digits of s starting at 'start' and returns the
  // corresponding code point as a string.
  private String decodeUnicodeEscape(String s, int start) throws FHIRLexerException {
    if (start + 4 > s.length())
      throw new FHIRLexerException("Incomplete FHIRPath unicode escape \\u"+s.substring(start), currentLocation);
    int codePoint = 0;
    for (int k = start; k < start + 4; k++) {
      int digit = Character.digit(s.charAt(k), 16);
      if (digit < 0)
        throw new FHIRLexerException("Invalid FHIRPath unicode escape \\u"+s.substring(start, start + 4), currentLocation);
      codePoint = codePoint * 16 + digit;
    }
    return Character.toString(codePoint);
  }

  /**
   * Advances past the current token if it equals {@code token}; otherwise does nothing
   * (including when the input is exhausted).
   */
  public void skipToken(String token) throws FHIRLexerException {
    if (current != null && current.equals(token))
      next();

  }

  /**
   * Takes the current token and then, for as long as the current token is ".", takes the
   * "." and the token after it, concatenating everything into one string.
   */
  public String takeDottedToken() throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    b.append(take());
    while (!done() && getCurrent().equals(".")) {
      b.append(take());
      b.append(take());
    }
    return b.toString();
  }

  public void skipComments() throws FHIRLexerException {
    while (!done() && hasComment())
      next();
  }

  public int getCurrentStart() {
    return currentStart;
  }

  public String getSource() {
    return source;
  }

  public boolean isLiquidMode() {
    return liquidMode;
  }

  public void setLiquidMode(boolean liquidMode) {
    this.liquidMode = liquidMode;
  }

  /** @return where the first comment in front of the current token started, or null if none */
  public SourceLocation getCommentLocation() {
    return this.commentLocation;
  }

  public boolean isMetadataFormat() {
    return metadataFormat;
  }

  public void setMetadataFormat(boolean metadataFormat) {
    this.metadataFormat = metadataFormat;
  }

  /** @return a new list holding the currently collected comments */
  public List<String> cloneComments() {
    return new ArrayList<>(getComments());
  }

  /**
   * Consumes {@code token} and, if a comment follows it that starts on the line the lexer
   * location was on before consuming, removes and returns that comment.
   *
   * @return the trailing comment text, or null if there is none
   */
  public String tokenWithTrailingComment(String token) {
    int line = getCurrentLocation().getLine();
    token(token);
    if (getComments().size() > 0 && getCommentLocation().getLine() == line) {
      return getFirstComment();
    } else {
      return null;
    }
  }

  public boolean isAllowDoubleQuotes() {
    return allowDoubleQuotes;
  }
}