
package org.hl7.fhir.r4.fhirpath;

import java.util.ArrayList;
import java.util.List;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
import org.hl7.fhir.utilities.SourceLocation;
import org.hl7.fhir.utilities.Utilities;

// shared lexer for concrete syntaxes
// - FluentPath
// - Mapping language

/**
 * Hand-written tokenizer over a source string.
 *
 * <p>The lexer always holds one "current" token ({@link #getCurrent()}); {@link #next()}
 * advances to the following one. Comments that precede the current token are collected
 * and can be retrieved with {@link #getComments()} and friends. Once the input is
 * exhausted the current token is {@code null} and {@link #done()} returns {@code true}.
 */
public class FHIRLexer {

  /**
   * Exception raised for lexical errors, optionally carrying the source location at
   * which the problem was detected.
   *
   * <p>Deliberately left as an inner (non-static) class so that existing code which
   * creates or references it through an enclosing lexer keeps compiling.
   */
  public class FHIRLexerException extends FHIRException {

    private SourceLocation location;

    public FHIRLexerException(String message) {
      super(message);
    }

    public FHIRLexerException(String message, Throwable cause) {
      super(message, cause);
    }

    public FHIRLexerException(String message, SourceLocation location) {
      super(message);
      this.location = location;
    }

    /**
     * @return the location supplied at construction time, or {@code null} if none was given
     */
    public SourceLocation getLocation() {
      return location;
    }

  }

  private String source;
  private int cursor;
  private int currentStart;
  private String current;
  private List<String> comments = new ArrayList<>();
  private SourceLocation currentLocation;
  private SourceLocation currentStartLocation;
  private int id;
  private String name;
  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
  private SourceLocation commentLocation;
  private boolean metadataFormat;
  private boolean allowDoubleQuotes;

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source the text to tokenize; {@code null} is treated as empty
   * @param name   a name for the source; {@code null} is replaced by {@code "??"}
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, String name) throws FHIRLexerException {
    this.source = source == null ? "" : Utilities.stripBOM(source);
    this.name = name == null ? "??" : name;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /**
   * Creates a lexer that starts reading at offset {@code i} of {@code source}.
   *
   * @param source the text to tokenize; {@code null} is treated as empty
   * @param i      the character offset at which lexing starts
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, int i) throws FHIRLexerException {
    this.source = source == null ? "" : Utilities.stripBOM(source);
    this.cursor = i;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /**
   * Creates a lexer that starts reading at offset {@code i} of {@code source}.
   *
   * @param source            the text to tokenize; {@code null} is treated as empty
   * @param i                 the character offset at which lexing starts
   * @param allowDoubleQuotes whether {@code "..."} is lexed as a single quoted token
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException {
    this.source = source == null ? "" : Utilities.stripBOM(source);
    this.cursor = i;
    this.allowDoubleQuotes = allowDoubleQuotes;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source            the text to tokenize; {@code null} is treated as empty
   * @param name              a name for the source; {@code null} is replaced by {@code "??"}
   * @param metadataFormat    when set, {@code ///} is not treated as the start of a line comment
   * @param allowDoubleQuotes whether {@code "..."} is lexed as a single quoted token
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
    this.source = source == null ? "" : Utilities.stripBOM(source);
    this.name = name == null ? "??" : name;
    this.metadataFormat = metadataFormat;
    this.allowDoubleQuotes = allowDoubleQuotes;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /**
   * @return the current token, or {@code null} when the input is exhausted
   */
  public String getCurrent() {
    return current;
  }

  public SourceLocation getCurrentLocation() {
    return currentLocation;
  }

  public boolean isConstant() {
    return FHIRPathConstant.isFHIRPathConstant(current);
  }

  public boolean isFixedName() {
    return FHIRPathConstant.isFHIRPathFixedName(current);
  }

  public boolean isStringConstant() {
    return FHIRPathConstant.isFHIRPathStringConstant(current);
  }

  /**
   * Returns the current token and advances to the next one.
   */
  public String take() throws FHIRLexerException {
    String s = current;
    next();
    return s;
  }

  /**
   * Returns the current token as an integer and advances to the next one.
   *
   * @throws FHIRLexerException if the current token is not an integer
   */
  public int takeInt() throws FHIRLexerException {
    String s = current;
    if (!Utilities.isInteger(s)) {
      throw error("Found "+current+" expecting an integer");
    }
    next();
    return Integer.parseInt(s);
  }

  /**
   * Tests whether the current token is a name-like token: anything starting with
   * {@code $}, the wildcards {@code *} and {@code **}, or an ASCII letter followed
   * only by ASCII letters, digits and underscores (the same character set the
   * identifier scanner in {@link #next()} accepts).
   */
  public boolean isToken() {
    if (Utilities.noString(current)) {
      return false;
    }
    if (current.startsWith("$")) {
      return true;
    }
    if (current.equals("*") || current.equals("**")) {
      return true;
    }
    if (!isAsciiLetter(current.charAt(0))) {
      return false;
    }
    // Every remaining character must be checked individually.
    for (int i = 1; i < current.length(); i++) {
      char c = current.charAt(i);
      if (!(isAsciiLetter(c) || isAsciiDigit(c) || c == '_')) {
        return false;
      }
    }
    return true;
  }

  /**
   * Builds (does not throw) an exception for {@code msg} at the current location.
   * Callers are expected to {@code throw} the returned value.
   */
  public FHIRLexerException error(String msg) {
    return error(msg, currentLocation.toString(), currentLocation);
  }

  /**
   * Builds (does not throw) an exception whose message is prefixed with {@code location}.
   */
  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
    return new FHIRLexerException("Error @"+location+": "+msg, loc);
  }

  /**
   * Skips whitespace and comments, then reads the next token into {@link #getCurrent()}.
   * When the input is exhausted the current token becomes {@code null}.
   *
   * @throws FHIRLexerException on an unterminated quoted token or block comment
   */
  public void next() throws FHIRLexerException {
    skipWhitespaceAndComments();
    current = null;
    currentStart = cursor;
    currentStartLocation = currentLocation.copy();
    if (cursor < source.length()) {
      char ch = source.charAt(cursor);
      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') {
        // one- or two-character operator: optional second char of '=', '~', '-' (or '>' after '-')
        cursor++;
        if (cursor < source.length()) {
          char second = source.charAt(cursor);
          if (second == '=' || second == '~' || second == '-' || (ch == '-' && second == '>')) {
            cursor++;
          }
        }
        current = source.substring(currentStart, cursor);
      } else if (ch == '.') {
        // '.' or '..'
        cursor++;
        if (cursor < source.length() && (source.charAt(cursor) == '.')) {
          cursor++;
        }
        current = source.substring(currentStart, cursor);
      } else if (isAsciiDigit(ch)) {
        // digits with at most one embedded '.'
        cursor++;
        boolean dotted = false;
        while (cursor < source.length() && (isAsciiDigit(source.charAt(cursor)) || (source.charAt(cursor) == '.' && !dotted))) {
          if (source.charAt(cursor) == '.') {
            dotted = true;
          }
          cursor++;
        }
        // a trailing '.' is not part of the number; give it back
        if (source.charAt(cursor-1) == '.') {
          cursor--;
        }
        current = source.substring(currentStart, cursor);
      } else if (isAsciiLetter(ch)) {
        while (cursor < source.length() && isIdentifierPart(source.charAt(cursor))) {
          cursor++;
        }
        current = source.substring(currentStart, cursor);
      } else if (ch == '%') {
        cursor++;
        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
          // %`...` : everything up to the closing backtick
          cursor++;
          while (cursor < source.length() && (source.charAt(cursor) != '`')) {
            cursor++;
          }
          if (cursor >= source.length()) {
            throw error("Unterminated string");
          }
          cursor++;
        } else {
          while (cursor < source.length() && (isIdentifierPart(source.charAt(cursor)) || source.charAt(cursor) == ':' || source.charAt(cursor) == '-')) {
            cursor++;
          }
        }
        current = source.substring(currentStart, cursor);
      } else if (ch == '/') {
        cursor++;
        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
          // we've run into metadata
          // NOTE(review): this is only expected to be reached for "///" (a plain "//" is
          // consumed as a comment before we get here), hence the two further characters.
          cursor++;
          cursor++;
          current = source.substring(currentStart, cursor);
        } else {
          current = source.substring(currentStart, cursor);
        }
      } else if (ch == '$') {
        cursor++;
        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) {
          cursor++;
        }
        current = source.substring(currentStart, cursor);
      } else if (ch == '{') {
        // '{' or '{}'
        cursor++;
        if (cursor < source.length() && source.charAt(cursor) == '}') {
          cursor++;
        }
        current = source.substring(currentStart, cursor);
      } else if ((ch == '"' && allowDoubleQuotes) || ch == '`' || ch == '\'') {
        scanDelimited(ch);
      } else if (ch == '|' && liquidMode) {
        // '|' or '||'
        cursor++;
        if (cursor < source.length() && source.charAt(cursor) == '|') {
          cursor++;
        }
        current = source.substring(currentStart, cursor);
      } else if (ch == '@') {
        int start = cursor;
        cursor++;
        while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) {
          cursor++;
        }
        current = source.substring(currentStart, cursor);
      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
        cursor++;
        current = source.substring(currentStart, cursor);
      }
    }
    currentLocation.incColumn(cursor - currentStart);
  }

  /**
   * Scans a token that starts at the cursor with {@code delimiter} and runs to the next
   * unescaped occurrence of the same character; a backslash escapes the character after it.
   * The token text (delimiters included) becomes the current token.
   */
  private void scanDelimited(char delimiter) throws FHIRLexerException {
    cursor++; // opening delimiter
    boolean escape = false;
    while (cursor < source.length() && (escape || source.charAt(cursor) != delimiter)) {
      escape = !escape && source.charAt(cursor) == '\\';
      cursor++;
    }
    if (cursor == source.length()) {
      throw error("Unterminated string");
    }
    cursor++; // closing delimiter
    current = source.substring(currentStart, cursor);
  }

  private static boolean isAsciiLetter(char c) {
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
  }

  private static boolean isAsciiDigit(char c) {
    return c >= '0' && c <= '9';
  }

  private static boolean isIdentifierPart(char c) {
    return isAsciiLetter(c) || isAsciiDigit(c) || c == '_';
  }

  /**
   * Advances the cursor past whitespace, {@code //} line comments and block comments,
   * collecting the trimmed comment texts. The comment list is reset on every call, and the
   * location of the first comment found is remembered.
   *
   * @throws FHIRLexerException if a block comment is not terminated
   */
  private void skipWhitespaceAndComments() throws FHIRLexerException {
    comments.clear();
    commentLocation = null;
    boolean last13 = false;
    boolean done = false;
    while (cursor < source.length() && !done) {
      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2)) && !isMetadataStart()) {
        if (commentLocation == null) {
          commentLocation = currentLocation.copy();
        }
        int start = cursor+2;
        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) {
          cursor++;
          currentLocation.incColumn();
        }
        comments.add(source.substring(start, cursor).trim());
      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
        if (commentLocation == null) {
          commentLocation = currentLocation.copy();
        }
        int start = cursor+2;
        // The terminator is only looked for once the opener has been passed, so that the
        // '*' of the opener cannot be mistaken for the start of the terminator ("/*/").
        while (cursor < source.length() - 1 && !(cursor >= start && "*/".equals(source.substring(cursor, cursor+2)))) {
          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
          cursor++;
          currentLocation.incColumn();
        }
        if (cursor >= source.length() -1) {
          throw error("Unfinished comment");
        } else {
          comments.add(source.substring(start, cursor).trim());
          cursor = cursor + 2;
          currentLocation.incColumn(2);
        }
      } else if (Utilities.isWhitespace(source.charAt(cursor))) {
        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
        cursor++;
        currentLocation.incColumn();
      } else {
        done = true;
      }
    }
  }

  /** True when metadata format is enabled and the cursor is at {@code ///}. */
  private boolean isMetadataStart() {
    return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3));
  }

  /**
   * Decides whether {@code ch} (the character at the cursor) continues the {@code @...}
   * literal that began at offset {@code start}. A '.' is only accepted at one specific
   * offset (10 when the literal starts with {@code @T}, 20 otherwise) and only when a
   * digit follows it.
   */
  private boolean isDateChar(char ch,int start) {
    int eot = source.charAt(start+1) == 'T' ? 10 : 20;

    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
  }

  public boolean isOp() {
    return ExpressionNode.Operation.fromCode(current) != null;
  }

  /**
   * @return {@code true} when the current token starts at or beyond the end of the source
   */
  public boolean done() {
    return currentStart >= source.length();
  }

  /**
   * @return the next value of a counter that starts at 1 and increases by one per call
   */
  public int nextId() {
    id++;
    return id;
  }

  public SourceLocation getCurrentStartLocation() {
    return currentStartLocation;
  }

  // special case use
  public void setCurrent(String current) {
    this.current = current;
  }

  public boolean hasComment() {
    return !done() && current.startsWith("//");
  }

  public boolean hasComments() {
    return comments.size() > 0;
  }


  /**
   * @return the live list of comments collected before the current token
   */
  public List<String> getComments() {
    return comments;
  }

  /**
   * Joins all collected comments with CRLF, clears the list, and returns the joined text.
   */
  public String getAllComments() {
    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
    b.addAll(comments);
    comments.clear();
    return b.toString();
  }

  /**
   * Removes and returns the first collected comment, or returns {@code null} if there is none.
   */
  public String getFirstComment() {
    if (hasComments()) {
      String s = comments.get(0);
      comments.remove(0);
      return s;
    } else {
      return null;
    }
  }

  public boolean hasToken(String kw) {
    return !done() && kw.equals(current);
  }

  public boolean hasToken(String... names) {
    if (done()) {
      return false;
    }
    for (String s : names) {
      if (s.equals(current)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Consumes the current token, which must equal {@code kw}.
   *
   * @throws FHIRLexerException if the current token is something else
   */
  public void token(String kw) throws FHIRLexerException {
    if (!kw.equals(current)) {
      throw error("Found \""+current+"\" expecting \""+kw+"\"");
    }
    next();
  }

  /**
   * Consumes the current token, which must be a string constant, and returns its
   * unescaped content.
   *
   * @param desc description of what was expected, used in the error message
   */
  public String readConstant(String desc) throws FHIRLexerException {
    if (!isStringConstant()) {
      throw error("Found "+current+" expecting \"["+desc+"]\"");
    }

    return processConstant(take());
  }

  /**
   * Consumes the current token, which must be a fixed name, and returns its
   * unescaped content.
   *
   * @param desc description of what was expected, used in the error message
   */
  public String readFixedName(String desc) throws FHIRLexerException {
    if (!isFixedName()) {
      throw error("Found "+current+" expecting \"["+desc+"]\"");
    }

    return processFixedName(take());
  }

  /**
   * Strips the first and last character (the delimiters) of {@code s} and resolves
   * backslash escapes in between.
   *
   * @throws FHIRLexerException on an unknown or malformed escape
   */
  public String processConstant(String s) throws FHIRLexerException {
    return unescape(s);
  }

  /**
   * Strips the first and last character (the delimiters) of {@code s} and resolves
   * backslash escapes in between, using the same rules as {@link #processConstant(String)}.
   *
   * @throws FHIRLexerException on an unknown or malformed escape
   */
  public String processFixedName(String s) throws FHIRLexerException {
    return unescape(s);
  }

  /**
   * Shared escape decoder: skips the delimiter at each end of {@code s} and translates
   * {@code \t \r \n \f \' \" \` \\ \/} and {@code \}{@code uXXXX} (four hex digits).
   */
  private String unescape(String s) throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    int end = s.length() - 1;
    int i = 1;
    while (i < end) {
      char ch = s.charAt(i);
      if (ch != '\\') {
        b.append(ch);
        i++;
        continue;
      }
      i++; // now on the character that selects the escape
      char selector = s.charAt(i);
      switch (selector) {
      case 't':
        b.append('\t');
        break;
      case 'r':
        b.append('\r');
        break;
      case 'n':
        b.append('\n');
        break;
      case 'f':
        b.append('\f');
        break;
      case '\'':
        b.append('\'');
        break;
      case '"':
        b.append('"');
        break;
      case '`':
        b.append('`');
        break;
      case '\\':
        b.append('\\');
        break;
      case '/':
        b.append('/');
        break;
      case 'u':
        b.append(Character.toString(readHex4(s, i + 1)));
        i = i + 4; // now on the last hex digit
        break;
      default:
        throw new FHIRLexerException("Unknown FHIRPath character escape \\"+selector, currentLocation);
      }
      i++; // step past the selector (or the last hex digit) so it is not emitted as a literal
    }
    return b.toString();
  }

  /** Reads exactly four hexadecimal digits of {@code s} starting at {@code start}. */
  private int readHex4(String s, int start) throws FHIRLexerException {
    if (start + 4 > s.length()) {
      throw new FHIRLexerException("Incomplete unicode escape in "+s, currentLocation);
    }
    int value = 0;
    for (int k = start; k < start + 4; k++) {
      int digit = Character.digit(s.charAt(k), 16);
      if (digit < 0) {
        throw new FHIRLexerException("Invalid unicode escape \\u"+s.substring(start, start + 4), currentLocation);
      }
      value = value * 16 + digit;
    }
    return value;
  }

  /**
   * Advances past the current token if it equals {@code token}; otherwise does nothing
   * (including when the input is exhausted).
   */
  public void skipToken(String token) throws FHIRLexerException {
    if (current != null && current.equals(token)) {
      next();
    }
  }

  /**
   * Consumes a token followed by any number of {@code . token} pairs and returns them
   * concatenated.
   */
  public String takeDottedToken() throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    b.append(take());
    while (!done() && getCurrent().equals(".")) {
      b.append(take());
      b.append(take());
    }
    return b.toString();
  }

  public void skipComments() throws FHIRLexerException {
    while (!done() && hasComment()) {
      next();
    }
  }

  public int getCurrentStart() {
    return currentStart;
  }

  public String getSource() {
    return source;
  }

  public boolean isLiquidMode() {
    return liquidMode;
  }

  public void setLiquidMode(boolean liquidMode) {
    this.liquidMode = liquidMode;
  }

  /**
   * @return the location of the first comment found before the current token, or
   *         {@code null} if there was none
   */
  public SourceLocation getCommentLocation() {
    return this.commentLocation;
  }

  public boolean isMetadataFormat() {
    return metadataFormat;
  }

  public void setMetadataFormat(boolean metadataFormat) {
    this.metadataFormat = metadataFormat;
  }

  /**
   * @return a new list holding a copy of the currently collected comments
   */
  public List<String> cloneComments() {
    List<String> res = new ArrayList<>();
    res.addAll(getComments());
    return res;
  }

  /**
   * Consumes {@code token} and, if a comment was found on the line the lexer was on
   * before consuming it, removes and returns that comment.
   *
   * @return the trailing comment, or {@code null} if there is none on that line
   */
  public String tokenWithTrailingComment(String token) {
    int line = getCurrentLocation().getLine();
    token(token);
    if (getComments().size() > 0 && getCommentLocation().getLine() == line) {
      return getFirstComment();
    } else {
      return null;
    }
  }

  public boolean isAllowDoubleQuotes() {
    return allowDoubleQuotes;
  }
}