
001package org.hl7.fhir.r4.fhirpath; 002 003import java.util.ArrayList; 004import java.util.List; 005 006import org.hl7.fhir.exceptions.FHIRException; 007import org.hl7.fhir.utilities.CommaSeparatedStringBuilder; 008import org.hl7.fhir.utilities.SourceLocation; 009import org.hl7.fhir.utilities.Utilities; 010 011// shared lexer for concrete syntaxes 012// - FluentPath 013// - Mapping language 014 015public class FHIRLexer { 016 public class FHIRLexerException extends FHIRException { 017 018 private SourceLocation location; 019 020 public FHIRLexerException(String message) { 021 super(message); 022 } 023 024 public FHIRLexerException(String message, Throwable cause) { 025 super(message, cause); 026 } 027 028 public FHIRLexerException(String message, SourceLocation location) { 029 super(message); 030 this.location = location; 031 } 032 033 public SourceLocation getLocation() { 034 return location; 035 } 036 037 } 038 039 private String source; 040 private int cursor; 041 private int currentStart; 042 private String current; 043 private List<String> comments = new ArrayList<>(); 044 private SourceLocation currentLocation; 045 private SourceLocation currentStartLocation; 046 private int id; 047 private String name; 048 private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host 049 private SourceLocation commentLocation; 050 private boolean metadataFormat; 051 private boolean allowDoubleQuotes; 052 053 public FHIRLexer(String source, String name) throws FHIRLexerException { 054 this.source = source == null ? "" : Utilities.stripBOM(source); 055 this.name = name == null ? "??" : name; 056 currentLocation = new SourceLocation(1, 1); 057 next(); 058 } 059 060 public FHIRLexer(String source, int i) throws FHIRLexerException { 061 this.source = Utilities.stripBOM(source); 062 this.cursor = i; 063 currentLocation = new SourceLocation(1, 1); 064 next(); 065 } 066 public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException { 067 this.source = Utilities.stripBOM(source); 068 this.cursor = i; 069 this.allowDoubleQuotes = allowDoubleQuotes; 070 currentLocation = new SourceLocation(1, 1); 071 next(); 072 } 073 public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException { 074 this.source = source == null ? "" : Utilities.stripBOM(source); 075 this.name = name == null ? "??" : name; 076 this.metadataFormat = metadataFormat; 077 this.allowDoubleQuotes = allowDoubleQuotes; 078 currentLocation = new SourceLocation(1, 1); 079 next(); 080 } 081 public String getCurrent() { 082 return current; 083 } 084 085 public SourceLocation getCurrentLocation() { 086 return currentLocation; 087 } 088 089 public boolean isConstant() { 090 return FHIRPathConstant.isFHIRPathConstant(current); 091 } 092 093 public boolean isFixedName() { 094 return FHIRPathConstant.isFHIRPathFixedName(current); 095 } 096 097 public boolean isStringConstant() { 098 return FHIRPathConstant.isFHIRPathStringConstant(current); 099 } 100 101 public String take() throws FHIRLexerException { 102 String s = current; 103 next(); 104 return s; 105 } 106 107 public int takeInt() throws FHIRLexerException { 108 String s = current; 109 if (!Utilities.isInteger(s)) 110 throw error("Found "+current+" expecting an integer"); 111 next(); 112 return Integer.parseInt(s); 113 } 114 115 public boolean isToken() { 116 if (Utilities.noString(current)) 117 return false; 118 119 if (current.startsWith("$")) 120 return true; 121 122 if (current.equals("*") || current.equals("**")) 123 return true; 124 125 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 126 for (int i = 1; i < current.length(); i++) 127 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 128 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 129 return false; 130 return true; 131 } 132 return false; 133 } 134 135 public FHIRLexerException error(String msg) { 136 return error(msg, currentLocation.toString(), currentLocation); 137 } 138 139 public FHIRLexerException error(String msg, String location, SourceLocation loc) { 140 return new FHIRLexerException("Error @"+location+": "+msg, loc); 141 } 142 143 public void next() throws FHIRLexerException { 144 skipWhitespaceAndComments(); 145 current = null; 146 currentStart = cursor; 147 currentStartLocation = currentLocation; 148 if (cursor < source.length()) { 149 char ch = source.charAt(cursor); 150 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 151 cursor++; 152 if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 153 cursor++; 154 current = source.substring(currentStart, cursor); 155 } else if (ch == '.' ) { 156 cursor++; 157 if (cursor < source.length() && (source.charAt(cursor) == '.')) 158 cursor++; 159 current = source.substring(currentStart, cursor); 160 } else if (ch >= '0' && ch <= '9') { 161 cursor++; 162 boolean dotted = false; 163 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) { 164 if (source.charAt(cursor) == '.') 165 dotted = true; 166 cursor++; 167 } 168 if (source.charAt(cursor-1) == '.') 169 cursor--; 170 current = source.substring(currentStart, cursor); 171 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 172 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 173 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 174 cursor++; 175 current = source.substring(currentStart, cursor); 176 } else if (ch == '%') { 177 cursor++; 178 if (cursor < source.length() && (source.charAt(cursor) == '`')) { 179 cursor++; 180 while (cursor < source.length() && (source.charAt(cursor) != '`')) 181 cursor++; 182 cursor++; 183 } else 184 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 185 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-' || source.charAt(cursor) == '_')) 186 cursor++; 187 current = source.substring(currentStart, cursor); 188 } else if (ch == '/') { 189 cursor++; 190 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 191 // we've run into metadata 192 cursor++; 193 cursor++; 194 current = source.substring(currentStart, cursor); 195 } else { 196 current = source.substring(currentStart, cursor); 197 } 198 } else if (ch == '$') { 199 cursor++; 200 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 201 cursor++; 202 current = source.substring(currentStart, cursor); 203 } else if (ch == '{') { 204 cursor++; 205 ch = source.charAt(cursor); 206 if (ch == '}') 207 cursor++; 208 current = source.substring(currentStart, cursor); 209 } else if (ch == '"' && allowDoubleQuotes) { 210 cursor++; 211 boolean escape = false; 212 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 213 if (escape) 214 escape = false; 215 else 216 escape = (source.charAt(cursor) == '\\'); 217 cursor++; 218 } 219 if (cursor == source.length()) 220 throw error("Unterminated string"); 221 cursor++; 222 current = "\""+source.substring(currentStart+1, cursor-1)+"\""; 223 } else if (ch == '`') { 224 cursor++; 225 boolean escape = false; 226 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 227 if (escape) 228 escape = false; 229 else 230 escape = (source.charAt(cursor) == '\\'); 231 cursor++; 232 } 233 if (cursor == source.length()) 234 throw error("Unterminated string"); 235 cursor++; 236 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 237 } else if (ch == '\''){ 238 cursor++; 239 char ech = ch; 240 boolean escape = false; 241 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 242 if (escape) 243 escape = false; 244 else 245 escape = (source.charAt(cursor) == '\\'); 246 cursor++; 247 } 248 if (cursor == source.length()) 249 throw error("Unterminated string"); 250 cursor++; 251 current = source.substring(currentStart, cursor); 252 if (ech == '\'') 253 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 254 } else if (ch == '`') { 255 cursor++; 256 boolean escape = false; 257 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 258 if (escape) 259 escape = false; 260 else 261 escape = (source.charAt(cursor) == '\\'); 262 cursor++; 263 } 264 if (cursor == source.length()) 265 throw error("Unterminated string"); 266 cursor++; 267 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 268 } else if (ch == '|' && liquidMode) { 269 cursor++; 270 ch = source.charAt(cursor); 271 if (ch == '|') 272 cursor++; 273 current = source.substring(currentStart, cursor); 274 } else if (ch == '@'){ 275 int start = cursor; 276 cursor++; 277 while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) 278 cursor++; 279 current = source.substring(currentStart, cursor); 280 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 281 cursor++; 282 current = source.substring(currentStart, cursor); 283 } 284 } 285 } 286 287 private void skipWhitespaceAndComments() { 288 comments.clear(); 289 commentLocation = null; 290 boolean last13 = false; 291 boolean done = false; 292 while (cursor < source.length() && !done) { 293 if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2)) && !isMetadataStart()) { 294 if (commentLocation == null) { 295 commentLocation = currentLocation.copy(); 296 } 297 int start = cursor+2; 298 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 299 cursor++; 300 } 301 comments.add(source.substring(start, cursor).trim()); 302 } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) { 303 if (commentLocation == null) { 304 commentLocation = currentLocation.copy(); 305 } 306 int start = cursor+2; 307 while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 308 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 309 cursor++; 310 } 311 if (cursor >= source.length() -1) { 312 error("Unfinished comment"); 313 } else { 314 comments.add(source.substring(start, cursor).trim()); 315 cursor = cursor + 2; 316 } 317 } else if (Utilities.isWhitespace(source.charAt(cursor))) { 318 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 319 cursor++; 320 } else { 321 done = true; 322 } 323 } 324 } 325 326 private boolean isMetadataStart() { 327 return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3)); 328 } 329 330 private boolean isDateChar(char ch,int start) { 331 int eot = source.charAt(start+1) == 'T' ? 10 : 20; 332 333 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1))); 334 } 335 336 public boolean isOp() { 337 return ExpressionNode.Operation.fromCode(current) != null; 338 } 339 340 public boolean done() { 341 return currentStart >= source.length(); 342 } 343 344 public int nextId() { 345 id++; 346 return id; 347 } 348 349 public SourceLocation getCurrentStartLocation() { 350 return currentStartLocation; 351 } 352 353 // special case use 354 public void setCurrent(String current) { 355 this.current = current; 356 } 357 358 public boolean hasComment() { 359 return !done() && current.startsWith("//"); 360 } 361 362 public boolean hasComments() { 363 return comments.size() > 0; 364 } 365 366 367 public List<String> getComments() { 368 return comments; 369 } 370 371 public String getAllComments() { 372 CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n"); 373 b.addAll(comments); 374 comments.clear(); 375 return b.toString(); 376 } 377 378 public String getFirstComment() { 379 if (hasComments()) { 380 String s = comments.get(0); 381 comments.remove(0); 382 return s; 383 } else { 384 return null; 385 } 386 } 387 388 public boolean hasToken(String kw) { 389 return !done() && kw.equals(current); 390 } 391 public boolean hasToken(String... names) { 392 if (done()) 393 return false; 394 for (String s : names) 395 if (s.equals(current)) 396 return true; 397 return false; 398 } 399 400 public void token(String kw) throws FHIRLexerException { 401 if (!kw.equals(current)) 402 throw error("Found \""+current+"\" expecting \""+kw+"\""); 403 next(); 404 } 405 406 public String readConstant(String desc) throws FHIRLexerException { 407 if (!isStringConstant()) 408 throw error("Found "+current+" expecting \"["+desc+"]\""); 409 410 return processConstant(take()); 411 } 412 413 public String readFixedName(String desc) throws FHIRLexerException { 414 if (!isFixedName()) 415 throw error("Found "+current+" expecting \"["+desc+"]\""); 416 417 return processFixedName(take()); 418 } 419 420 public String processConstant(String s) throws FHIRLexerException { 421 StringBuilder b = new StringBuilder(); 422 int i = 1; 423 while (i < s.length()-1) { 424 char ch = s.charAt(i); 425 if (ch == '\\') { 426 i++; 427 switch (s.charAt(i)) { 428 case 't': 429 b.append('\t'); 430 break; 431 case 'r': 432 b.append('\r'); 433 break; 434 case 'n': 435 b.append('\n'); 436 break; 437 case 'f': 438 b.append('\f'); 439 break; 440 case '\'': 441 b.append('\''); 442 break; 443 case '"': 444 b.append('"'); 445 break; 446 case '`': 447 b.append('`'); 448 break; 449 case '\\': 450 b.append('\\'); 451 break; 452 case '/': 453 b.append('/'); 454 break; 455 case 'u': 456 i++; 457 int uc = Integer.parseInt(s.substring(i, i+4), 16); 458 b.append(Character.toString(uc)); 459 i = i + 4; 460 break; 461 default: 462 throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation); 463 } 464 } else { 465 b.append(ch); 466 i++; 467 } 468 } 469 return b.toString(); 470 } 471 472 public String processFixedName(String s) throws FHIRLexerException { 473 StringBuilder b = new StringBuilder(); 474 int i = 1; 475 while (i < s.length()-1) { 476 char ch = s.charAt(i); 477 if (ch == '\\') { 478 i++; 479 switch (s.charAt(i)) { 480 case 't': 481 b.append('\t'); 482 break; 483 case 'r': 484 b.append('\r'); 485 break; 486 case 'n': 487 b.append('\n'); 488 break; 489 case 'f': 490 b.append('\f'); 491 break; 492 case '\'': 493 b.append('\''); 494 break; 495 case '"': 496 b.append('"'); 497 break; 498 case '\\': 499 b.append('\\'); 500 break; 501 case '/': 502 b.append('/'); 503 break; 504 case 'u': 505 i++; 506 int uc = Integer.parseInt(s.substring(i, i+4), 32); 507 b.append(Character.toString(uc)); 508 i = i + 4; 509 break; 510 default: 511 throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation); 512 } 513 } else { 514 b.append(ch); 515 i++; 516 } 517 } 518 return b.toString(); 519 } 520 521 public void skipToken(String token) throws FHIRLexerException { 522 if (getCurrent().equals(token)) 523 next(); 524 525 } 526 527 public String takeDottedToken() throws FHIRLexerException { 528 StringBuilder b = new StringBuilder(); 529 b.append(take()); 530 while (!done() && getCurrent().equals(".")) { 531 b.append(take()); 532 b.append(take()); 533 } 534 return b.toString(); 535 } 536 537 public void skipComments() throws FHIRLexerException { 538 while (!done() && hasComment()) 539 next(); 540 } 541 542 public int getCurrentStart() { 543 return currentStart; 544 } 545 public String getSource() { 546 return source; 547 } 548 public boolean isLiquidMode() { 549 return liquidMode; 550 } 551 public void setLiquidMode(boolean liquidMode) { 552 this.liquidMode = liquidMode; 553 } 554 public SourceLocation getCommentLocation() { 555 return this.commentLocation; 556 } 557 public boolean isMetadataFormat() { 558 return metadataFormat; 559 } 560 public void setMetadataFormat(boolean metadataFormat) { 561 this.metadataFormat = metadataFormat; 562 } 563 public List<String> cloneComments() { 564 List<String> res = new ArrayList<>(); 565 res.addAll(getComments()); 566 return res; 567 } 568 public String tokenWithTrailingComment(String token) { 569 int line = getCurrentLocation().getLine(); 570 token(token); 571 if (getComments().size() > 0 && getCommentLocation().getLine() == line) { 572 return getFirstComment(); 573 } else { 574 return null; 575 } 576 } 577 public boolean isAllowDoubleQuotes() { 578 return allowDoubleQuotes; 579 } 580}