package org.hl7.fhir.r5.fhirpath;

import java.util.ArrayList;
import java.util.List;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
import org.hl7.fhir.utilities.SourceLocation;
import org.hl7.fhir.utilities.Utilities;

// shared lexer for concrete syntaxes
// - FluentPath
// - Mapping language

/**
 * Hand-written tokenizer that walks a source string one token at a time.
 *
 * <p>The lexer always holds one "current" token (see {@link #getCurrent()}); {@link #next()}
 * advances to the following one, first skipping whitespace and collecting any comments that
 * precede it. At end of input the current token is {@code null} and {@link #done()} is true.
 *
 * <p>Instances are mutable and carry cursor state; nothing here is synchronized.
 */
public class FHIRLexer {

  /**
   * Lexing failure that also carries the {@link SourceLocation} it was raised for.
   */
  public class FHIRLexerException extends FHIRException {

    private SourceLocation location;

    /**
     * @param message  human readable description of the problem
     * @param location location to associate with the problem (may be whatever the caller supplies)
     */
    public FHIRLexerException(String message, SourceLocation location) {
      super(message);
      this.location = location;
    }

    /** @return the location passed to the constructor */
    public SourceLocation getLocation() {
      return location;
    }

  }

  private String source;
  private int cursor;
  private int currentStart;
  private String current;
  private List<String> comments = new ArrayList<>();
  private SourceLocation currentLocation;
  private SourceLocation currentStartLocation;
  private int id;
  private String name;
  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
  private SourceLocation commentLocation;
  private boolean metadataFormat;
  private boolean allowDoubleQuotes;

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source text to tokenize; {@code null} is treated as the empty string
   * @param name   name for the source; {@code null} is replaced by {@code "??"}
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, String name) throws FHIRLexerException {
    this(source, name, false, false);
  }

  /**
   * Creates a lexer that starts reading at offset {@code i} and reads the first token.
   *
   * @param source text to tokenize; {@code null} is treated as the empty string
   * @param i      initial cursor offset into the (BOM-stripped) source
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, int i) throws FHIRLexerException {
    this(source, i, false);
  }

  /**
   * Creates a lexer that starts reading at offset {@code i} and reads the first token.
   *
   * @param source            text to tokenize; {@code null} is treated as the empty string
   * @param i                 initial cursor offset into the (BOM-stripped) source
   * @param allowDoubleQuotes whether a double-quoted run is read as a single token
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException {
    // Same null guard as the name-based constructors, so a null source behaves like empty input.
    this.source = source == null ? "" : Utilities.stripBOM(source);
    this.cursor = i;
    this.allowDoubleQuotes = allowDoubleQuotes;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source            text to tokenize; {@code null} is treated as the empty string
   * @param name              name for the source; {@code null} is replaced by {@code "??"}
   * @param metadataFormat    when true, {@code ///} is lexed as a token instead of starting a comment
   * @param allowDoubleQuotes whether a double-quoted run is read as a single token
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
    this.source = source == null ? "" : Utilities.stripBOM(source);
    this.name = name == null ? "??" : name;
    this.metadataFormat = metadataFormat;
    this.allowDoubleQuotes = allowDoubleQuotes;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /** @return the current token text, or {@code null} once the input is exhausted */
  public String getCurrent() {
    return current;
  }

  /** @return the location object the lexer updates while skipping whitespace and block comments */
  public SourceLocation getCurrentLocation() {
    return currentLocation;
  }

  /** @return result of {@code FHIRPathConstant.isFHIRPathConstant} for the current token */
  public boolean isConstant() {
    return FHIRPathConstant.isFHIRPathConstant(current);
  }

  /** @return result of {@code FHIRPathConstant.isFHIRPathFixedName} for the current token */
  public boolean isFixedName() {
    return FHIRPathConstant.isFHIRPathFixedName(current);
  }

  /** @return result of {@code FHIRPathConstant.isFHIRPathStringConstant} for the current token */
  public boolean isStringConstant() {
    return FHIRPathConstant.isFHIRPathStringConstant(current);
  }

  /**
   * Returns the current token and advances to the next one.
   *
   * @return the token that was current before advancing (may be {@code null} at end of input)
   * @throws FHIRLexerException if the following token cannot be read
   */
  public String take() throws FHIRLexerException {
    String s = current;
    next();
    return s;
  }

  /**
   * Consumes the current token as an integer.
   *
   * @return the integer value of the consumed token
   * @throws FHIRLexerException if the current token is not an integer
   */
  public int takeInt() throws FHIRLexerException {
    String s = current;
    if (!Utilities.isInteger(s))
      throw error("Found "+current+" expecting an integer");
    next();
    return Integer.parseInt(s);
  }

  /**
   * Tests whether the current token is name-like: it starts with {@code $}, is {@code *} or
   * {@code **}, or is an ASCII letter followed only by characters that {@link #next()} accepts
   * inside an identifier (ASCII letters, digits and {@code _}).
   *
   * @return true when the current token has one of those shapes
   */
  public boolean isToken() {
    if (Utilities.noString(current))
      return false;

    if (current.startsWith("$"))
      return true;

    if (current.equals("*") || current.equals("**"))
      return true;

    if (!isAsciiLetter(current.charAt(0)))
      return false;

    // Check every remaining character (the index must follow the loop variable).
    for (int i = 1; i < current.length(); i++) {
      if (!isIdentifierPart(current.charAt(i)))
        return false;
    }
    return true;
  }

  /**
   * Builds (does not throw) an exception for the current location.
   *
   * @param msg description of the problem
   * @return the exception, for the caller to throw
   */
  public FHIRLexerException error(String msg) {
    return error(msg, currentLocation.toString(), currentLocation);
  }

  /**
   * Builds (does not throw) an exception whose message is {@code "Error @<location>: <msg>"}.
   *
   * @param msg      description of the problem
   * @param location text placed after {@code @} in the message
   * @param loc      location stored on the exception
   * @return the exception, for the caller to throw
   */
  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
    return new FHIRLexerException("Error @"+location+": "+msg, loc);
  }

  /**
   * Skips whitespace and comments, then reads the next token into {@link #getCurrent()}.
   * Leaves the current token {@code null} when the end of the source has been reached.
   *
   * @throws FHIRLexerException on an unterminated string, delimited name or block comment
   */
  public void next() throws FHIRLexerException {
    skipWhitespaceAndComments();
    current = null;
    currentStart = cursor;
    currentStartLocation = currentLocation;
    if (cursor >= source.length()) {
      return;
    }

    char ch = source.charAt(cursor);
    if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') {
      cursor++;
      // Optional second character of a two-character operator; only look if one exists.
      if (cursor < source.length()) {
        char following = source.charAt(cursor);
        if (following == '=' || following == '~' || following == '-' || (ch == '-' && following == '>')) {
          cursor++;
        }
      }
    } else if (ch == '.') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '.')
        cursor++;
    } else if (isAsciiDigit(ch)) {
      cursor++;
      boolean dotted = false;
      while (cursor < source.length() && (isAsciiDigit(source.charAt(cursor)) || (source.charAt(cursor) == '.' && !dotted))) {
        if (source.charAt(cursor) == '.')
          dotted = true;
        cursor++;
      }
      // A trailing '.' is not part of the number; give it back.
      if (source.charAt(cursor-1) == '.')
        cursor--;
    } else if (isAsciiLetter(ch)) {
      while (cursor < source.length() && isIdentifierPart(source.charAt(cursor)))
        cursor++;
    } else if (ch == '%') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '`') {
        cursor++;
        while (cursor < source.length() && source.charAt(cursor) != '`')
          cursor++;
        if (cursor == source.length())
          throw error("Unterminated string");
        cursor++;
      } else {
        while (cursor < source.length() && isPercentNamePart(source.charAt(cursor)))
          cursor++;
      }
    } else if (ch == '/') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '/') {
        // we've run into metadata: take the second and third slash as part of the token
        cursor = Math.min(cursor + 2, source.length());
      }
    } else if (ch == '$') {
      cursor++;
      while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
        cursor++;
    } else if (ch == '{') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '}')
        cursor++;
    } else if ((ch == '"' && allowDoubleQuotes) || ch == '`' || ch == '\'') {
      skipDelimited(ch);
    } else if (ch == '|' && liquidMode) {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '|')
        cursor++;
    } else if (ch == '@') {
      int start = cursor;
      cursor++;
      while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
        cursor++;
    } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
      cursor++;
    }
    current = source.substring(currentStart, cursor);
  }

  /**
   * Moves the cursor from an opening delimiter to just past the matching closing delimiter.
   * A backslash makes the following character literal, so an escaped delimiter does not close.
   */
  private void skipDelimited(char delimiter) throws FHIRLexerException {
    cursor++;
    boolean escape = false;
    while (cursor < source.length() && (escape || source.charAt(cursor) != delimiter)) {
      escape = !escape && source.charAt(cursor) == '\\';
      cursor++;
    }
    if (cursor == source.length())
      throw error("Unterminated string");
    cursor++;
  }

  /**
   * Advances the cursor over whitespace, {@code //} line comments and block comments, recording
   * the trimmed text of each comment and the location at which the first one started.
   *
   * @throws FHIRLexerException if a block comment is never closed
   */
  private void skipWhitespaceAndComments() throws FHIRLexerException {
    comments.clear();
    commentLocation = null;
    boolean last13 = false;
    boolean done = false;
    while (cursor < source.length() && !done) {
      if (source.startsWith("//", cursor) && !isMetadataStart()) {
        if (commentLocation == null) {
          commentLocation = currentLocation.copy();
        }
        int start = cursor+2;
        while (cursor < source.length() && source.charAt(cursor) != '\r' && source.charAt(cursor) != '\n') {
          cursor++;
        }
        comments.add(source.substring(start, cursor).trim());
      } else if (source.startsWith("/*", cursor)) {
        if (commentLocation == null) {
          commentLocation = currentLocation.copy();
        }
        int start = cursor+2;
        // Look for the terminator after the opener, so that "/*/" is not taken as a closed comment.
        int end = source.indexOf("*/", start);
        if (end < 0) {
          throw error("Unfinished comment");
        }
        // Feed the opener and the comment text to the location tracker.
        while (cursor < end) {
          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
          cursor++;
        }
        comments.add(source.substring(start, end).trim());
        cursor = end + 2;
      } else if (Utilities.isWhitespace(source.charAt(cursor))) {
        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
        cursor++;
      } else {
        done = true;
      }
    }
  }

  /** True when metadata format is on and the cursor sits on {@code ///}. */
  private boolean isMetadataStart() {
    return metadataFormat && source.startsWith("///", cursor);
  }

  /**
   * Decides whether {@code ch} (the character at the cursor) continues a literal that began with
   * {@code @} at offset {@code start}. A {@code .} is accepted only at one fixed offset from
   * {@code start} (10 when the literal begins {@code @T}, otherwise 20) and only when a digit
   * follows it.
   */
  private boolean isDateChar(char ch,int start) {
    int eot = source.charAt(start+1) == 'T' ? 10 : 20;

    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
  }

  private static boolean isAsciiLetter(char c) {
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
  }

  private static boolean isAsciiDigit(char c) {
    return c >= '0' && c <= '9';
  }

  /** Characters accepted after the first letter of an identifier. */
  private static boolean isIdentifierPart(char c) {
    return isAsciiLetter(c) || isAsciiDigit(c) || c == '_';
  }

  /** Characters accepted in an undelimited name following {@code %}. */
  private static boolean isPercentNamePart(char c) {
    return isIdentifierPart(c) || c == ':' || c == '-';
  }

  /** @return true when the current token text is a code known to {@code ExpressionNode.Operation} */
  public boolean isOp() {
    return ExpressionNode.Operation.fromCode(current) != null;
  }

  /** @return true when the current token starts at or beyond the end of the source */
  public boolean done() {
    return currentStart >= source.length();
  }

  /** @return the next value of a counter that starts at 1 and increases by one per call */
  public int nextId() {
    id++;
    return id;
  }

  /** @return the location reference captured when the current token was read */
  public SourceLocation getCurrentStartLocation() {
    return currentStartLocation;
  }

  // special case use
  public void setCurrent(String current) {
    this.current = current;
  }

  /** @return true when comments were collected ahead of the current token and not yet removed */
  public boolean hasComments() {
    return comments.size() > 0;
  }

  /** @return the live list of collected comments (cleared each time a token is read) */
  public List<String> getComments() {
    return comments;
  }

  /**
   * Joins the collected comments with CR/LF and clears the list.
   *
   * @return the joined comment text
   */
  public String getAllComments() {
    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
    b.addAll(comments);
    comments.clear();
    return b.toString();
  }

  /**
   * Removes and returns the first collected comment.
   *
   * @return the first comment, or {@code null} when there is none
   */
  public String getFirstComment() {
    return hasComments() ? comments.remove(0) : null;
  }

  /**
   * @param kw token text to compare against
   * @return true when input remains and the current token equals {@code kw}
   */
  public boolean hasToken(String kw) {
    return !done() && kw.equals(current);
  }

  /**
   * @param names candidate token texts
   * @return true when input remains and the current token equals any of {@code names}
   */
  public boolean hasToken(String... names) {
    if (done())
      return false;
    for (String s : names)
      if (s.equals(current))
        return true;
    return false;
  }

  /**
   * Requires the current token to be {@code kw} and consumes it.
   *
   * @param kw expected token text
   * @throws FHIRLexerException if the current token differs from {@code kw}
   */
  public void token(String kw) throws FHIRLexerException {
    if (!kw.equals(current))
      throw error("Found \""+current+"\" expecting \""+kw+"\"");
    next();
  }

  /**
   * Consumes the current token as a string constant and returns its unescaped content.
   *
   * @param desc description of what was expected, used in the error message
   * @return the constant's content without delimiters, escapes resolved
   * @throws FHIRLexerException if the current token is not a string constant or has a bad escape
   */
  public String readConstant(String desc) throws FHIRLexerException {
    if (!isStringConstant())
      throw error("Found "+current+" expecting \"["+desc+"]\"");

    return processConstant(take());
  }

  /**
   * Consumes the current token as a fixed name and returns its unescaped content.
   *
   * @param desc description of what was expected, used in the error message
   * @return the name's content without delimiters, escapes resolved
   * @throws FHIRLexerException if the current token is not a fixed name or has a bad escape
   */
  public String readFixedName(String desc) throws FHIRLexerException {
    if (!isFixedName())
      throw error("Found "+current+" expecting \"["+desc+"]\"");

    return processFixedName(take());
  }

  /**
   * Strips the first and last character of {@code s} and resolves backslash escapes in between.
   *
   * @param s delimited token text
   * @return the unescaped content
   * @throws FHIRLexerException on an unknown or malformed escape
   */
  public String processConstant(String s) throws FHIRLexerException {
    return unescape(s);
  }

  /**
   * Strips the first and last character of {@code s} and resolves backslash escapes in between.
   * Accepts the same escapes as {@link #processConstant(String)}, including an escaped backtick.
   *
   * @param s delimited token text
   * @return the unescaped content
   * @throws FHIRLexerException on an unknown or malformed escape
   */
  public String processFixedName(String s) throws FHIRLexerException {
    return unescape(s);
  }

  /**
   * Shared escape processing: drops the delimiters at both ends of {@code s} and translates
   * {@code \t \r \n \f \' \" \` \\ \/} and {@code \}{@code uXXXX} (four hexadecimal digits).
   */
  private String unescape(String s) throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    int end = s.length() - 1; // index of the closing delimiter
    int i = 1;
    while (i < end) {
      char ch = s.charAt(i);
      if (ch != '\\') {
        b.append(ch);
        i++;
        continue;
      }
      i++; // now on the character that selects the escape
      char selector = s.charAt(i);
      switch (selector) {
      case 't':
        b.append('\t');
        break;
      case 'r':
        b.append('\r');
        break;
      case 'n':
        b.append('\n');
        break;
      case 'f':
        b.append('\f');
        break;
      case '\'':
        b.append('\'');
        break;
      case '"':
        b.append('"');
        break;
      case '`':
        b.append('`');
        break;
      case '\\':
        b.append('\\');
        break;
      case '/':
        b.append('/');
        break;
      case 'u':
        int uc = 0;
        for (int k = 1; k <= 4; k++) {
          int digit = i + k < end ? Character.digit(s.charAt(i + k), 16) : -1;
          if (digit < 0)
            throw new FHIRLexerException("Invalid FHIRPath unicode escape in "+s, currentLocation);
          uc = uc * 16 + digit;
        }
        b.appendCodePoint(uc);
        i = i + 4; // now on the last hex digit
        break;
      default:
        throw new FHIRLexerException("Unknown FHIRPath character escape \\"+selector, currentLocation);
      }
      i++; // step past the selector (or the last hex digit) so it is not emitted again
    }
    return b.toString();
  }

  /**
   * Consumes the current token if it equals {@code token}; otherwise does nothing
   * (including at end of input, where there is no current token).
   *
   * @param token token text to skip
   * @throws FHIRLexerException if the following token cannot be read
   */
  public void skipToken(String token) throws FHIRLexerException {
    if (current != null && current.equals(token))
      next();

  }

  /**
   * Consumes a token followed by any number of {@code . token} pairs.
   *
   * @return the consumed tokens concatenated, dots included
   * @throws FHIRLexerException if a token cannot be read
   */
  public String takeDottedToken() throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    b.append(take());
    while (!done() && ".".equals(current)) {
      b.append(take());
      b.append(take());
    }
    return b.toString();
  }

  /** @return offset in the source at which the current token starts */
  public int getCurrentStart() {
    return currentStart;
  }

  /** @return the source text being tokenized (after BOM stripping) */
  public String getSource() {
    return source;
  }

  public boolean isLiquidMode() {
    return liquidMode;
  }

  public void setLiquidMode(boolean liquidMode) {
    this.liquidMode = liquidMode;
  }

  /** @return copy of the location at which the first pending comment started, or {@code null} if none */
  public SourceLocation getCommentLocation() {
    return this.commentLocation;
  }

  public boolean isMetadataFormat() {
    return metadataFormat;
  }

  public void setMetadataFormat(boolean metadataFormat) {
    this.metadataFormat = metadataFormat;
  }

  /** @return a new list holding the currently collected comments */
  public List<String> cloneComments() {
    List<String> res = new ArrayList<>();
    res.addAll(getComments());
    return res;
  }

  /**
   * Consumes {@code token} and, if a comment collected right after it started on the line the
   * lexer's location was on before consuming, removes and returns that comment.
   *
   * @param token expected token text
   * @return the trailing comment, or {@code null} when there is none on that line
   */
  public String tokenWithTrailingComment(String token) {
    int line = getCurrentLocation().getLine();
    token(token);
    if (getComments().size() > 0 && getCommentLocation().getLine() == line) {
      return getFirstComment();
    } else {
      return null;
    }
  }

  public boolean isAllowDoubleQuotes() {
    return allowDoubleQuotes;
  }
}