001package org.hl7.fhir.r5.utils; 002 003import java.util.ArrayList; 004import java.util.List; 005 006import org.hl7.fhir.exceptions.FHIRException; 007 008/* 009 Copyright (c) 2011+, HL7, Inc. 010 All rights reserved. 011 012 Redistribution and use in source and binary forms, with or without modification, 013 are permitted provided that the following conditions are met: 014 015 * Redistributions of source code must retain the above copyright notice, this 016 list of conditions and the following disclaimer. 017 * Redistributions in binary form must reproduce the above copyright notice, 018 this list of conditions and the following disclaimer in the documentation 019 and/or other materials provided with the distribution. 020 * Neither the name of HL7 nor the names of its contributors may be used to 021 endorse or promote products derived from this software without specific 022 prior written permission. 023 024 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 025 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 026 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 027 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 028 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 029 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 030 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 031 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 032 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 033 POSSIBILITY OF SUCH DAMAGE. 034 035 */ 036 037 038 039import org.hl7.fhir.r5.model.ExpressionNode; 040import org.hl7.fhir.utilities.CommaSeparatedStringBuilder; 041import org.hl7.fhir.utilities.SourceLocation; 042import org.hl7.fhir.utilities.Utilities; 043 044// shared lexer for concrete syntaxes 045// - FluentPath 046// - Mapping language 047 048public class FHIRLexer { 049 public class FHIRLexerException extends FHIRException { 050 051 private SourceLocation location; 052 053// public FHIRLexerException() { 054// super(); 055// } 056// 057// public FHIRLexerException(String message, Throwable cause) { 058// super(message, cause); 059// } 060// 061// public FHIRLexerException(String message) { 062// super(message); 063// } 064// 065// public FHIRLexerException(Throwable cause) { 066// super(cause); 067// } 068 069 public FHIRLexerException(String message, SourceLocation location) { 070 super(message); 071 this.location = location; 072 } 073 074 public SourceLocation getLocation() { 075 return location; 076 } 077 078 } 079 private String source; 080 private int cursor; 081 private int currentStart; 082 private String current; 083 private List<String> comments = new ArrayList<>(); 084 private SourceLocation currentLocation; 085 private SourceLocation currentStartLocation; 086 private int id; 087 private String name; 088 private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host 089 private SourceLocation commentLocation; 090 private boolean metadataFormat; 091 private boolean allowDoubleQuotes; 092 093 public FHIRLexer(String source, String name) throws FHIRLexerException { 094 this.source = source == null ? "" : source; 095 this.name = name == null ? "??" : name; 096 currentLocation = new SourceLocation(1, 1); 097 next(); 098 } 099 public FHIRLexer(String source, int i) throws FHIRLexerException { 100 this.source = source; 101 this.cursor = i; 102 currentLocation = new SourceLocation(1, 1); 103 next(); 104 } 105 public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException { 106 this.source = source; 107 this.cursor = i; 108 this.allowDoubleQuotes = allowDoubleQuotes; 109 currentLocation = new SourceLocation(1, 1); 110 next(); 111 } 112 public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException { 113 this.source = source == null ? "" : source; 114 this.name = name == null ? "??" : name; 115 this.metadataFormat = metadataFormat; 116 this.allowDoubleQuotes = allowDoubleQuotes; 117 currentLocation = new SourceLocation(1, 1); 118 next(); 119 } 120 public String getCurrent() { 121 return current; 122 } 123 public SourceLocation getCurrentLocation() { 124 return currentLocation; 125 } 126 127 public boolean isConstant() { 128 return FHIRPathConstant.isFHIRPathConstant(current); 129 } 130 131 public boolean isFixedName() { 132 return FHIRPathConstant.isFHIRPathFixedName(current); 133 } 134 135 public boolean isStringConstant() { 136 return FHIRPathConstant.isFHIRPathStringConstant(current); 137 } 138 139 public String take() throws FHIRLexerException { 140 String s = current; 141 next(); 142 return s; 143 } 144 145 public int takeInt() throws FHIRLexerException { 146 String s = current; 147 if (!Utilities.isInteger(s)) 148 throw error("Found "+current+" expecting an integer"); 149 next(); 150 return Integer.parseInt(s); 151 } 152 153 public boolean isToken() { 154 if (Utilities.noString(current)) 155 return false; 156 157 if (current.startsWith("$")) 158 return true; 159 160 if (current.equals("*") || current.equals("**")) 161 return true; 162 163 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 164 for (int i = 1; i < current.length(); i++) 165 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 166 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 167 return false; 168 return true; 169 } 170 return false; 171 } 172 173 public FHIRLexerException error(String msg) { 174 return error(msg, currentLocation.toString(), currentLocation); 175 } 176 177 public FHIRLexerException error(String msg, String location, SourceLocation loc) { 178 return new FHIRLexerException("Error @"+location+": "+msg, loc); 179 } 180 181 public void next() throws FHIRLexerException { 182 skipWhitespaceAndComments(); 183 current = null; 184 currentStart = cursor; 185 currentStartLocation = currentLocation; 186 if (cursor < source.length()) { 187 char ch = source.charAt(cursor); 188 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 189 cursor++; 190 if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 191 cursor++; 192 current = source.substring(currentStart, cursor); 193 } else if (ch == '.' ) { 194 cursor++; 195 if (cursor < source.length() && (source.charAt(cursor) == '.')) 196 cursor++; 197 current = source.substring(currentStart, cursor); 198 } else if (ch >= '0' && ch <= '9') { 199 cursor++; 200 boolean dotted = false; 201 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) { 202 if (source.charAt(cursor) == '.') 203 dotted = true; 204 cursor++; 205 } 206 if (source.charAt(cursor-1) == '.') 207 cursor--; 208 current = source.substring(currentStart, cursor); 209 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 210 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 211 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 212 cursor++; 213 current = source.substring(currentStart, cursor); 214 } else if (ch == '%') { 215 cursor++; 216 if (cursor < source.length() && (source.charAt(cursor) == '`')) { 217 cursor++; 218 while (cursor < source.length() && (source.charAt(cursor) != '`')) 219 cursor++; 220 cursor++; 221 } else 222 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 223 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-')) 224 cursor++; 225 current = source.substring(currentStart, cursor); 226 } else if (ch == '/') { 227 cursor++; 228 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 229 // we've run into metadata 230 cursor++; 231 cursor++; 232 current = source.substring(currentStart, cursor); 233 } else { 234 current = source.substring(currentStart, cursor); 235 } 236 } else if (ch == '$') { 237 cursor++; 238 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 239 cursor++; 240 current = source.substring(currentStart, cursor); 241 } else if (ch == '{') { 242 cursor++; 243 ch = source.charAt(cursor); 244 if (ch == '}') 245 cursor++; 246 current = source.substring(currentStart, cursor); 247 } else if (ch == '"' && allowDoubleQuotes) { 248 cursor++; 249 boolean escape = false; 250 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 251 if (escape) 252 escape = false; 253 else 254 escape = (source.charAt(cursor) == '\\'); 255 cursor++; 256 } 257 if (cursor == source.length()) 258 throw error("Unterminated string"); 259 cursor++; 260 current = "\""+source.substring(currentStart+1, cursor-1)+"\""; 261 } else if (ch == '`') { 262 cursor++; 263 boolean escape = false; 264 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 265 if (escape) 266 escape = false; 267 else 268 escape = (source.charAt(cursor) == '\\'); 269 cursor++; 270 } 271 if (cursor == source.length()) 272 throw error("Unterminated string"); 273 cursor++; 274 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 275 } else if (ch == '\''){ 276 cursor++; 277 char ech = ch; 278 boolean escape = false; 279 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 280 if (escape) 281 escape = false; 282 else 283 escape = (source.charAt(cursor) == '\\'); 284 cursor++; 285 } 286 if (cursor == source.length()) 287 throw error("Unterminated string"); 288 cursor++; 289 current = source.substring(currentStart, cursor); 290 if (ech == '\'') 291 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 292 } else if (ch == '`') { 293 cursor++; 294 boolean escape = false; 295 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 296 if (escape) 297 escape = false; 298 else 299 escape = (source.charAt(cursor) == '\\'); 300 cursor++; 301 } 302 if (cursor == source.length()) 303 throw error("Unterminated string"); 304 cursor++; 305 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 306 } else if (ch == '|' && liquidMode) { 307 cursor++; 308 ch = source.charAt(cursor); 309 if (ch == '|') 310 cursor++; 311 current = source.substring(currentStart, cursor); 312 } else if (ch == '@'){ 313 int start = cursor; 314 cursor++; 315 while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) 316 cursor++; 317 current = source.substring(currentStart, cursor); 318 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 319 cursor++; 320 current = source.substring(currentStart, cursor); 321 } 322 } 323 } 324 325 private void skipWhitespaceAndComments() { 326 comments.clear(); 327 commentLocation = null; 328 boolean last13 = false; 329 boolean done = false; 330 while (cursor < source.length() && !done) { 331 if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2)) && !isMetadataStart()) { 332 if (commentLocation == null) { 333 commentLocation = currentLocation.copy(); 334 } 335 int start = cursor+2; 336 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 337 cursor++; 338 } 339 comments.add(source.substring(start, cursor).trim()); 340 } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) { 341 if (commentLocation == null) { 342 commentLocation = currentLocation.copy(); 343 } 344 int start = cursor+2; 345 while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 346 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 347 cursor++; 348 } 349 if (cursor >= source.length() -1) { 350 error("Unfinished comment"); 351 } else { 352 comments.add(source.substring(start, cursor).trim()); 353 cursor = cursor + 2; 354 } 355 } else if (Utilities.isWhitespace(source.charAt(cursor))) { 356 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 357 cursor++; 358 } else { 359 done = true; 360 } 361 } 362 } 363 364 private boolean isMetadataStart() { 365 return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3)); 366 } 367 368 private boolean isDateChar(char ch,int start) { 369 int eot = source.charAt(start+1) == 'T' ? 10 : 20; 370 371 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1))); 372 } 373 public boolean isOp() { 374 return ExpressionNode.Operation.fromCode(current) != null; 375 } 376 public boolean done() { 377 return currentStart >= source.length(); 378 } 379 public int nextId() { 380 id++; 381 return id; 382 } 383 public SourceLocation getCurrentStartLocation() { 384 return currentStartLocation; 385 } 386 387 // special case use 388 public void setCurrent(String current) { 389 this.current = current; 390 } 391 392 public boolean hasComments() { 393 return comments.size() > 0; 394 } 395 396 public List<String> getComments() { 397 return comments; 398 } 399 400 public String getAllComments() { 401 CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n"); 402 b.addAll(comments); 403 comments.clear(); 404 return b.toString(); 405 } 406 407 public String getFirstComment() { 408 if (hasComments()) { 409 String s = comments.get(0); 410 comments.remove(0); 411 return s; 412 } else { 413 return null; 414 } 415 } 416 417 public boolean hasToken(String kw) { 418 return !done() && kw.equals(current); 419 } 420 public boolean hasToken(String... names) { 421 if (done()) 422 return false; 423 for (String s : names) 424 if (s.equals(current)) 425 return true; 426 return false; 427 } 428 429 public void token(String kw) throws FHIRLexerException { 430 if (!kw.equals(current)) 431 throw error("Found \""+current+"\" expecting \""+kw+"\""); 432 next(); 433 } 434 435 public String readConstant(String desc) throws FHIRLexerException { 436 if (!isStringConstant()) 437 throw error("Found "+current+" expecting \"["+desc+"]\""); 438 439 return processConstant(take()); 440 } 441 442 public String readFixedName(String desc) throws FHIRLexerException { 443 if (!isFixedName()) 444 throw error("Found "+current+" expecting \"["+desc+"]\""); 445 446 return processFixedName(take()); 447 } 448 449 public String processConstant(String s) throws FHIRLexerException { 450 StringBuilder b = new StringBuilder(); 451 int i = 1; 452 while (i < s.length()-1) { 453 char ch = s.charAt(i); 454 if (ch == '\\') { 455 i++; 456 switch (s.charAt(i)) { 457 case 't': 458 b.append('\t'); 459 break; 460 case 'r': 461 b.append('\r'); 462 break; 463 case 'n': 464 b.append('\n'); 465 break; 466 case 'f': 467 b.append('\f'); 468 break; 469 case '\'': 470 b.append('\''); 471 break; 472 case '"': 473 b.append('"'); 474 break; 475 case '`': 476 b.append('`'); 477 break; 478 case '\\': 479 b.append('\\'); 480 break; 481 case '/': 482 b.append('/'); 483 break; 484 case 'u': 485 i++; 486 int uc = Integer.parseInt(s.substring(i, i+4), 16); 487 b.append((char) uc); 488 i = i + 4; 489 break; 490 default: 491 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i), currentLocation); 492 } 493 } else { 494 b.append(ch); 495 i++; 496 } 497 } 498 return b.toString(); 499 } 500 501 public String processFixedName(String s) throws FHIRLexerException { 502 StringBuilder b = new StringBuilder(); 503 int i = 1; 504 while (i < s.length()-1) { 505 char ch = s.charAt(i); 506 if (ch == '\\') { 507 i++; 508 switch (s.charAt(i)) { 509 case 't': 510 b.append('\t'); 511 break; 512 case 'r': 513 b.append('\r'); 514 break; 515 case 'n': 516 b.append('\n'); 517 break; 518 case 'f': 519 b.append('\f'); 520 break; 521 case '\'': 522 b.append('\''); 523 break; 524 case '"': 525 b.append('"'); 526 break; 527 case '\\': 528 b.append('\\'); 529 break; 530 case '/': 531 b.append('/'); 532 break; 533 case 'u': 534 i++; 535 int uc = Integer.parseInt(s.substring(i, i+4), 16); 536 b.append((char) uc); 537 i = i + 4; 538 break; 539 default: 540 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i), currentLocation); 541 } 542 } else { 543 b.append(ch); 544 i++; 545 } 546 } 547 return b.toString(); 548 } 549 550 public void skipToken(String token) throws FHIRLexerException { 551 if (getCurrent().equals(token)) 552 next(); 553 554 } 555 public String takeDottedToken() throws FHIRLexerException { 556 StringBuilder b = new StringBuilder(); 557 b.append(take()); 558 while (!done() && getCurrent().equals(".")) { 559 b.append(take()); 560 b.append(take()); 561 } 562 return b.toString(); 563 } 564 565 public int getCurrentStart() { 566 return currentStart; 567 } 568 public String getSource() { 569 return source; 570 } 571 public boolean isLiquidMode() { 572 return liquidMode; 573 } 574 public void setLiquidMode(boolean liquidMode) { 575 this.liquidMode = liquidMode; 576 } 577 public SourceLocation getCommentLocation() { 578 return this.commentLocation; 579 } 580 public boolean isMetadataFormat() { 581 return metadataFormat; 582 } 583 public void setMetadataFormat(boolean metadataFormat) { 584 this.metadataFormat = metadataFormat; 585 } 586 public List<String> cloneComments() { 587 List<String> res = new ArrayList<>(); 588 res.addAll(getComments()); 589 return res; 590 } 591 public String tokenWithTrailingComment(String token) { 592 int line = getCurrentLocation().getLine(); 593 token(token); 594 if (getComments().size() > 0 && getCommentLocation().getLine() == line) { 595 return getFirstComment(); 596 } else { 597 return null; 598 } 599 } 600 public boolean isAllowDoubleQuotes() { 601 return allowDoubleQuotes; 602 } 603}