001package org.hl7.fhir.r5.fhirpath;
002
003import java.util.ArrayList;
004import java.util.List;
005
006import org.hl7.fhir.exceptions.FHIRException;
007import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
008import org.hl7.fhir.utilities.SourceLocation;
009import org.hl7.fhir.utilities.Utilities;
010
011// shared lexer for concrete syntaxes 
012// - FluentPath
013// - Mapping language
014
015public class FHIRLexer {
016  public class FHIRLexerException extends FHIRException {
017
018    private SourceLocation location;
019    
020    public FHIRLexerException(String message) {
021      super(message);
022    }
023    
024    public FHIRLexerException(String message, Throwable cause) {
025      super(message, cause);
026    }
027
028    public FHIRLexerException(String message, SourceLocation location) {
029      super(message);
030      this.location = location;
031    }
032
033    public SourceLocation getLocation() {
034      return location;
035    }
036
037  }
038
039  private String source;
040  private int cursor;
041  private int currentStart;
042  private String current;
043  private List<String> comments = new ArrayList<>();
044  private SourceLocation currentLocation;
045  private SourceLocation currentStartLocation;
046  private int id;
047  private String name;
048  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
049  private SourceLocation commentLocation;
050  private boolean metadataFormat;
051  private boolean allowDoubleQuotes;
052
053  public FHIRLexer(String source, String name) throws FHIRLexerException {
054    this.source = source == null ? "" : Utilities.stripBOM(source);
055    this.name = name == null ? "??" : name;
056    currentLocation = new SourceLocation(1, 1);
057    next();
058  }
059
060  public FHIRLexer(String source, int i) throws FHIRLexerException {
061    this.source = Utilities.stripBOM(source);
062    this.cursor = i;
063    currentLocation = new SourceLocation(1, 1);
064    next();
065  }
066  public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException {
067    this.source = Utilities.stripBOM(source);
068    this.cursor = i;
069    this.allowDoubleQuotes =  allowDoubleQuotes;
070    currentLocation = new SourceLocation(1, 1);
071    next();
072  }
073  public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
074    this.source = source == null ? "" : Utilities.stripBOM(source);
075    this.name = name == null ? "??" : name;
076    this.metadataFormat = metadataFormat;
077    this.allowDoubleQuotes =  allowDoubleQuotes;
078    currentLocation = new SourceLocation(1, 1);
079    next();
080  }
081  public String getCurrent() {
082    return current;
083  }
084
085  public SourceLocation getCurrentLocation() {
086    return currentLocation;
087  }
088
089  public boolean isConstant() {
090    return FHIRPathConstant.isFHIRPathConstant(current);
091  }
092
093  public boolean isFixedName() {
094    return FHIRPathConstant.isFHIRPathFixedName(current);
095  }
096
097  public boolean isStringConstant() {
098    return FHIRPathConstant.isFHIRPathStringConstant(current);
099  }
100
101  public String take() throws FHIRLexerException {
102    String s = current;
103    next();
104    return s;
105  }
106
107  public int takeInt() throws FHIRLexerException {
108    String s = current;
109    if (!Utilities.isInteger(s))
110      throw error("Found "+current+" expecting an integer");
111    next();
112    return Integer.parseInt(s);
113  }
114
115  public boolean isToken() {
116    if (Utilities.noString(current))
117      return false;
118
119    if (current.startsWith("$"))
120      return true;
121
122    if (current.equals("*") || current.equals("**"))
123      return true;
124
125    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
126      for (int i = 1; i < current.length(); i++) 
127        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
128            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
129          return false;
130      return true;
131    }
132    return false;
133  }
134
135  public FHIRLexerException error(String msg) {
136    return error(msg, currentLocation.toString(), currentLocation);
137  }
138
139  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
140    return new FHIRLexerException("Error @"+location+": "+msg, loc);
141  }
142
143  public void next() throws FHIRLexerException {
144    skipWhitespaceAndComments();
145    current = null;
146    currentStart = cursor;
147    currentStartLocation = currentLocation.copy();
148    if (cursor < source.length()) {
149      char ch = source.charAt(cursor);
150      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
151        cursor++;
152        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
153          cursor++;
154        current = source.substring(currentStart, cursor);
155      } else if (ch == '.' ) {
156        cursor++;
157        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
158          cursor++;
159        current = source.substring(currentStart, cursor);
160      } else if (ch >= '0' && ch <= '9') {
161          cursor++;
162        boolean dotted = false;
163        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
164          if (source.charAt(cursor) == '.')
165            dotted = true;
166          cursor++;
167        }
168        if (source.charAt(cursor-1) == '.')
169          cursor--;
170        current = source.substring(currentStart, cursor);
171      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
172        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
173            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
174          cursor++;
175        current = source.substring(currentStart, cursor);
176      } else if (ch == '%') {
177        cursor++;
178        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
179          cursor++;
180          while (cursor < source.length() && (source.charAt(cursor) != '`'))
181            cursor++;
182          cursor++;
183        } else
184        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
185            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-' || source.charAt(cursor) == '_'))
186          cursor++;
187        current = source.substring(currentStart, cursor);
188      } else if (ch == '/') {
189        cursor++;
190        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
191          // we've run into metadata
192          cursor++;
193          cursor++;
194          current = source.substring(currentStart, cursor);
195        } else {
196          current = source.substring(currentStart, cursor);
197        }
198      } else if (ch == '$') {
199        cursor++;
200        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
201          cursor++;
202        current = source.substring(currentStart, cursor);
203      } else if (ch == '{') {
204        cursor++;
205        ch = source.charAt(cursor);
206        if (ch == '}')
207          cursor++;
208        current = source.substring(currentStart, cursor);
209      } else if (ch == '"' && allowDoubleQuotes) {
210        cursor++;
211        boolean escape = false;
212        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
213          if (escape)
214            escape = false;
215          else 
216            escape = (source.charAt(cursor) == '\\');
217          cursor++;
218        }
219        if (cursor == source.length())
220          throw error("Unterminated string");
221        cursor++;
222        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
223      } else if (ch == '`') {
224        cursor++;
225        boolean escape = false;
226        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
227          if (escape)
228            escape = false;
229          else 
230            escape = (source.charAt(cursor) == '\\');
231          cursor++;
232        }
233        if (cursor == source.length())
234          throw error("Unterminated string");
235        cursor++;
236        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
237      } else if (ch == '\''){
238        cursor++;
239        char ech = ch;
240        boolean escape = false;
241        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
242          if (escape)
243            escape = false;
244          else 
245            escape = (source.charAt(cursor) == '\\');
246          cursor++;
247        }
248        if (cursor == source.length())
249          throw error("Unterminated string");
250        cursor++;
251        current = source.substring(currentStart, cursor);
252        if (ech == '\'')
253          current = "\'"+current.substring(1, current.length() - 1)+"\'";
254      } else if (ch == '`') {
255        cursor++;
256        boolean escape = false;
257        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
258          if (escape)
259            escape = false;
260          else 
261            escape = (source.charAt(cursor) == '\\');
262          cursor++;
263        }
264        if (cursor == source.length())
265          throw error("Unterminated string");
266        cursor++;
267        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
268      } else if (ch == '|' && liquidMode) {
269        cursor++;
270        ch = source.charAt(cursor);
271        if (ch == '|')
272          cursor++;
273        current = source.substring(currentStart, cursor);
274      } else if (ch == '@'){
275        int start = cursor;
276        cursor++;
277        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
278          cursor++;          
279        current = source.substring(currentStart, cursor);
280      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
281        cursor++;
282        current = source.substring(currentStart, cursor);
283      }
284    }
285    currentLocation.incColumn(cursor - currentStart);
286  }
287
288  private void skipWhitespaceAndComments() {
289    comments.clear();
290    commentLocation = null;
291    boolean last13 = false;
292    boolean done = false;
293    while (cursor < source.length() && !done) {
294      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2)) && !isMetadataStart()) {
295        if (commentLocation == null) {
296          commentLocation = currentLocation.copy();
297        }
298        int start = cursor+2;
299        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 
300          cursor++;        
301          currentLocation.incColumn();
302        }
303        comments.add(source.substring(start, cursor).trim());
304      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
305        if (commentLocation == null) {
306          commentLocation = currentLocation.copy();
307        }
308        int start = cursor+2;
309        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 
310          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
311          cursor++;        
312          currentLocation.incColumn();
313        }
314        if (cursor >= source.length() -1) {
315          error("Unfinished comment");
316        } else {
317          comments.add(source.substring(start, cursor).trim());
318          cursor = cursor + 2;
319          currentLocation.incColumn(2);
320        }
321      } else if (Utilities.isWhitespace(source.charAt(cursor))) {
322        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
323        cursor++;
324        // checkChar increments the position
325        // currentLocation.incColumn();
326      } else {
327        done = true;
328      }
329    }
330  }
331  
332  private boolean isMetadataStart() {
333    return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3));
334  }
335  
336  private boolean isDateChar(char ch,int start) {
337    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
338    
339    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
340  }
341
342  public boolean isOp() {
343    return ExpressionNode.Operation.fromCode(current) != null;
344  }
345
346  public boolean done() {
347    return currentStart >= source.length();
348  }
349
350  public int nextId() {
351    id++;
352    return id;
353  }
354
355  public SourceLocation getCurrentStartLocation() {
356    return currentStartLocation;
357  }
358
359  // special case use
360  public void setCurrent(String current) {
361    this.current = current;
362  }
363
364  public boolean hasComment() {
365    return !done() && current.startsWith("//");
366  }
367
368  public boolean hasComments() {
369    return comments.size() > 0;
370  }
371
372
373  public List<String> getComments() {
374    return comments;
375  }
376
377  public String getAllComments() {
378    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
379    b.addAll(comments);
380    comments.clear();
381    return b.toString();
382  }
383
384  public String getFirstComment() {
385    if (hasComments()) {
386      String s = comments.get(0);
387      comments.remove(0);
388      return s;      
389    } else {
390      return null;
391    }
392  }
393
394  public boolean hasToken(String kw) {
395    return !done() && kw.equals(current);
396  }
397  public boolean hasToken(String... names) {
398    if (done()) 
399      return false;
400    for (String s : names)
401      if (s.equals(current))
402        return true;
403    return false;
404  }
405  
406  public void token(String kw) throws FHIRLexerException {
407    if (!kw.equals(current)) 
408      throw error("Found \""+current+"\" expecting \""+kw+"\"");
409    next();
410  }
411  
412  public String readConstant(String desc) throws FHIRLexerException {
413    if (!isStringConstant())
414      throw error("Found "+current+" expecting \"["+desc+"]\"");
415
416    return processConstant(take());
417  }
418
419  public String readFixedName(String desc) throws FHIRLexerException {
420    if (!isFixedName())
421      throw error("Found "+current+" expecting \"["+desc+"]\"");
422
423    return processFixedName(take());
424  }
425
426  public String processConstant(String s) throws FHIRLexerException {
427    StringBuilder b = new StringBuilder();
428    int i = 1;
429    while (i < s.length()-1) {
430      char ch = s.charAt(i);
431      if (ch == '\\') {
432        i++;
433        switch (s.charAt(i)) {
434        case 't': 
435          b.append('\t');
436          break;
437        case 'r':
438          b.append('\r');
439          break;
440        case 'n': 
441          b.append('\n');
442          break;
443        case 'f': 
444          b.append('\f');
445          break;
446        case '\'':
447          b.append('\'');
448          break;
449        case '"':
450          b.append('"');
451          break;
452        case '`':
453          b.append('`');
454          break;
455        case '\\': 
456          b.append('\\');
457          break;
458        case '/': 
459          b.append('/');
460          break;
461        case 'u':
462          i++;
463          int uc = Integer.parseInt(s.substring(i, i+4), 16);
464          b.append(Character.toString(uc));
465          i = i + 4;
466          break;
467        default:
468          throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation);
469        }
470      } else {
471        b.append(ch);
472        i++;
473      }
474    }
475    return b.toString();
476  }
477  
478  public String processFixedName(String s) throws FHIRLexerException {
479    StringBuilder b = new StringBuilder();
480    int i = 1;
481    while (i < s.length()-1) {
482      char ch = s.charAt(i);
483      if (ch == '\\') {
484        i++;
485        switch (s.charAt(i)) {
486        case 't': 
487          b.append('\t');
488          break;
489        case 'r':
490          b.append('\r');
491          break;
492        case 'n': 
493          b.append('\n');
494          break;
495        case 'f': 
496          b.append('\f');
497          break;
498        case '\'':
499          b.append('\'');
500          break;
501        case '"':
502          b.append('"');
503          break;
504        case '\\': 
505          b.append('\\');
506          break;
507        case '/': 
508          b.append('/');
509          break;
510        case 'u':
511          i++;
512          int uc = Integer.parseInt(s.substring(i, i+4), 32);
513          b.append(Character.toString(uc));
514          i = i + 4;
515          break;
516        default:
517          throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation);
518        }
519      } else {
520        b.append(ch);
521        i++;
522      }
523    }
524    return b.toString();
525  }
526
527  public void skipToken(String token) throws FHIRLexerException {
528    if (getCurrent().equals(token))
529      next();
530    
531  }
532  
533  public String takeDottedToken() throws FHIRLexerException {
534    StringBuilder b = new StringBuilder();
535    b.append(take());
536    while (!done() && getCurrent().equals(".")) {
537      b.append(take());
538      b.append(take());
539    }
540    return b.toString();
541  }
542
543  public void skipComments() throws FHIRLexerException {
544    while (!done() && hasComment())
545      next();
546  }
547
548  public int getCurrentStart() {
549    return currentStart;
550  }
551  public String getSource() {
552    return source;
553  }
554  public boolean isLiquidMode() {
555    return liquidMode;
556  }
557  public void setLiquidMode(boolean liquidMode) {
558    this.liquidMode = liquidMode;
559  }
560  public SourceLocation getCommentLocation() {
561    return this.commentLocation;
562  }
563  public boolean isMetadataFormat() {
564    return metadataFormat;
565  }
566  public void setMetadataFormat(boolean metadataFormat) {
567    this.metadataFormat = metadataFormat;
568  }
569  public List<String> cloneComments() {
570    List<String> res = new ArrayList<>();
571    res.addAll(getComments());
572    return res;
573  }
574  public String tokenWithTrailingComment(String token) {
575    int line = getCurrentLocation().getLine();
576    token(token);
577    if (getComments().size() > 0 && getCommentLocation().getLine() == line) {
578      return getFirstComment();
579    } else {
580      return null;
581    }
582  }
583  public boolean isAllowDoubleQuotes() {
584    return allowDoubleQuotes;
585  }
586}