001package org.hl7.fhir.r4.fhirpath;
002
003import java.util.ArrayList;
004import java.util.List;
005
006import org.hl7.fhir.exceptions.FHIRException;
007import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
008import org.hl7.fhir.utilities.SourceLocation;
009import org.hl7.fhir.utilities.Utilities;
010
011// shared lexer for concrete syntaxes 
012// - FluentPath
013// - Mapping language
014
015public class FHIRLexer {
016  public class FHIRLexerException extends FHIRException {
017
018    private SourceLocation location;
019    
020    public FHIRLexerException(String message) {
021      super(message);
022    }
023    
024    public FHIRLexerException(String message, Throwable cause) {
025      super(message, cause);
026    }
027
028    public FHIRLexerException(String message, SourceLocation location) {
029      super(message);
030      this.location = location;
031    }
032
033    public SourceLocation getLocation() {
034      return location;
035    }
036
037  }
038
039  private String source;
040  private int cursor;
041  private int currentStart;
042  private String current;
043  private List<String> comments = new ArrayList<>();
044  private SourceLocation currentLocation;
045  private SourceLocation currentStartLocation;
046  private int id;
047  private String name;
048  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
049  private SourceLocation commentLocation;
050  private boolean metadataFormat;
051  private boolean allowDoubleQuotes;
052
053  public FHIRLexer(String source, String name) throws FHIRLexerException {
054    this.source = source == null ? "" : Utilities.stripBOM(source);
055    this.name = name == null ? "??" : name;
056    currentLocation = new SourceLocation(1, 1);
057    next();
058  }
059
060  public FHIRLexer(String source, int i) throws FHIRLexerException {
061    this.source = Utilities.stripBOM(source);
062    this.cursor = i;
063    currentLocation = new SourceLocation(1, 1);
064    next();
065  }
066  public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException {
067    this.source = Utilities.stripBOM(source);
068    this.cursor = i;
069    this.allowDoubleQuotes =  allowDoubleQuotes;
070    currentLocation = new SourceLocation(1, 1);
071    next();
072  }
073  public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
074    this.source = source == null ? "" : Utilities.stripBOM(source);
075    this.name = name == null ? "??" : name;
076    this.metadataFormat = metadataFormat;
077    this.allowDoubleQuotes =  allowDoubleQuotes;
078    currentLocation = new SourceLocation(1, 1);
079    next();
080  }
081  public String getCurrent() {
082    return current;
083  }
084
085  public SourceLocation getCurrentLocation() {
086    return currentLocation;
087  }
088
089  public boolean isConstant() {
090    return FHIRPathConstant.isFHIRPathConstant(current);
091  }
092
093  public boolean isFixedName() {
094    return FHIRPathConstant.isFHIRPathFixedName(current);
095  }
096
097  public boolean isStringConstant() {
098    return FHIRPathConstant.isFHIRPathStringConstant(current);
099  }
100
101  public String take() throws FHIRLexerException {
102    String s = current;
103    next();
104    return s;
105  }
106
107  public int takeInt() throws FHIRLexerException {
108    String s = current;
109    if (!Utilities.isInteger(s))
110      throw error("Found "+current+" expecting an integer");
111    next();
112    return Integer.parseInt(s);
113  }
114
115  public boolean isToken() {
116    if (Utilities.noString(current))
117      return false;
118
119    if (current.startsWith("$"))
120      return true;
121
122    if (current.equals("*") || current.equals("**"))
123      return true;
124
125    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
126      for (int i = 1; i < current.length(); i++) 
127        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
128            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
129          return false;
130      return true;
131    }
132    return false;
133  }
134
135  public FHIRLexerException error(String msg) {
136    return error(msg, currentLocation.toString(), currentLocation);
137  }
138
139  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
140    return new FHIRLexerException("Error @"+location+": "+msg, loc);
141  }
142
143  public void next() throws FHIRLexerException {
144    skipWhitespaceAndComments();
145    current = null;
146    currentStart = cursor;
147    currentStartLocation = currentLocation.copy();
148    if (cursor < source.length()) {
149      char ch = source.charAt(cursor);
150      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
151        cursor++;
152        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
153          cursor++;
154        current = source.substring(currentStart, cursor);
155      } else if (ch == '.' ) {
156        cursor++;
157        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
158          cursor++;
159        current = source.substring(currentStart, cursor);
160      } else if (ch >= '0' && ch <= '9') {
161          cursor++;
162        boolean dotted = false;
163        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
164          if (source.charAt(cursor) == '.')
165            dotted = true;
166          cursor++;
167        }
168        if (source.charAt(cursor-1) == '.')
169          cursor--;
170        current = source.substring(currentStart, cursor);
171      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
172        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
173            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
174          cursor++;
175        current = source.substring(currentStart, cursor);
176      } else if (ch == '%') {
177        cursor++;
178        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
179          cursor++;
180          while (cursor < source.length() && (source.charAt(cursor) != '`'))
181            cursor++;
182          cursor++;
183        } else
184        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
185            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-' || source.charAt(cursor) == '_'))
186          cursor++;
187        current = source.substring(currentStart, cursor);
188      } else if (ch == '/') {
189        cursor++;
190        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
191          // we've run into metadata
192          cursor++;
193          cursor++;
194          current = source.substring(currentStart, cursor);
195        } else {
196          current = source.substring(currentStart, cursor);
197        }
198      } else if (ch == '$') {
199        cursor++;
200        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
201          cursor++;
202        current = source.substring(currentStart, cursor);
203      } else if (ch == '{') {
204        cursor++;
205        ch = source.charAt(cursor);
206        if (ch == '}')
207          cursor++;
208        current = source.substring(currentStart, cursor);
209      } else if (ch == '"' && allowDoubleQuotes) {
210        cursor++;
211        boolean escape = false;
212        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
213          if (escape)
214            escape = false;
215          else 
216            escape = (source.charAt(cursor) == '\\');
217          cursor++;
218        }
219        if (cursor == source.length())
220          throw error("Unterminated string");
221        cursor++;
222        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
223      } else if (ch == '`') {
224        cursor++;
225        boolean escape = false;
226        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
227          if (escape)
228            escape = false;
229          else 
230            escape = (source.charAt(cursor) == '\\');
231          cursor++;
232        }
233        if (cursor == source.length())
234          throw error("Unterminated string");
235        cursor++;
236        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
237      } else if (ch == '\''){
238        cursor++;
239        char ech = ch;
240        boolean escape = false;
241        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
242          if (escape)
243            escape = false;
244          else 
245            escape = (source.charAt(cursor) == '\\');
246          cursor++;
247        }
248        if (cursor == source.length())
249          throw error("Unterminated string");
250        cursor++;
251        current = source.substring(currentStart, cursor);
252        if (ech == '\'')
253          current = "\'"+current.substring(1, current.length() - 1)+"\'";
254      } else if (ch == '`') {
255        cursor++;
256        boolean escape = false;
257        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
258          if (escape)
259            escape = false;
260          else 
261            escape = (source.charAt(cursor) == '\\');
262          cursor++;
263        }
264        if (cursor == source.length())
265          throw error("Unterminated string");
266        cursor++;
267        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
268      } else if (ch == '|' && liquidMode) {
269        cursor++;
270        ch = source.charAt(cursor);
271        if (ch == '|')
272          cursor++;
273        current = source.substring(currentStart, cursor);
274      } else if (ch == '@'){
275        int start = cursor;
276        cursor++;
277        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
278          cursor++;          
279        current = source.substring(currentStart, cursor);
280      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
281        cursor++;
282        current = source.substring(currentStart, cursor);
283      }
284    }
285    currentLocation.incColumn(cursor - currentStart);
286  }
287
288  private void skipWhitespaceAndComments() {
289    comments.clear();
290    commentLocation = null;
291    boolean last13 = false;
292    boolean done = false;
293    while (cursor < source.length() && !done) {
294      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2)) && !isMetadataStart()) {
295        if (commentLocation == null) {
296          commentLocation = currentLocation.copy();
297        }
298        int start = cursor+2;
299        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 
300          cursor++;        
301          currentLocation.incColumn();
302        }
303        comments.add(source.substring(start, cursor).trim());
304      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
305        if (commentLocation == null) {
306          commentLocation = currentLocation.copy();
307        }
308        int start = cursor+2;
309        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 
310          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
311          cursor++;        
312          currentLocation.incColumn();
313        }
314        if (cursor >= source.length() -1) {
315          error("Unfinished comment");
316        } else {
317          comments.add(source.substring(start, cursor).trim());
318          cursor = cursor + 2;
319          currentLocation.incColumn(2);
320        }
321      } else if (Utilities.isWhitespace(source.charAt(cursor))) {
322        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
323        cursor++;
324        currentLocation.incColumn();
325      } else {
326        done = true;
327      }
328    }
329  }
330  
331  private boolean isMetadataStart() {
332    return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3));
333  }
334  
335  private boolean isDateChar(char ch,int start) {
336    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
337    
338    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
339  }
340
341  public boolean isOp() {
342    return ExpressionNode.Operation.fromCode(current) != null;
343  }
344
345  public boolean done() {
346    return currentStart >= source.length();
347  }
348
349  public int nextId() {
350    id++;
351    return id;
352  }
353
354  public SourceLocation getCurrentStartLocation() {
355    return currentStartLocation;
356  }
357
358  // special case use
359  public void setCurrent(String current) {
360    this.current = current;
361  }
362
363  public boolean hasComment() {
364    return !done() && current.startsWith("//");
365  }
366
367  public boolean hasComments() {
368    return comments.size() > 0;
369  }
370
371
372  public List<String> getComments() {
373    return comments;
374  }
375
376  public String getAllComments() {
377    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
378    b.addAll(comments);
379    comments.clear();
380    return b.toString();
381  }
382
383  public String getFirstComment() {
384    if (hasComments()) {
385      String s = comments.get(0);
386      comments.remove(0);
387      return s;      
388    } else {
389      return null;
390    }
391  }
392
393  public boolean hasToken(String kw) {
394    return !done() && kw.equals(current);
395  }
396  public boolean hasToken(String... names) {
397    if (done()) 
398      return false;
399    for (String s : names)
400      if (s.equals(current))
401        return true;
402    return false;
403  }
404  
405  public void token(String kw) throws FHIRLexerException {
406    if (!kw.equals(current)) 
407      throw error("Found \""+current+"\" expecting \""+kw+"\"");
408    next();
409  }
410  
411  public String readConstant(String desc) throws FHIRLexerException {
412    if (!isStringConstant())
413      throw error("Found "+current+" expecting \"["+desc+"]\"");
414
415    return processConstant(take());
416  }
417
418  public String readFixedName(String desc) throws FHIRLexerException {
419    if (!isFixedName())
420      throw error("Found "+current+" expecting \"["+desc+"]\"");
421
422    return processFixedName(take());
423  }
424
425  public String processConstant(String s) throws FHIRLexerException {
426    StringBuilder b = new StringBuilder();
427    int i = 1;
428    while (i < s.length()-1) {
429      char ch = s.charAt(i);
430      if (ch == '\\') {
431        i++;
432        switch (s.charAt(i)) {
433        case 't': 
434          b.append('\t');
435          break;
436        case 'r':
437          b.append('\r');
438          break;
439        case 'n': 
440          b.append('\n');
441          break;
442        case 'f': 
443          b.append('\f');
444          break;
445        case '\'':
446          b.append('\'');
447          break;
448        case '"':
449          b.append('"');
450          break;
451        case '`':
452          b.append('`');
453          break;
454        case '\\': 
455          b.append('\\');
456          break;
457        case '/': 
458          b.append('/');
459          break;
460        case 'u':
461          i++;
462          int uc = Integer.parseInt(s.substring(i, i+4), 16);
463          b.append(Character.toString(uc));
464          i = i + 4;
465          break;
466        default:
467          throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation);
468        }
469      } else {
470        b.append(ch);
471        i++;
472      }
473    }
474    return b.toString();
475  }
476  
477  public String processFixedName(String s) throws FHIRLexerException {
478    StringBuilder b = new StringBuilder();
479    int i = 1;
480    while (i < s.length()-1) {
481      char ch = s.charAt(i);
482      if (ch == '\\') {
483        i++;
484        switch (s.charAt(i)) {
485        case 't': 
486          b.append('\t');
487          break;
488        case 'r':
489          b.append('\r');
490          break;
491        case 'n': 
492          b.append('\n');
493          break;
494        case 'f': 
495          b.append('\f');
496          break;
497        case '\'':
498          b.append('\'');
499          break;
500        case '"':
501          b.append('"');
502          break;
503        case '\\': 
504          b.append('\\');
505          break;
506        case '/': 
507          b.append('/');
508          break;
509        case 'u':
510          i++;
511          int uc = Integer.parseInt(s.substring(i, i+4), 32);
512          b.append(Character.toString(uc));
513          i = i + 4;
514          break;
515        default:
516          throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation);
517        }
518      } else {
519        b.append(ch);
520        i++;
521      }
522    }
523    return b.toString();
524  }
525
526  public void skipToken(String token) throws FHIRLexerException {
527    if (getCurrent().equals(token))
528      next();
529    
530  }
531  
532  public String takeDottedToken() throws FHIRLexerException {
533    StringBuilder b = new StringBuilder();
534    b.append(take());
535    while (!done() && getCurrent().equals(".")) {
536      b.append(take());
537      b.append(take());
538    }
539    return b.toString();
540  }
541
542  public void skipComments() throws FHIRLexerException {
543    while (!done() && hasComment())
544      next();
545  }
546
547  public int getCurrentStart() {
548    return currentStart;
549  }
550  public String getSource() {
551    return source;
552  }
553  public boolean isLiquidMode() {
554    return liquidMode;
555  }
556  public void setLiquidMode(boolean liquidMode) {
557    this.liquidMode = liquidMode;
558  }
559  public SourceLocation getCommentLocation() {
560    return this.commentLocation;
561  }
562  public boolean isMetadataFormat() {
563    return metadataFormat;
564  }
565  public void setMetadataFormat(boolean metadataFormat) {
566    this.metadataFormat = metadataFormat;
567  }
568  public List<String> cloneComments() {
569    List<String> res = new ArrayList<>();
570    res.addAll(getComments());
571    return res;
572  }
573  public String tokenWithTrailingComment(String token) {
574    int line = getCurrentLocation().getLine();
575    token(token);
576    if (getComments().size() > 0 && getCommentLocation().getLine() == line) {
577      return getFirstComment();
578    } else {
579      return null;
580    }
581  }
582  public boolean isAllowDoubleQuotes() {
583    return allowDoubleQuotes;
584  }
585}