001package org.hl7.fhir.r4.fhirpath;
002
003import java.util.ArrayList;
004import java.util.List;
005
006import org.hl7.fhir.exceptions.FHIRException;
007import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
008import org.hl7.fhir.utilities.SourceLocation;
009import org.hl7.fhir.utilities.Utilities;
010
011// shared lexer for concrete syntaxes 
012// - FluentPath
013// - Mapping language
014
015public class FHIRLexer {
016  public class FHIRLexerException extends FHIRException {
017
018    private SourceLocation location;
019    
020    public FHIRLexerException(String message) {
021      super(message);
022    }
023    
024    public FHIRLexerException(String message, Throwable cause) {
025      super(message, cause);
026    }
027
028    public FHIRLexerException(String message, SourceLocation location) {
029      super(message);
030      this.location = location;
031    }
032
033    public SourceLocation getLocation() {
034      return location;
035    }
036
037  }
038
039  private String source;
040  private int cursor;
041  private int currentStart;
042  private String current;
043  private List<String> comments = new ArrayList<>();
044  private SourceLocation currentLocation;
045  private SourceLocation currentStartLocation;
046  private int id;
047  private String name;
048  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
049  private SourceLocation commentLocation;
050  private boolean metadataFormat;
051  private boolean allowDoubleQuotes;
052
053  public FHIRLexer(String source, String name) throws FHIRLexerException {
054    this.source = source == null ? "" : Utilities.stripBOM(source);
055    this.name = name == null ? "??" : name;
056    currentLocation = new SourceLocation(1, 1);
057    next();
058  }
059
060  public FHIRLexer(String source, int i) throws FHIRLexerException {
061    this.source = Utilities.stripBOM(source);
062    this.cursor = i;
063    currentLocation = new SourceLocation(1, 1);
064    next();
065  }
066  public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException {
067    this.source = Utilities.stripBOM(source);
068    this.cursor = i;
069    this.allowDoubleQuotes =  allowDoubleQuotes;
070    currentLocation = new SourceLocation(1, 1);
071    next();
072  }
073  public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
074    this.source = source == null ? "" : Utilities.stripBOM(source);
075    this.name = name == null ? "??" : name;
076    this.metadataFormat = metadataFormat;
077    this.allowDoubleQuotes =  allowDoubleQuotes;
078    currentLocation = new SourceLocation(1, 1);
079    next();
080  }
081  public String getCurrent() {
082    return current;
083  }
084
085  public SourceLocation getCurrentLocation() {
086    return currentLocation;
087  }
088
089  public boolean isConstant() {
090    return FHIRPathConstant.isFHIRPathConstant(current);
091  }
092
093  public boolean isFixedName() {
094    return FHIRPathConstant.isFHIRPathFixedName(current);
095  }
096
097  public boolean isStringConstant() {
098    return FHIRPathConstant.isFHIRPathStringConstant(current);
099  }
100
101  public String take() throws FHIRLexerException {
102    String s = current;
103    next();
104    return s;
105  }
106
107  public int takeInt() throws FHIRLexerException {
108    String s = current;
109    if (!Utilities.isInteger(s))
110      throw error("Found "+current+" expecting an integer");
111    next();
112    return Integer.parseInt(s);
113  }
114
115  public boolean isToken() {
116    if (Utilities.noString(current))
117      return false;
118
119    if (current.startsWith("$"))
120      return true;
121
122    if (current.equals("*") || current.equals("**"))
123      return true;
124
125    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
126      for (int i = 1; i < current.length(); i++) 
127        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
128            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
129          return false;
130      return true;
131    }
132    return false;
133  }
134
135  public FHIRLexerException error(String msg) {
136    return error(msg, currentLocation.toString(), currentLocation);
137  }
138
139  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
140    return new FHIRLexerException("Error @"+location+": "+msg, loc);
141  }
142
143  public void next() throws FHIRLexerException {
144    skipWhitespaceAndComments();
145    current = null;
146    currentStart = cursor;
147    currentStartLocation = currentLocation;
148    if (cursor < source.length()) {
149      char ch = source.charAt(cursor);
150      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
151        cursor++;
152        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
153          cursor++;
154        current = source.substring(currentStart, cursor);
155      } else if (ch == '.' ) {
156        cursor++;
157        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
158          cursor++;
159        current = source.substring(currentStart, cursor);
160      } else if (ch >= '0' && ch <= '9') {
161          cursor++;
162        boolean dotted = false;
163        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
164          if (source.charAt(cursor) == '.')
165            dotted = true;
166          cursor++;
167        }
168        if (source.charAt(cursor-1) == '.')
169          cursor--;
170        current = source.substring(currentStart, cursor);
171      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
172        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
173            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
174          cursor++;
175        current = source.substring(currentStart, cursor);
176      } else if (ch == '%') {
177        cursor++;
178        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
179          cursor++;
180          while (cursor < source.length() && (source.charAt(cursor) != '`'))
181            cursor++;
182          cursor++;
183        } else
184        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
185            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-' || source.charAt(cursor) == '_'))
186          cursor++;
187        current = source.substring(currentStart, cursor);
188      } else if (ch == '/') {
189        cursor++;
190        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
191          // we've run into metadata
192          cursor++;
193          cursor++;
194          current = source.substring(currentStart, cursor);
195        } else {
196          current = source.substring(currentStart, cursor);
197        }
198      } else if (ch == '$') {
199        cursor++;
200        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
201          cursor++;
202        current = source.substring(currentStart, cursor);
203      } else if (ch == '{') {
204        cursor++;
205        ch = source.charAt(cursor);
206        if (ch == '}')
207          cursor++;
208        current = source.substring(currentStart, cursor);
209      } else if (ch == '"' && allowDoubleQuotes) {
210        cursor++;
211        boolean escape = false;
212        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
213          if (escape)
214            escape = false;
215          else 
216            escape = (source.charAt(cursor) == '\\');
217          cursor++;
218        }
219        if (cursor == source.length())
220          throw error("Unterminated string");
221        cursor++;
222        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
223      } else if (ch == '`') {
224        cursor++;
225        boolean escape = false;
226        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
227          if (escape)
228            escape = false;
229          else 
230            escape = (source.charAt(cursor) == '\\');
231          cursor++;
232        }
233        if (cursor == source.length())
234          throw error("Unterminated string");
235        cursor++;
236        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
237      } else if (ch == '\''){
238        cursor++;
239        char ech = ch;
240        boolean escape = false;
241        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
242          if (escape)
243            escape = false;
244          else 
245            escape = (source.charAt(cursor) == '\\');
246          cursor++;
247        }
248        if (cursor == source.length())
249          throw error("Unterminated string");
250        cursor++;
251        current = source.substring(currentStart, cursor);
252        if (ech == '\'')
253          current = "\'"+current.substring(1, current.length() - 1)+"\'";
254      } else if (ch == '`') {
255        cursor++;
256        boolean escape = false;
257        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
258          if (escape)
259            escape = false;
260          else 
261            escape = (source.charAt(cursor) == '\\');
262          cursor++;
263        }
264        if (cursor == source.length())
265          throw error("Unterminated string");
266        cursor++;
267        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
268      } else if (ch == '|' && liquidMode) {
269        cursor++;
270        ch = source.charAt(cursor);
271        if (ch == '|')
272          cursor++;
273        current = source.substring(currentStart, cursor);
274      } else if (ch == '@'){
275        int start = cursor;
276        cursor++;
277        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
278          cursor++;          
279        current = source.substring(currentStart, cursor);
280      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
281        cursor++;
282        current = source.substring(currentStart, cursor);
283      }
284    }
285  }
286
287  private void skipWhitespaceAndComments() {
288    comments.clear();
289    commentLocation = null;
290    boolean last13 = false;
291    boolean done = false;
292    while (cursor < source.length() && !done) {
293      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2)) && !isMetadataStart()) {
294        if (commentLocation == null) {
295          commentLocation = currentLocation.copy();
296        }
297        int start = cursor+2;
298        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 
299          cursor++;        
300        }
301        comments.add(source.substring(start, cursor).trim());
302      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
303        if (commentLocation == null) {
304          commentLocation = currentLocation.copy();
305        }
306        int start = cursor+2;
307        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 
308          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
309          cursor++;        
310        }
311        if (cursor >= source.length() -1) {
312          error("Unfinished comment");
313        } else {
314          comments.add(source.substring(start, cursor).trim());
315          cursor = cursor + 2;
316        }
317      } else if (Utilities.isWhitespace(source.charAt(cursor))) {
318        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
319        cursor++;
320      } else {
321        done = true;
322      }
323    }
324  }
325  
326  private boolean isMetadataStart() {
327    return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3));
328  }
329  
330  private boolean isDateChar(char ch,int start) {
331    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
332    
333    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
334  }
335
336  public boolean isOp() {
337    return ExpressionNode.Operation.fromCode(current) != null;
338  }
339
340  public boolean done() {
341    return currentStart >= source.length();
342  }
343
344  public int nextId() {
345    id++;
346    return id;
347  }
348
349  public SourceLocation getCurrentStartLocation() {
350    return currentStartLocation;
351  }
352
353  // special case use
354  public void setCurrent(String current) {
355    this.current = current;
356  }
357
358  public boolean hasComment() {
359    return !done() && current.startsWith("//");
360  }
361
362  public boolean hasComments() {
363    return comments.size() > 0;
364  }
365
366
367  public List<String> getComments() {
368    return comments;
369  }
370
371  public String getAllComments() {
372    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
373    b.addAll(comments);
374    comments.clear();
375    return b.toString();
376  }
377
378  public String getFirstComment() {
379    if (hasComments()) {
380      String s = comments.get(0);
381      comments.remove(0);
382      return s;      
383    } else {
384      return null;
385    }
386  }
387
388  public boolean hasToken(String kw) {
389    return !done() && kw.equals(current);
390  }
391  public boolean hasToken(String... names) {
392    if (done()) 
393      return false;
394    for (String s : names)
395      if (s.equals(current))
396        return true;
397    return false;
398  }
399  
400  public void token(String kw) throws FHIRLexerException {
401    if (!kw.equals(current)) 
402      throw error("Found \""+current+"\" expecting \""+kw+"\"");
403    next();
404  }
405  
406  public String readConstant(String desc) throws FHIRLexerException {
407    if (!isStringConstant())
408      throw error("Found "+current+" expecting \"["+desc+"]\"");
409
410    return processConstant(take());
411  }
412
413  public String readFixedName(String desc) throws FHIRLexerException {
414    if (!isFixedName())
415      throw error("Found "+current+" expecting \"["+desc+"]\"");
416
417    return processFixedName(take());
418  }
419
420  public String processConstant(String s) throws FHIRLexerException {
421    StringBuilder b = new StringBuilder();
422    int i = 1;
423    while (i < s.length()-1) {
424      char ch = s.charAt(i);
425      if (ch == '\\') {
426        i++;
427        switch (s.charAt(i)) {
428        case 't': 
429          b.append('\t');
430          break;
431        case 'r':
432          b.append('\r');
433          break;
434        case 'n': 
435          b.append('\n');
436          break;
437        case 'f': 
438          b.append('\f');
439          break;
440        case '\'':
441          b.append('\'');
442          break;
443        case '"':
444          b.append('"');
445          break;
446        case '`':
447          b.append('`');
448          break;
449        case '\\': 
450          b.append('\\');
451          break;
452        case '/': 
453          b.append('/');
454          break;
455        case 'u':
456          i++;
457          int uc = Integer.parseInt(s.substring(i, i+4), 16);
458          b.append(Character.toString(uc));
459          i = i + 4;
460          break;
461        default:
462          throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation);
463        }
464      } else {
465        b.append(ch);
466        i++;
467      }
468    }
469    return b.toString();
470  }
471  
472  public String processFixedName(String s) throws FHIRLexerException {
473    StringBuilder b = new StringBuilder();
474    int i = 1;
475    while (i < s.length()-1) {
476      char ch = s.charAt(i);
477      if (ch == '\\') {
478        i++;
479        switch (s.charAt(i)) {
480        case 't': 
481          b.append('\t');
482          break;
483        case 'r':
484          b.append('\r');
485          break;
486        case 'n': 
487          b.append('\n');
488          break;
489        case 'f': 
490          b.append('\f');
491          break;
492        case '\'':
493          b.append('\'');
494          break;
495        case '"':
496          b.append('"');
497          break;
498        case '\\': 
499          b.append('\\');
500          break;
501        case '/': 
502          b.append('/');
503          break;
504        case 'u':
505          i++;
506          int uc = Integer.parseInt(s.substring(i, i+4), 32);
507          b.append(Character.toString(uc));
508          i = i + 4;
509          break;
510        default:
511          throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation);
512        }
513      } else {
514        b.append(ch);
515        i++;
516      }
517    }
518    return b.toString();
519  }
520
521  public void skipToken(String token) throws FHIRLexerException {
522    if (getCurrent().equals(token))
523      next();
524    
525  }
526  
527  public String takeDottedToken() throws FHIRLexerException {
528    StringBuilder b = new StringBuilder();
529    b.append(take());
530    while (!done() && getCurrent().equals(".")) {
531      b.append(take());
532      b.append(take());
533    }
534    return b.toString();
535  }
536
537  public void skipComments() throws FHIRLexerException {
538    while (!done() && hasComment())
539      next();
540  }
541
542  public int getCurrentStart() {
543    return currentStart;
544  }
545  public String getSource() {
546    return source;
547  }
548  public boolean isLiquidMode() {
549    return liquidMode;
550  }
551  public void setLiquidMode(boolean liquidMode) {
552    this.liquidMode = liquidMode;
553  }
554  public SourceLocation getCommentLocation() {
555    return this.commentLocation;
556  }
557  public boolean isMetadataFormat() {
558    return metadataFormat;
559  }
560  public void setMetadataFormat(boolean metadataFormat) {
561    this.metadataFormat = metadataFormat;
562  }
563  public List<String> cloneComments() {
564    List<String> res = new ArrayList<>();
565    res.addAll(getComments());
566    return res;
567  }
568  public String tokenWithTrailingComment(String token) {
569    int line = getCurrentLocation().getLine();
570    token(token);
571    if (getComments().size() > 0 && getCommentLocation().getLine() == line) {
572      return getFirstComment();
573    } else {
574      return null;
575    }
576  }
577  public boolean isAllowDoubleQuotes() {
578    return allowDoubleQuotes;
579  }
580}