001package org.hl7.fhir.r5.fhirpath;
002
003import java.util.ArrayList;
004import java.util.List;
005
006import org.hl7.fhir.exceptions.FHIRException;
007import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
008import org.hl7.fhir.utilities.SourceLocation;
009import org.hl7.fhir.utilities.Utilities;
010
011// shared lexer for concrete syntaxes 
012// - FluentPath
013// - Mapping language
014
015public class FHIRLexer {
016  public class FHIRLexerException extends FHIRException {
017
018    private SourceLocation location;
019
020//    public FHIRLexerException() {
021//      super();
022//    }
023//
024//    public FHIRLexerException(String message, Throwable cause) {
025//      super(message, cause);
026//    }
027//
028//    public FHIRLexerException(String message) {
029//      super(message);
030//    }
031//
032//    public FHIRLexerException(Throwable cause) {
033//      super(cause);
034//    }
035
036    public FHIRLexerException(String message, SourceLocation location) {
037      super(message);
038      this.location = location;
039    }
040
041    public SourceLocation getLocation() {
042      return location;
043    }
044
045  }
046  private String source;
047  private int cursor;
048  private int currentStart;
049  private String current;
050  private List<String> comments = new ArrayList<>();
051  private SourceLocation currentLocation;
052  private SourceLocation currentStartLocation;
053  private int id;
054  private String name;
055  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
056  private SourceLocation commentLocation;
057  private boolean metadataFormat;
058  private boolean allowDoubleQuotes;
059
060  public FHIRLexer(String source, String name) throws FHIRLexerException {
061    this.source = source == null ? "" : Utilities.stripBOM(source);
062    this.name = name == null ? "??" : name;
063    currentLocation = new SourceLocation(1, 1);
064    next();
065  }
066  public FHIRLexer(String source, int i) throws FHIRLexerException {
067    this.source = Utilities.stripBOM(source);
068    this.cursor = i;
069    currentLocation = new SourceLocation(1, 1);
070    next();
071  }
072  public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException {
073    this.source = Utilities.stripBOM(source);
074    this.cursor = i;
075    this.allowDoubleQuotes =  allowDoubleQuotes;
076    currentLocation = new SourceLocation(1, 1);
077    next();
078  }
079  public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
080    this.source = source == null ? "" : Utilities.stripBOM(source);
081    this.name = name == null ? "??" : name;
082    this.metadataFormat = metadataFormat;
083    this.allowDoubleQuotes =  allowDoubleQuotes;
084    currentLocation = new SourceLocation(1, 1);
085    next();
086  }
087  public String getCurrent() {
088    return current;
089  }
090  public SourceLocation getCurrentLocation() {
091    return currentLocation;
092  }
093
094  public boolean isConstant() {
095    return FHIRPathConstant.isFHIRPathConstant(current);
096  }
097
098  public boolean isFixedName() {
099    return FHIRPathConstant.isFHIRPathFixedName(current);
100  }
101
102  public boolean isStringConstant() {
103    return FHIRPathConstant.isFHIRPathStringConstant(current);
104  }
105
106  public String take() throws FHIRLexerException {
107    String s = current;
108    next();
109    return s;
110  }
111
112  public int takeInt() throws FHIRLexerException {
113    String s = current;
114    if (!Utilities.isInteger(s))
115      throw error("Found "+current+" expecting an integer");
116    next();
117    return Integer.parseInt(s);
118  }
119
120  public boolean isToken() {
121    if (Utilities.noString(current))
122      return false;
123
124    if (current.startsWith("$"))
125      return true;
126
127    if (current.equals("*") || current.equals("**"))
128      return true;
129
130    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
131      for (int i = 1; i < current.length(); i++) 
132        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
133            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
134          return false;
135      return true;
136    }
137    return false;
138  }
139
140  public FHIRLexerException error(String msg) {
141    return error(msg, currentLocation.toString(), currentLocation);
142  }
143
144  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
145    return new FHIRLexerException("Error @"+location+": "+msg, loc);
146  }
147
148  public void next() throws FHIRLexerException {
149    skipWhitespaceAndComments();
150    current = null;
151    currentStart = cursor;
152    currentStartLocation = currentLocation;
153    if (cursor < source.length()) {
154      char ch = source.charAt(cursor);
155      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
156        cursor++;
157        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
158          cursor++;
159        current = source.substring(currentStart, cursor);
160      } else if (ch == '.' ) {
161        cursor++;
162        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
163          cursor++;
164        current = source.substring(currentStart, cursor);
165      } else if (ch >= '0' && ch <= '9') {
166          cursor++;
167        boolean dotted = false;
168        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
169          if (source.charAt(cursor) == '.')
170            dotted = true;
171          cursor++;
172        }
173        if (source.charAt(cursor-1) == '.')
174          cursor--;
175        current = source.substring(currentStart, cursor);
176      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
177        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
178            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
179          cursor++;
180        current = source.substring(currentStart, cursor);
181      } else if (ch == '%') {
182        cursor++;
183        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
184          cursor++;
185          while (cursor < source.length() && (source.charAt(cursor) != '`'))
186            cursor++;
187          cursor++;
188        } else
189        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
190            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
191          cursor++;
192        current = source.substring(currentStart, cursor);
193      } else if (ch == '/') {
194        cursor++;
195        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
196          // we've run into metadata
197          cursor++;
198          cursor++;
199          current = source.substring(currentStart, cursor);
200        } else {
201          current = source.substring(currentStart, cursor);
202        }
203      } else if (ch == '$') {
204        cursor++;
205        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
206          cursor++;
207        current = source.substring(currentStart, cursor);
208      } else if (ch == '{') {
209        cursor++;
210        ch = source.charAt(cursor);
211        if (ch == '}')
212          cursor++;
213        current = source.substring(currentStart, cursor);
214      } else if (ch == '"' && allowDoubleQuotes) {
215        cursor++;
216        boolean escape = false;
217        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
218          if (escape)
219            escape = false;
220          else 
221            escape = (source.charAt(cursor) == '\\');
222          cursor++;
223        }
224        if (cursor == source.length())
225          throw error("Unterminated string");
226        cursor++;
227        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
228      } else if (ch == '`') {
229        cursor++;
230        boolean escape = false;
231        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
232          if (escape)
233            escape = false;
234          else 
235            escape = (source.charAt(cursor) == '\\');
236          cursor++;
237        }
238        if (cursor == source.length())
239          throw error("Unterminated string");
240        cursor++;
241        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
242      } else if (ch == '\''){
243        cursor++;
244        char ech = ch;
245        boolean escape = false;
246        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
247          if (escape)
248            escape = false;
249          else 
250            escape = (source.charAt(cursor) == '\\');
251          cursor++;
252        }
253        if (cursor == source.length())
254          throw error("Unterminated string");
255        cursor++;
256        current = source.substring(currentStart, cursor);
257        if (ech == '\'')
258          current = "\'"+current.substring(1, current.length() - 1)+"\'";
259      } else if (ch == '`') {
260        cursor++;
261        boolean escape = false;
262        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
263          if (escape)
264            escape = false;
265          else 
266            escape = (source.charAt(cursor) == '\\');
267          cursor++;
268        }
269        if (cursor == source.length())
270          throw error("Unterminated string");
271        cursor++;
272        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
273      } else if (ch == '|' && liquidMode) {
274        cursor++;
275        ch = source.charAt(cursor);
276        if (ch == '|')
277          cursor++;
278        current = source.substring(currentStart, cursor);
279      } else if (ch == '@'){
280        int start = cursor;
281        cursor++;
282        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
283          cursor++;          
284        current = source.substring(currentStart, cursor);
285      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
286        cursor++;
287        current = source.substring(currentStart, cursor);
288      }
289    }
290  }
291
292  private void skipWhitespaceAndComments() {
293    comments.clear();
294    commentLocation = null;
295    boolean last13 = false;
296    boolean done = false;
297    while (cursor < source.length() && !done) {
298      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2)) && !isMetadataStart()) {
299        if (commentLocation == null) {
300          commentLocation = currentLocation.copy();
301        }
302        int start = cursor+2;
303        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 
304          cursor++;        
305        }
306        comments.add(source.substring(start, cursor).trim());
307      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
308        if (commentLocation == null) {
309          commentLocation = currentLocation.copy();
310        }
311        int start = cursor+2;
312        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 
313          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
314          cursor++;        
315        }
316        if (cursor >= source.length() -1) {
317          error("Unfinished comment");
318        } else {
319          comments.add(source.substring(start, cursor).trim());
320          cursor = cursor + 2;
321        }
322      } else if (Utilities.isWhitespace(source.charAt(cursor))) {
323        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
324        cursor++;
325      } else {
326        done = true;
327      }
328    }
329  }
330  
331  private boolean isMetadataStart() {
332    return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3));
333  }
334  
335  private boolean isDateChar(char ch,int start) {
336    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
337    
338    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
339  }
340  public boolean isOp() {
341    return ExpressionNode.Operation.fromCode(current) != null;
342  }
343  public boolean done() {
344    return currentStart >= source.length();
345  }
346  public int nextId() {
347    id++;
348    return id;
349  }
350  public SourceLocation getCurrentStartLocation() {
351    return currentStartLocation;
352  }
353  
354  // special case use
355  public void setCurrent(String current) {
356    this.current = current;
357  }
358
359  public boolean hasComments() {
360    return comments.size() > 0;
361  }
362
363  public List<String> getComments() {
364    return comments;
365  }
366
367  public String getAllComments() {
368    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
369    b.addAll(comments);
370    comments.clear();
371    return b.toString();
372  }
373
374  public String getFirstComment() {
375    if (hasComments()) {
376      String s = comments.get(0);
377      comments.remove(0);
378      return s;      
379    } else {
380      return null;
381    }
382  }
383
384  public boolean hasToken(String kw) {
385    return !done() && kw.equals(current);
386  }
387  public boolean hasToken(String... names) {
388    if (done()) 
389      return false;
390    for (String s : names)
391      if (s.equals(current))
392        return true;
393    return false;
394  }
395  
396  public void token(String kw) throws FHIRLexerException {
397    if (!kw.equals(current)) 
398      throw error("Found \""+current+"\" expecting \""+kw+"\"");
399    next();
400  }
401  
402  public String readConstant(String desc) throws FHIRLexerException {
403    if (!isStringConstant())
404      throw error("Found "+current+" expecting \"["+desc+"]\"");
405
406    return processConstant(take());
407  }
408
409  public String readFixedName(String desc) throws FHIRLexerException {
410    if (!isFixedName())
411      throw error("Found "+current+" expecting \"["+desc+"]\"");
412
413    return processFixedName(take());
414  }
415
416  public String processConstant(String s) throws FHIRLexerException {
417    StringBuilder b = new StringBuilder();
418    int i = 1;
419    while (i < s.length()-1) {
420      char ch = s.charAt(i);
421      if (ch == '\\') {
422        i++;
423        switch (s.charAt(i)) {
424        case 't': 
425          b.append('\t');
426          break;
427        case 'r':
428          b.append('\r');
429          break;
430        case 'n': 
431          b.append('\n');
432          break;
433        case 'f': 
434          b.append('\f');
435          break;
436        case '\'':
437          b.append('\'');
438          break;
439        case '"':
440          b.append('"');
441          break;
442        case '`':
443          b.append('`');
444          break;
445        case '\\': 
446          b.append('\\');
447          break;
448        case '/': 
449          b.append('/');
450          break;
451        case 'u':
452          i++;
453          int uc = Integer.parseInt(s.substring(i, i+4), 16);
454          b.append(Character.toString(uc));
455          i = i + 4;
456          break;
457        default:
458          throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation);
459        }
460      } else {
461        b.append(ch);
462        i++;
463      }
464    }
465    return b.toString();
466  }
467  
468  public String processFixedName(String s) throws FHIRLexerException {
469    StringBuilder b = new StringBuilder();
470    int i = 1;
471    while (i < s.length()-1) {
472      char ch = s.charAt(i);
473      if (ch == '\\') {
474        i++;
475        switch (s.charAt(i)) {
476        case 't': 
477          b.append('\t');
478          break;
479        case 'r':
480          b.append('\r');
481          break;
482        case 'n': 
483          b.append('\n');
484          break;
485        case 'f': 
486          b.append('\f');
487          break;
488        case '\'':
489          b.append('\'');
490          break;
491        case '"':
492          b.append('"');
493          break;
494        case '\\': 
495          b.append('\\');
496          break;
497        case '/': 
498          b.append('/');
499          break;
500        case 'u':
501          i++;
502          int uc = Integer.parseInt(s.substring(i, i+4), 32);
503          b.append(Character.toString(uc));
504          i = i + 4;
505          break;
506        default:
507          throw new FHIRLexerException("Unknown FHIRPath character escape \\"+s.charAt(i), currentLocation);
508        }
509      } else {
510        b.append(ch);
511        i++;
512      }
513    }
514    return b.toString();
515  }
516
517  public void skipToken(String token) throws FHIRLexerException {
518    if (getCurrent().equals(token))
519      next();
520    
521  }
522  public String takeDottedToken() throws FHIRLexerException {
523    StringBuilder b = new StringBuilder();
524    b.append(take());
525    while (!done() && getCurrent().equals(".")) {
526      b.append(take());
527      b.append(take());
528    }
529    return b.toString();
530  }
531  
532  public int getCurrentStart() {
533    return currentStart;
534  }
535  public String getSource() {
536    return source;
537  }
538  public boolean isLiquidMode() {
539    return liquidMode;
540  }
541  public void setLiquidMode(boolean liquidMode) {
542    this.liquidMode = liquidMode;
543  }
544  public SourceLocation getCommentLocation() {
545    return this.commentLocation;
546  }
547  public boolean isMetadataFormat() {
548    return metadataFormat;
549  }
550  public void setMetadataFormat(boolean metadataFormat) {
551    this.metadataFormat = metadataFormat;
552  }
553  public List<String> cloneComments() {
554    List<String> res = new ArrayList<>();
555    res.addAll(getComments());
556    return res;
557  }
558  public String tokenWithTrailingComment(String token) {
559    int line = getCurrentLocation().getLine();
560    token(token);
561    if (getComments().size() > 0 && getCommentLocation().getLine() == line) {
562      return getFirstComment();
563    } else {
564      return null;
565    }
566  }
567  public boolean isAllowDoubleQuotes() {
568    return allowDoubleQuotes;
569  }
570}