001package org.hl7.fhir.r5.utils;
002
003import java.util.ArrayList;
004import java.util.List;
005
006import org.hl7.fhir.exceptions.FHIRException;
007
008/*
009  Copyright (c) 2011+, HL7, Inc.
010  All rights reserved.
011  
012  Redistribution and use in source and binary forms, with or without modification, 
013  are permitted provided that the following conditions are met:
014    
015   * Redistributions of source code must retain the above copyright notice, this 
016     list of conditions and the following disclaimer.
017   * Redistributions in binary form must reproduce the above copyright notice, 
018     this list of conditions and the following disclaimer in the documentation 
019     and/or other materials provided with the distribution.
020   * Neither the name of HL7 nor the names of its contributors may be used to 
021     endorse or promote products derived from this software without specific 
022     prior written permission.
023  
024  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
025  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
026  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
027  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
028  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
029  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
030  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
031  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
032  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
033  POSSIBILITY OF SUCH DAMAGE.
034  
035 */
036
037
038
039import org.hl7.fhir.r5.model.ExpressionNode;
040import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
041import org.hl7.fhir.utilities.SourceLocation;
042import org.hl7.fhir.utilities.Utilities;
043
044// shared lexer for concrete syntaxes 
045// - FluentPath
046// - Mapping language
047
048public class FHIRLexer {
049  public class FHIRLexerException extends FHIRException {
050
051    private SourceLocation location;
052
053//    public FHIRLexerException() {
054//      super();
055//    }
056//
057//    public FHIRLexerException(String message, Throwable cause) {
058//      super(message, cause);
059//    }
060//
061//    public FHIRLexerException(String message) {
062//      super(message);
063//    }
064//
065//    public FHIRLexerException(Throwable cause) {
066//      super(cause);
067//    }
068
069    public FHIRLexerException(String message, SourceLocation location) {
070      super(message);
071      this.location = location;
072    }
073
074    public SourceLocation getLocation() {
075      return location;
076    }
077
078  }
079  private String source;
080  private int cursor;
081  private int currentStart;
082  private String current;
083  private List<String> comments = new ArrayList<>();
084  private SourceLocation currentLocation;
085  private SourceLocation currentStartLocation;
086  private int id;
087  private String name;
088  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
089  private SourceLocation commentLocation;
090  private boolean metadataFormat;
091  private boolean allowDoubleQuotes;
092
093  public FHIRLexer(String source, String name) throws FHIRLexerException {
094    this.source = source == null ? "" : source;
095    this.name = name == null ? "??" : name;
096    currentLocation = new SourceLocation(1, 1);
097    next();
098  }
099  public FHIRLexer(String source, int i) throws FHIRLexerException {
100    this.source = source;
101    this.cursor = i;
102    currentLocation = new SourceLocation(1, 1);
103    next();
104  }
105  public FHIRLexer(String source, int i, boolean allowDoubleQuotes) throws FHIRLexerException {
106    this.source = source;
107    this.cursor = i;
108    this.allowDoubleQuotes =  allowDoubleQuotes;
109    currentLocation = new SourceLocation(1, 1);
110    next();
111  }
112  public FHIRLexer(String source, String name, boolean metadataFormat, boolean allowDoubleQuotes) throws FHIRLexerException {
113    this.source = source == null ? "" : source;
114    this.name = name == null ? "??" : name;
115    this.metadataFormat = metadataFormat;
116    this.allowDoubleQuotes =  allowDoubleQuotes;
117    currentLocation = new SourceLocation(1, 1);
118    next();
119  }
120  public String getCurrent() {
121    return current;
122  }
123  public SourceLocation getCurrentLocation() {
124    return currentLocation;
125  }
126
127  public boolean isConstant() {
128    return FHIRPathConstant.isFHIRPathConstant(current);
129  }
130
131  public boolean isFixedName() {
132    return FHIRPathConstant.isFHIRPathFixedName(current);
133  }
134
135  public boolean isStringConstant() {
136    return FHIRPathConstant.isFHIRPathStringConstant(current);
137  }
138
139  public String take() throws FHIRLexerException {
140    String s = current;
141    next();
142    return s;
143  }
144
145  public int takeInt() throws FHIRLexerException {
146    String s = current;
147    if (!Utilities.isInteger(s))
148      throw error("Found "+current+" expecting an integer");
149    next();
150    return Integer.parseInt(s);
151  }
152
153  public boolean isToken() {
154    if (Utilities.noString(current))
155      return false;
156
157    if (current.startsWith("$"))
158      return true;
159
160    if (current.equals("*") || current.equals("**"))
161      return true;
162
163    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
164      for (int i = 1; i < current.length(); i++) 
165        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
166            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
167          return false;
168      return true;
169    }
170    return false;
171  }
172
173  public FHIRLexerException error(String msg) {
174    return error(msg, currentLocation.toString(), currentLocation);
175  }
176
177  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
178    return new FHIRLexerException("Error @"+location+": "+msg, loc);
179  }
180
181  public void next() throws FHIRLexerException {
182    skipWhitespaceAndComments();
183    current = null;
184    currentStart = cursor;
185    currentStartLocation = currentLocation;
186    if (cursor < source.length()) {
187      char ch = source.charAt(cursor);
188      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
189        cursor++;
190        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
191          cursor++;
192        current = source.substring(currentStart, cursor);
193      } else if (ch == '.' ) {
194        cursor++;
195        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
196          cursor++;
197        current = source.substring(currentStart, cursor);
198      } else if (ch >= '0' && ch <= '9') {
199          cursor++;
200        boolean dotted = false;
201        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
202          if (source.charAt(cursor) == '.')
203            dotted = true;
204          cursor++;
205        }
206        if (source.charAt(cursor-1) == '.')
207          cursor--;
208        current = source.substring(currentStart, cursor);
209      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
210        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
211            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
212          cursor++;
213        current = source.substring(currentStart, cursor);
214      } else if (ch == '%') {
215        cursor++;
216        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
217          cursor++;
218          while (cursor < source.length() && (source.charAt(cursor) != '`'))
219            cursor++;
220          cursor++;
221        } else
222        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
223            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
224          cursor++;
225        current = source.substring(currentStart, cursor);
226      } else if (ch == '/') {
227        cursor++;
228        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
229          // we've run into metadata
230          cursor++;
231          cursor++;
232          current = source.substring(currentStart, cursor);
233        } else {
234          current = source.substring(currentStart, cursor);
235        }
236      } else if (ch == '$') {
237        cursor++;
238        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
239          cursor++;
240        current = source.substring(currentStart, cursor);
241      } else if (ch == '{') {
242        cursor++;
243        ch = source.charAt(cursor);
244        if (ch == '}')
245          cursor++;
246        current = source.substring(currentStart, cursor);
247      } else if (ch == '"' && allowDoubleQuotes) {
248        cursor++;
249        boolean escape = false;
250        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
251          if (escape)
252            escape = false;
253          else 
254            escape = (source.charAt(cursor) == '\\');
255          cursor++;
256        }
257        if (cursor == source.length())
258          throw error("Unterminated string");
259        cursor++;
260        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
261      } else if (ch == '`') {
262        cursor++;
263        boolean escape = false;
264        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
265          if (escape)
266            escape = false;
267          else 
268            escape = (source.charAt(cursor) == '\\');
269          cursor++;
270        }
271        if (cursor == source.length())
272          throw error("Unterminated string");
273        cursor++;
274        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
275      } else if (ch == '\''){
276        cursor++;
277        char ech = ch;
278        boolean escape = false;
279        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
280          if (escape)
281            escape = false;
282          else 
283            escape = (source.charAt(cursor) == '\\');
284          cursor++;
285        }
286        if (cursor == source.length())
287          throw error("Unterminated string");
288        cursor++;
289        current = source.substring(currentStart, cursor);
290        if (ech == '\'')
291          current = "\'"+current.substring(1, current.length() - 1)+"\'";
292      } else if (ch == '`') {
293        cursor++;
294        boolean escape = false;
295        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
296          if (escape)
297            escape = false;
298          else 
299            escape = (source.charAt(cursor) == '\\');
300          cursor++;
301        }
302        if (cursor == source.length())
303          throw error("Unterminated string");
304        cursor++;
305        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
306      } else if (ch == '|' && liquidMode) {
307        cursor++;
308        ch = source.charAt(cursor);
309        if (ch == '|')
310          cursor++;
311        current = source.substring(currentStart, cursor);
312      } else if (ch == '@'){
313        int start = cursor;
314        cursor++;
315        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
316          cursor++;          
317        current = source.substring(currentStart, cursor);
318      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
319        cursor++;
320        current = source.substring(currentStart, cursor);
321      }
322    }
323  }
324
325  private void skipWhitespaceAndComments() {
326    comments.clear();
327    commentLocation = null;
328    boolean last13 = false;
329    boolean done = false;
330    while (cursor < source.length() && !done) {
331      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2)) && !isMetadataStart()) {
332        if (commentLocation == null) {
333          commentLocation = currentLocation.copy();
334        }
335        int start = cursor+2;
336        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 
337          cursor++;        
338        }
339        comments.add(source.substring(start, cursor).trim());
340      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
341        if (commentLocation == null) {
342          commentLocation = currentLocation.copy();
343        }
344        int start = cursor+2;
345        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 
346          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
347          cursor++;        
348        }
349        if (cursor >= source.length() -1) {
350          error("Unfinished comment");
351        } else {
352          comments.add(source.substring(start, cursor).trim());
353          cursor = cursor + 2;
354        }
355      } else if (Utilities.isWhitespace(source.charAt(cursor))) {
356        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
357        cursor++;
358      } else {
359        done = true;
360      }
361    }
362  }
363  
364  private boolean isMetadataStart() {
365    return metadataFormat && cursor < source.length() - 2 && "///".equals(source.substring(cursor, cursor+3));
366  }
367  
368  private boolean isDateChar(char ch,int start) {
369    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
370    
371    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
372  }
373  public boolean isOp() {
374    return ExpressionNode.Operation.fromCode(current) != null;
375  }
376  public boolean done() {
377    return currentStart >= source.length();
378  }
379  public int nextId() {
380    id++;
381    return id;
382  }
383  public SourceLocation getCurrentStartLocation() {
384    return currentStartLocation;
385  }
386  
387  // special case use
388  public void setCurrent(String current) {
389    this.current = current;
390  }
391
392  public boolean hasComments() {
393    return comments.size() > 0;
394  }
395
396  public List<String> getComments() {
397    return comments;
398  }
399
400  public String getAllComments() {
401    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
402    b.addAll(comments);
403    comments.clear();
404    return b.toString();
405  }
406
407  public String getFirstComment() {
408    if (hasComments()) {
409      String s = comments.get(0);
410      comments.remove(0);
411      return s;      
412    } else {
413      return null;
414    }
415  }
416
417  public boolean hasToken(String kw) {
418    return !done() && kw.equals(current);
419  }
420  public boolean hasToken(String... names) {
421    if (done()) 
422      return false;
423    for (String s : names)
424      if (s.equals(current))
425        return true;
426    return false;
427  }
428  
429  public void token(String kw) throws FHIRLexerException {
430    if (!kw.equals(current)) 
431      throw error("Found \""+current+"\" expecting \""+kw+"\"");
432    next();
433  }
434  
435  public String readConstant(String desc) throws FHIRLexerException {
436    if (!isStringConstant())
437      throw error("Found "+current+" expecting \"["+desc+"]\"");
438
439    return processConstant(take());
440  }
441
442  public String readFixedName(String desc) throws FHIRLexerException {
443    if (!isFixedName())
444      throw error("Found "+current+" expecting \"["+desc+"]\"");
445
446    return processFixedName(take());
447  }
448
449  public String processConstant(String s) throws FHIRLexerException {
450    StringBuilder b = new StringBuilder();
451    int i = 1;
452    while (i < s.length()-1) {
453      char ch = s.charAt(i);
454      if (ch == '\\') {
455        i++;
456        switch (s.charAt(i)) {
457        case 't': 
458          b.append('\t');
459          break;
460        case 'r':
461          b.append('\r');
462          break;
463        case 'n': 
464          b.append('\n');
465          break;
466        case 'f': 
467          b.append('\f');
468          break;
469        case '\'':
470          b.append('\'');
471          break;
472        case '"':
473          b.append('"');
474          break;
475        case '`':
476          b.append('`');
477          break;
478        case '\\': 
479          b.append('\\');
480          break;
481        case '/': 
482          b.append('/');
483          break;
484        case 'u':
485          i++;
486          int uc = Integer.parseInt(s.substring(i, i+4), 16);
487          b.append((char) uc);
488          i = i + 4;
489          break;
490        default:
491          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i), currentLocation);
492        }
493      } else {
494        b.append(ch);
495        i++;
496      }
497    }
498    return b.toString();
499  }
500  
501  public String processFixedName(String s) throws FHIRLexerException {
502    StringBuilder b = new StringBuilder();
503    int i = 1;
504    while (i < s.length()-1) {
505      char ch = s.charAt(i);
506      if (ch == '\\') {
507        i++;
508        switch (s.charAt(i)) {
509        case 't': 
510          b.append('\t');
511          break;
512        case 'r':
513          b.append('\r');
514          break;
515        case 'n': 
516          b.append('\n');
517          break;
518        case 'f': 
519          b.append('\f');
520          break;
521        case '\'':
522          b.append('\'');
523          break;
524        case '"':
525          b.append('"');
526          break;
527        case '\\': 
528          b.append('\\');
529          break;
530        case '/': 
531          b.append('/');
532          break;
533        case 'u':
534          i++;
535          int uc = Integer.parseInt(s.substring(i, i+4), 16);
536          b.append((char) uc);
537          i = i + 4;
538          break;
539        default:
540          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i), currentLocation);
541        }
542      } else {
543        b.append(ch);
544        i++;
545      }
546    }
547    return b.toString();
548  }
549
550  public void skipToken(String token) throws FHIRLexerException {
551    if (getCurrent().equals(token))
552      next();
553    
554  }
555  public String takeDottedToken() throws FHIRLexerException {
556    StringBuilder b = new StringBuilder();
557    b.append(take());
558    while (!done() && getCurrent().equals(".")) {
559      b.append(take());
560      b.append(take());
561    }
562    return b.toString();
563  }
564  
565  public int getCurrentStart() {
566    return currentStart;
567  }
568  public String getSource() {
569    return source;
570  }
571  public boolean isLiquidMode() {
572    return liquidMode;
573  }
574  public void setLiquidMode(boolean liquidMode) {
575    this.liquidMode = liquidMode;
576  }
577  public SourceLocation getCommentLocation() {
578    return this.commentLocation;
579  }
580  public boolean isMetadataFormat() {
581    return metadataFormat;
582  }
583  public void setMetadataFormat(boolean metadataFormat) {
584    this.metadataFormat = metadataFormat;
585  }
586  public List<String> cloneComments() {
587    List<String> res = new ArrayList<>();
588    res.addAll(getComments());
589    return res;
590  }
591  public String tokenWithTrailingComment(String token) {
592    int line = getCurrentLocation().getLine();
593    token(token);
594    if (getComments().size() > 0 && getCommentLocation().getLine() == line) {
595      return getFirstComment();
596    } else {
597      return null;
598    }
599  }
600  public boolean isAllowDoubleQuotes() {
601    return allowDoubleQuotes;
602  }
603}