001package org.hl7.fhir.r4.fhirpath;
002
003import org.hl7.fhir.exceptions.FHIRException;
004import org.hl7.fhir.utilities.SourceLocation;
005import org.hl7.fhir.utilities.Utilities;
006
007// shared lexer for concrete syntaxes 
008// - FluentPath
009// - Mapping language
010
011public class FHIRLexer {
012  public class FHIRLexerException extends FHIRException {
013
014    public FHIRLexerException() {
015      super();
016    }
017
018    public FHIRLexerException(String message, Throwable cause) {
019      super(message, cause);
020    }
021
022    public FHIRLexerException(String message) {
023      super(message);
024    }
025
026    public FHIRLexerException(Throwable cause) {
027      super(cause);
028    }
029
030  }
031
032  private String source;
033  private int cursor;
034  private int currentStart;
035  private String current;
036  private SourceLocation currentLocation;
037  private SourceLocation currentStartLocation;
038  private int id;
039  private String name;
040
041  public FHIRLexer(String source, String name) throws FHIRLexerException {
042    this.source = source;
043    this.name = name == null ? "??" : name;
044    currentLocation = new SourceLocation(1, 1);
045    next();
046  }
047
048  public FHIRLexer(String source, int i) throws FHIRLexerException {
049    this.source = source;
050    this.cursor = i;
051    currentLocation = new SourceLocation(1, 1);
052    next();
053  }
054
055  public String getCurrent() {
056    return current;
057  }
058
059  public SourceLocation getCurrentLocation() {
060    return currentLocation;
061  }
062
063  public boolean isConstant() {
064    return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@'
065        || current.charAt(0) == '%' || current.charAt(0) == '-' || current.charAt(0) == '+'
066        || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || current.equals("true") || current.equals("false")
067        || current.equals("{}");
068  }
069
070  public boolean isFixedName() {
071    return current != null && (current.charAt(0) == '`');
072  }
073
074  public boolean isStringConstant() {
075    return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`';
076  }
077
078  public String take() throws FHIRLexerException {
079    String s = current;
080    next();
081    return s;
082  }
083
084  public int takeInt() throws FHIRLexerException {
085    String s = current;
086    if (!Utilities.isInteger(s))
087      throw error("Found " + current + " expecting an integer");
088    next();
089    return Integer.parseInt(s);
090  }
091
092  public boolean isToken() {
093    if (Utilities.noString(current))
094      return false;
095
096    if (current.startsWith("$"))
097      return true;
098
099    if (current.equals("*") || current.equals("**"))
100      return true;
101
102    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z')
103        || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
104      for (int i = 1; i < current.length(); i++)
105        if (!((current.charAt(1) >= 'A' && current.charAt(1) <= 'Z')
106            || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z')
107            || (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
108          return false;
109      return true;
110    }
111    return false;
112  }
113
114  public FHIRLexerException error(String msg) {
115    return error(msg, currentLocation.toString());
116  }
117
118  public FHIRLexerException error(String msg, String location) {
119    return new FHIRLexerException("Error in " + name + " at " + location + ": " + msg);
120  }
121
122  public void next() throws FHIRLexerException {
123    skipWhitespaceAndComments();
124    current = null;
125    currentStart = cursor;
126    currentStartLocation = currentLocation;
127    if (cursor < source.length()) {
128      char ch = source.charAt(cursor);
129      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') {
130        cursor++;
131        if (cursor < source.length()
132            && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-')
133            || (ch == '-' && source.charAt(cursor) == '>'))
134          cursor++;
135        current = source.substring(currentStart, cursor);
136      } else if (ch == '.') {
137        cursor++;
138        if (cursor < source.length() && (source.charAt(cursor) == '.'))
139          cursor++;
140        current = source.substring(currentStart, cursor);
141      } else if (ch >= '0' && ch <= '9') {
142        cursor++;
143        boolean dotted = false;
144        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9')
145            || (source.charAt(cursor) == '.') && !dotted)) {
146          if (source.charAt(cursor) == '.')
147            dotted = true;
148          cursor++;
149        }
150        if (source.charAt(cursor - 1) == '.')
151          cursor--;
152        current = source.substring(currentStart, cursor);
153      } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
154        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z')
155            || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')
156            || (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_'))
157          cursor++;
158        current = source.substring(currentStart, cursor);
159      } else if (ch == '%') {
160        cursor++;
161        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
162          cursor++;
163          while (cursor < source.length() && (source.charAt(cursor) != '`'))
164            cursor++;
165          cursor++;
166        } else
167          while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z')
168              || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')
169              || (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':'
170              || source.charAt(cursor) == '-'))
171            cursor++;
172        current = source.substring(currentStart, cursor);
173      } else if (ch == '/') {
174        cursor++;
175        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
176          // this is en error - should already have been skipped
177          error("This shoudn't happen?");
178        }
179        current = source.substring(currentStart, cursor);
180      } else if (ch == '$') {
181        cursor++;
182        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
183          cursor++;
184        current = source.substring(currentStart, cursor);
185      } else if (ch == '{') {
186        cursor++;
187        ch = source.charAt(cursor);
188        if (ch == '}')
189          cursor++;
190        current = source.substring(currentStart, cursor);
191      } else if (ch == '"') {
192        cursor++;
193        boolean escape = false;
194        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
195          if (escape)
196            escape = false;
197          else
198            escape = (source.charAt(cursor) == '\\');
199          cursor++;
200        }
201        if (cursor == source.length())
202          throw error("Unterminated string");
203        cursor++;
204        current = "\"" + source.substring(currentStart + 1, cursor - 1) + "\"";
205      } else if (ch == '`') {
206        cursor++;
207        boolean escape = false;
208        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
209          if (escape)
210            escape = false;
211          else
212            escape = (source.charAt(cursor) == '\\');
213          cursor++;
214        }
215        if (cursor == source.length())
216          throw error("Unterminated string");
217        cursor++;
218        current = "`" + source.substring(currentStart + 1, cursor - 1) + "`";
219      } else if (ch == '\'') {
220        cursor++;
221        char ech = ch;
222        boolean escape = false;
223        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
224          if (escape)
225            escape = false;
226          else
227            escape = (source.charAt(cursor) == '\\');
228          cursor++;
229        }
230        if (cursor == source.length())
231          throw error("Unterminated string");
232        cursor++;
233        current = source.substring(currentStart, cursor);
234        if (ech == '\'')
235          current = "\'" + current.substring(1, current.length() - 1) + "\'";
236      } else if (ch == '`') {
237        cursor++;
238        boolean escape = false;
239        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
240          if (escape)
241            escape = false;
242          else
243            escape = (source.charAt(cursor) == '\\');
244          cursor++;
245        }
246        if (cursor == source.length())
247          throw error("Unterminated string");
248        cursor++;
249        current = "`" + source.substring(currentStart + 1, cursor - 1) + "`";
250      } else if (ch == '@') {
251        int start = cursor;
252        cursor++;
253        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
254          cursor++;
255        current = source.substring(currentStart, cursor);
256      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
257        cursor++;
258        current = source.substring(currentStart, cursor);
259      }
260    }
261  }
262
263  private void skipWhitespaceAndComments() {
264    boolean last13 = false;
265    boolean done = false;
266    while (cursor < source.length() && !done) {
267      if (cursor < source.length() - 1 && "//".equals(source.substring(cursor, cursor + 2))) {
268        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n'))
269          cursor++;
270      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor + 2))) {
271        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor + 2))) {
272          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
273          cursor++;
274        }
275        if (cursor >= source.length() - 1) {
276          error("Unfinished comment");
277        } else {
278          cursor = cursor + 2;
279        }
280      } else if (Character.isWhitespace(source.charAt(cursor))) {
281        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
282        cursor++;
283      } else {
284        done = true;
285      }
286    }
287  }
288
289  private boolean isDateChar(char ch, int start) {
290    int eot = source.charAt(start + 1) == 'T' ? 10 : 20;
291
292    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch)
293        || (cursor - start == eot && ch == '.' && cursor < source.length() - 1
294            && Character.isDigit(source.charAt(cursor + 1)));
295  }
296
297  public boolean isOp() {
298    return ExpressionNode.Operation.fromCode(current) != null;
299  }
300
301  public boolean done() {
302    return currentStart >= source.length();
303  }
304
305  public int nextId() {
306    id++;
307    return id;
308  }
309
310  public SourceLocation getCurrentStartLocation() {
311    return currentStartLocation;
312  }
313
314  // special case use
315  public void setCurrent(String current) {
316    this.current = current;
317  }
318
319  public boolean hasComment() {
320    return !done() && current.startsWith("//");
321  }
322
323  public boolean hasToken(String kw) {
324    return !done() && kw.equals(current);
325  }
326
327  public boolean hasToken(String... names) {
328    if (done())
329      return false;
330    for (String s : names)
331      if (s.equals(current))
332        return true;
333    return false;
334  }
335
336  public void token(String kw) throws FHIRLexerException {
337    if (!kw.equals(current))
338      throw error("Found \"" + current + "\" expecting \"" + kw + "\"");
339    next();
340  }
341
342  public String readConstant(String desc) throws FHIRLexerException {
343    if (!isStringConstant())
344      throw error("Found " + current + " expecting \"[" + desc + "]\"");
345
346    return processConstant(take());
347  }
348
349  public String readFixedName(String desc) throws FHIRLexerException {
350    if (!isFixedName())
351      throw error("Found " + current + " expecting \"[" + desc + "]\"");
352
353    return processFixedName(take());
354  }
355
356  public String processConstant(String s) throws FHIRLexerException {
357    StringBuilder b = new StringBuilder();
358    int i = 1;
359    while (i < s.length() - 1) {
360      char ch = s.charAt(i);
361      if (ch == '\\') {
362        i++;
363        switch (s.charAt(i)) {
364        case 't':
365          b.append('\t');
366          break;
367        case 'r':
368          b.append('\r');
369          break;
370        case 'n':
371          b.append('\n');
372          break;
373        case 'f':
374          b.append('\f');
375          break;
376        case '\'':
377          b.append('\'');
378          break;
379        case '"':
380          b.append('"');
381          break;
382        case '`':
383          b.append('`');
384          break;
385        case '\\':
386          b.append('\\');
387          break;
388        case '/':
389          b.append('/');
390          break;
391        case 'u':
392          i++;
393          int uc = Integer.parseInt(s.substring(i, i + 4), 16);
394          b.append(Character.toString(uc));
395          i = i + 4;
396          break;
397        default:
398          throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i));
399        }
400      } else {
401        b.append(ch);
402        i++;
403      }
404    }
405    return b.toString();
406  }
407
408  public String processFixedName(String s) throws FHIRLexerException {
409    StringBuilder b = new StringBuilder();
410    int i = 1;
411    while (i < s.length() - 1) {
412      char ch = s.charAt(i);
413      if (ch == '\\') {
414        i++;
415        switch (s.charAt(i)) {
416        case 't':
417          b.append('\t');
418          break;
419        case 'r':
420          b.append('\r');
421          break;
422        case 'n':
423          b.append('\n');
424          break;
425        case 'f':
426          b.append('\f');
427          break;
428        case '\'':
429          b.append('\'');
430          break;
431        case '"':
432          b.append('"');
433          break;
434        case '\\':
435          b.append('\\');
436          break;
437        case '/':
438          b.append('/');
439          break;
440        case 'u':
441          i++;
442          int uc = Integer.parseInt(s.substring(i, i + 4), 16);
443          b.append(Character.toString(uc));
444          i = i + 4;
445          break;
446        default:
447          throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i));
448        }
449      } else {
450        b.append(ch);
451        i++;
452      }
453    }
454    return b.toString();
455  }
456
457  public void skipToken(String token) throws FHIRLexerException {
458    if (getCurrent().equals(token))
459      next();
460
461  }
462
463  public String takeDottedToken() throws FHIRLexerException {
464    StringBuilder b = new StringBuilder();
465    b.append(take());
466    while (!done() && getCurrent().equals(".")) {
467      b.append(take());
468      b.append(take());
469    }
470    return b.toString();
471  }
472
473  public void skipComments() throws FHIRLexerException {
474    while (!done() && hasComment())
475      next();
476  }
477
478  public int getCurrentStart() {
479    return currentStart;
480  }
481
482}