001package org.hl7.fhir.dstu3.fhirpath;
002
003import org.hl7.fhir.dstu3.fhirpath.ExpressionNode.SourceLocation;
004import org.hl7.fhir.exceptions.FHIRException;
005import org.hl7.fhir.utilities.Utilities;
006
007// shared lexer for concrete syntaxes 
008// - FluentPath
009// - Mapping language
010
011public class FHIRLexer {
012  public class FHIRLexerException extends FHIRException {
013
014    public FHIRLexerException() {
015      super();
016    }
017
018    public FHIRLexerException(String message, Throwable cause) {
019      super(message, cause);
020    }
021
022    public FHIRLexerException(String message) {
023      super(message);
024    }
025
026    public FHIRLexerException(Throwable cause) {
027      super(cause);
028    }
029
030  }
031  private String source;
032  private int cursor;
033  private int currentStart;
034  private String current;
035  private SourceLocation currentLocation;
036  private SourceLocation currentStartLocation;
037  private int id;
038
039  public FHIRLexer(String source) throws FHIRLexerException {
040    this.source = source;
041    currentLocation = new SourceLocation(1, 1);
042    next();
043  }
044  public String getCurrent() {
045    return current;
046  }
047  public SourceLocation getCurrentLocation() {
048    return currentLocation;
049  }
050
051  public boolean isConstant(boolean incDoubleQuotes) {
052    return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
053        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
054        current.equals("true") || current.equals("false") || current.equals("{}");
055  }
056
057  public boolean isStringConstant() {
058    return current.charAt(0) == '\'' || current.charAt(0) == '"';
059  }
060
061  public String take() throws FHIRLexerException {
062    String s = current;
063    next();
064    return s;
065  }
066
067  public int takeInt() throws FHIRLexerException {
068    String s = current;
069    if (!Utilities.isInteger(s))
070      throw error("Found "+current+" expecting an integer");
071    next();
072    return Integer.parseInt(s);
073  }
074
075  public boolean isToken() {
076    if (Utilities.noString(current))
077      return false;
078
079    if (current.startsWith("$"))
080      return true;
081
082    if (current.equals("*") || current.equals("**"))
083      return true;
084
085    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
086      for (int i = 1; i < current.length(); i++) 
087        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
088            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
089          return false;
090      return true;
091    }
092    return false;
093  }
094
095  public FHIRLexerException error(String msg) {
096    return error(msg, currentLocation.toString());
097  }
098
099  public FHIRLexerException error(String msg, String location) {
100    return new FHIRLexerException("Error at "+location+": "+msg);
101  }
102
103  public void next() throws FHIRLexerException {
104    current = null;
105    boolean last13 = false;
106    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
107      if (source.charAt(cursor) == '\r') {
108        currentLocation.setLine(currentLocation.getLine() + 1);
109        currentLocation.setColumn(1);
110        last13 = true;
111      } else if (!last13 && (source.charAt(cursor) == '\n')) {
112        currentLocation.setLine(currentLocation.getLine() + 1);
113        currentLocation.setColumn(1);
114        last13 = false;
115      } else {
116        last13 = false;
117        currentLocation.setColumn(currentLocation.getColumn() + 1);
118      }
119      cursor++;
120    }
121    currentStart = cursor;
122    currentStartLocation = currentLocation;
123    if (cursor < source.length()) {
124      char ch = source.charAt(cursor);
125      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
126        cursor++;
127        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-')) 
128          cursor++;
129        current = source.substring(currentStart, cursor);
130      } else if (ch == '.' ) {
131        cursor++;
132        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
133          cursor++;
134        current = source.substring(currentStart, cursor);
135      } else if (ch >= '0' && ch <= '9') {
136          cursor++;
137        boolean dotted = false;
138        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
139          if (source.charAt(cursor) == '.')
140            dotted = true;
141          cursor++;
142        }
143        if (source.charAt(cursor-1) == '.')
144          cursor--;
145        current = source.substring(currentStart, cursor);
146      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
147        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
148            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
149          cursor++;
150        current = source.substring(currentStart, cursor);
151      } else if (ch == '%') {
152        cursor++;
153        if (cursor < source.length() && (source.charAt(cursor) == '"')) {
154          cursor++;
155          while (cursor < source.length() && (source.charAt(cursor) != '"'))
156            cursor++;
157          cursor++;
158        } else
159        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
160            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
161          cursor++;
162        current = source.substring(currentStart, cursor);
163      } else if (ch == '/') {
164        cursor++;
165        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
166          cursor++;
167          while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 
168            cursor++;
169        }
170        current = source.substring(currentStart, cursor);
171      } else if (ch == '$') {
172        cursor++;
173        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
174          cursor++;
175        current = source.substring(currentStart, cursor);
176      } else if (ch == '{') {
177        cursor++;
178        ch = source.charAt(cursor);
179        if (ch == '}')
180          cursor++;
181        current = source.substring(currentStart, cursor);
182      } else if (ch == '"'){
183        cursor++;
184        boolean escape = false;
185        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
186          if (escape)
187            escape = false;
188          else 
189            escape = (source.charAt(cursor) == '\\');
190          cursor++;
191        }
192        if (cursor == source.length())
193          throw error("Unterminated string");
194        cursor++;
195        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
196      } else if (ch == '\''){
197        cursor++;
198        char ech = ch;
199        boolean escape = false;
200        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
201          if (escape)
202            escape = false;
203          else 
204            escape = (source.charAt(cursor) == '\\');
205          cursor++;
206        }
207        if (cursor == source.length())
208          throw error("Unterminated string");
209        cursor++;
210        current = source.substring(currentStart, cursor);
211        if (ech == '\'')
212          current = "\'"+current.substring(1, current.length() - 1)+"\'";
213      } else if (ch == '@'){
214        cursor++;
215        while (cursor < source.length() && isDateChar(source.charAt(cursor)))
216          cursor++;          
217        current = source.substring(currentStart, cursor);
218      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
219        cursor++;
220        current = source.substring(currentStart, cursor);
221      }
222    }
223  }
224
225
226  private boolean isDateChar(char ch) {
227    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch);
228  }
229  public boolean isOp() {
230    return ExpressionNode.Operation.fromCode(current) != null;
231  }
232  public boolean done() {
233    return currentStart >= source.length();
234  }
235  public int nextId() {
236    id++;
237    return id;
238  }
239  public SourceLocation getCurrentStartLocation() {
240    return currentStartLocation;
241  }
242  
243  // special case use
244  public void setCurrent(String current) {
245    this.current = current;
246  }
247
248  public boolean hasComment() {
249    return !done() && current.startsWith("//");
250  }
251  public boolean hasToken(String kw) {
252    return !done() && kw.equals(current);
253  }
254  public boolean hasToken(String... names) {
255    if (done()) 
256      return false;
257    for (String s : names)
258      if (s.equals(current))
259        return true;
260    return false;
261  }
262  
263  public void token(String kw) throws FHIRLexerException {
264    if (!kw.equals(current)) 
265      throw error("Found \""+current+"\" expecting \""+kw+"\"");
266    next();
267  }
268  
269  public String readConstant(String desc) throws FHIRLexerException {
270    if (!isStringConstant())
271      throw error("Found "+current+" expecting \"["+desc+"]\"");
272
273    return processConstant(take());
274  }
275
276  public String processConstant(String s) throws FHIRLexerException {
277    StringBuilder b = new StringBuilder();
278    int i = 1;
279    while (i < s.length()-1) {
280      char ch = s.charAt(i);
281      if (ch == '\\') {
282        i++;
283        switch (s.charAt(i)) {
284        case 't': 
285          b.append('\t');
286          break;
287        case 'r':
288          b.append('\r');
289          break;
290        case 'n': 
291          b.append('\n');
292          break;
293        case 'f': 
294          b.append('\f');
295          break;
296        case '\'':
297          b.append('\'');
298          break;
299        case '\\': 
300          b.append('\\');
301          break;
302        case '/': 
303          b.append('\\');
304          break;
305        case 'u':
306          i++;
307          int uc = Integer.parseInt(s.substring(i, i+4), 16);
308          b.append((char) uc);
309          i = i + 4;
310          break;
311        default:
312          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
313        }
314      } else {
315        b.append(ch);
316        i++;
317      }
318    }
319    return b.toString();
320
321  }
322  public void skipToken(String token) throws FHIRLexerException {
323    if (getCurrent().equals(token))
324      next();
325    
326  }
327  public String takeDottedToken() throws FHIRLexerException {
328    StringBuilder b = new StringBuilder();
329    b.append(take());
330    while (!done() && getCurrent().equals(".")) {
331      b.append(take());
332      b.append(take());
333    }
334    return b.toString();
335  }
336  
337  public void skipComments() throws FHIRLexerException {
338    while (!done() && hasComment())
339      next();
340  }
341
342}