001package org.hl7.fhir.dstu2.utils;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032import org.hl7.fhir.dstu2.model.ExpressionNode;
033import org.hl7.fhir.dstu2.model.ExpressionNode.SourceLocation;
034import org.hl7.fhir.exceptions.FHIRException;
035import org.hl7.fhir.utilities.Utilities;
036
037// shared lexer for concrete syntaxes 
038// - FluentPath
039// - Mapping language
040
041public class FHIRLexer {
042  public class FHIRLexerException extends FHIRException {
043
044    public FHIRLexerException() {
045      super();
046    }
047
048    public FHIRLexerException(String message, Throwable cause) {
049      super(message, cause);
050    }
051
052    public FHIRLexerException(String message) {
053      super(message);
054    }
055
056    public FHIRLexerException(Throwable cause) {
057      super(cause);
058    }
059
060  }
061
062  private String path;
063  private int cursor;
064  private int currentStart;
065  private String current;
066  private SourceLocation currentLocation;
067  private SourceLocation currentStartLocation;
068  private int id;
069
070  public FHIRLexer(String source) throws FHIRLexerException {
071    this.path = source;
072    currentLocation = new SourceLocation(1, 1);
073    next();
074  }
075
076  public String getCurrent() {
077    return current;
078  }
079
080  public SourceLocation getCurrentLocation() {
081    return currentLocation;
082  }
083
084  public boolean isConstant(boolean incDoubleQuotes) {
085    return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@'
086        || current.charAt(0) == '%' || current.charAt(0) == '-' || current.charAt(0) == '+'
087        || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || current.equals("true") || current.equals("false")
088        || current.equals("{}");
089  }
090
091  public boolean isStringConstant() {
092    return current.charAt(0) == '\'' || current.charAt(0) == '"';
093  }
094
095  public String take() throws FHIRLexerException {
096    String s = current;
097    next();
098    return s;
099  }
100
101  public boolean isToken() {
102    if (Utilities.noString(current))
103      return false;
104
105    if (current.startsWith("$"))
106      return true;
107
108    if (current.equals("*") || current.equals("**"))
109      return true;
110
111    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z')
112        || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
113      for (int i = 1; i < current.length(); i++)
114        if (!((current.charAt(1) >= 'A' && current.charAt(1) <= 'Z')
115            || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z')
116            || (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
117          return false;
118      return true;
119    }
120    return false;
121  }
122
123  public FHIRLexerException error(String msg) {
124    return error(msg, currentLocation.toString());
125  }
126
127  public FHIRLexerException error(String msg, String location) {
128    return new FHIRLexerException("Error in " + path + " at " + location + ": " + msg);
129  }
130
131  public void next() throws FHIRLexerException {
132    current = null;
133    boolean last13 = false;
134    while (cursor < path.length() && Character.isWhitespace(path.charAt(cursor))) {
135      if (path.charAt(cursor) == '\r') {
136        currentLocation.setLine(currentLocation.getLine() + 1);
137        currentLocation.setColumn(1);
138        last13 = true;
139      } else if (!last13 && (path.charAt(cursor) == '\n')) {
140        currentLocation.setLine(currentLocation.getLine() + 1);
141        currentLocation.setColumn(1);
142        last13 = false;
143      } else {
144        last13 = false;
145        currentLocation.setColumn(currentLocation.getColumn() + 1);
146      }
147      cursor++;
148    }
149    currentStart = cursor;
150    currentStartLocation = currentLocation;
151    if (cursor < path.length()) {
152      char ch = path.charAt(cursor);
153      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') {
154        cursor++;
155        if (cursor < path.length()
156            && (path.charAt(cursor) == '=' || path.charAt(cursor) == '~' || path.charAt(cursor) == '-'))
157          cursor++;
158        current = path.substring(currentStart, cursor);
159      } else if (ch == '.') {
160        cursor++;
161        if (cursor < path.length() && (path.charAt(cursor) == '.'))
162          cursor++;
163        current = path.substring(currentStart, cursor);
164      } else if (ch >= '0' && ch <= '9') {
165        cursor++;
166        boolean dotted = false;
167        while (cursor < path.length() && ((path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9')
168            || (path.charAt(cursor) == '.') && !dotted)) {
169          if (path.charAt(cursor) == '.')
170            dotted = true;
171          cursor++;
172        }
173        if (path.charAt(cursor - 1) == '.')
174          cursor--;
175        current = path.substring(currentStart, cursor);
176      } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
177        while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z')
178            || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z')
179            || (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == '_'))
180          cursor++;
181        current = path.substring(currentStart, cursor);
182      } else if (ch == '%') {
183        cursor++;
184        if (cursor < path.length() && (path.charAt(cursor) == '"')) {
185          cursor++;
186          while (cursor < path.length() && (path.charAt(cursor) != '"'))
187            cursor++;
188          cursor++;
189        } else
190          while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z')
191              || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z')
192              || (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == ':'
193              || path.charAt(cursor) == '-'))
194            cursor++;
195        current = path.substring(currentStart, cursor);
196      } else if (ch == '/') {
197        cursor++;
198        if (cursor < path.length() && (path.charAt(cursor) == '/')) {
199          cursor++;
200          while (cursor < path.length() && !((path.charAt(cursor) == '\r') || path.charAt(cursor) == '\n'))
201            cursor++;
202        }
203        current = path.substring(currentStart, cursor);
204      } else if (ch == '$') {
205        cursor++;
206        while (cursor < path.length() && (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z'))
207          cursor++;
208        current = path.substring(currentStart, cursor);
209      } else if (ch == '{') {
210        cursor++;
211        ch = path.charAt(cursor);
212        if (ch == '}')
213          cursor++;
214        current = path.substring(currentStart, cursor);
215      } else if (ch == '"') {
216        cursor++;
217        boolean escape = false;
218        while (cursor < path.length() && (escape || path.charAt(cursor) != '"')) {
219          if (escape)
220            escape = false;
221          else
222            escape = (path.charAt(cursor) == '\\');
223          cursor++;
224        }
225        if (cursor == path.length())
226          throw error("Unterminated string");
227        cursor++;
228        current = "\"" + path.substring(currentStart + 1, cursor - 1) + "\"";
229      } else if (ch == '\'') {
230        cursor++;
231        char ech = ch;
232        boolean escape = false;
233        while (cursor < path.length() && (escape || path.charAt(cursor) != ech)) {
234          if (escape)
235            escape = false;
236          else
237            escape = (path.charAt(cursor) == '\\');
238          cursor++;
239        }
240        if (cursor == path.length())
241          throw error("Unterminated string");
242        cursor++;
243        current = path.substring(currentStart, cursor);
244        if (ech == '\'')
245          current = "\'" + current.substring(1, current.length() - 1) + "\'";
246      } else if (ch == '@') {
247        cursor++;
248        while (cursor < path.length() && isDateChar(path.charAt(cursor)))
249          cursor++;
250        current = path.substring(currentStart, cursor);
251      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
252        cursor++;
253        current = path.substring(currentStart, cursor);
254      }
255    }
256  }
257
258  private boolean isDateChar(char ch) {
259    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch);
260  }
261
262  public boolean isOp() {
263    return ExpressionNode.Operation.fromCode(current) != null;
264  }
265
266  public boolean done() {
267    return currentStart >= path.length();
268  }
269
270  public int nextId() {
271    id++;
272    return id;
273  }
274
275  public SourceLocation getCurrentStartLocation() {
276    return currentStartLocation;
277  }
278
279  // special case use
280  public void setCurrent(String current) {
281    this.current = current;
282  }
283
284  public boolean hasComment() {
285    return !done() && current.startsWith("//");
286  }
287
288  public boolean hasToken(String kw) {
289    return !done() && kw.equals(current);
290  }
291
292  public void token(String kw) throws FHIRLexerException {
293    if (!kw.equals(current))
294      throw error("Found \"" + current + "\" expecting \"" + kw + "\"");
295    next();
296  }
297
298  public String readConstant(String desc) throws FHIRLexerException {
299    if (!isStringConstant())
300      throw error("Found " + current + " expecting \"[" + desc + "]\"");
301
302    return processConstant(take());
303  }
304
305  public String processConstant(String s) throws FHIRLexerException {
306    StringBuilder b = new StringBuilder();
307    int i = 1;
308    while (i < s.length() - 1) {
309      char ch = s.charAt(i);
310      if (ch == '\\') {
311        i++;
312        switch (s.charAt(i)) {
313        case 't':
314          b.append('\t');
315          break;
316        case 'r':
317          b.append('\r');
318          break;
319        case 'n':
320          b.append('\n');
321          break;
322        case 'f':
323          b.append('\f');
324          break;
325        case '\'':
326          b.append('\'');
327          break;
328        case '\\':
329          b.append('\\');
330          break;
331        case '/':
332          b.append('\\');
333          break;
334        case 'u':
335          i++;
336          int uc = Integer.parseInt(s.substring(i, i + 4), 16);
337          b.append((char) uc);
338          i = i + 4;
339          break;
340        default:
341          throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i));
342        }
343      } else {
344        b.append(ch);
345        i++;
346      }
347    }
348    return b.toString();
349
350  }
351
352  public void skipToken(String token) throws FHIRLexerException {
353    if (getCurrent().equals(token))
354      next();
355
356  }
357
358  public String takeDottedToken() throws FHIRLexerException {
359    StringBuilder b = new StringBuilder();
360    b.append(take());
361    while (!done() && getCurrent().equals(".")) {
362      b.append(take());
363      b.append(take());
364    }
365    return b.toString();
366  }
367
368  void skipComments() throws FHIRLexerException {
369    while (!done() && hasComment())
370      next();
371  }
372
373}