001package org.hl7.fhir.dstu3.utils;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import org.hl7.fhir.dstu3.model.ExpressionNode;
035import org.hl7.fhir.dstu3.model.ExpressionNode.SourceLocation;
036import org.hl7.fhir.exceptions.FHIRException;
037import org.hl7.fhir.utilities.Utilities;
038
039// shared lexer for concrete syntaxes 
040// - FluentPath
041// - Mapping language
042
043public class FHIRLexer {
044  public class FHIRLexerException extends FHIRException {
045
046    public FHIRLexerException() {
047      super();
048    }
049
050    public FHIRLexerException(String message, Throwable cause) {
051      super(message, cause);
052    }
053
054    public FHIRLexerException(String message) {
055      super(message);
056    }
057
058    public FHIRLexerException(Throwable cause) {
059      super(cause);
060    }
061
062  }
063  private String source;
064  private int cursor;
065  private int currentStart;
066  private String current;
067  private SourceLocation currentLocation;
068  private SourceLocation currentStartLocation;
069  private int id;
070
071  public FHIRLexer(String source) throws FHIRLexerException {
072    this.source = source;
073    currentLocation = new SourceLocation(1, 1);
074    next();
075  }
076  public String getCurrent() {
077    return current;
078  }
079  public SourceLocation getCurrentLocation() {
080    return currentLocation;
081  }
082
083  public boolean isConstant(boolean incDoubleQuotes) {
084    return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
085        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
086        current.equals("true") || current.equals("false") || current.equals("{}");
087  }
088
089  public boolean isStringConstant() {
090    return current.charAt(0) == '\'' || current.charAt(0) == '"';
091  }
092
093  public String take() throws FHIRLexerException {
094    String s = current;
095    next();
096    return s;
097  }
098
099  public int takeInt() throws FHIRLexerException {
100    String s = current;
101    if (!Utilities.isInteger(s))
102      throw error("Found "+current+" expecting an integer");
103    next();
104    return Integer.parseInt(s);
105  }
106
107  public boolean isToken() {
108    if (Utilities.noString(current))
109      return false;
110
111    if (current.startsWith("$"))
112      return true;
113
114    if (current.equals("*") || current.equals("**"))
115      return true;
116
117    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
118      for (int i = 1; i < current.length(); i++) 
119        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
120            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
121          return false;
122      return true;
123    }
124    return false;
125  }
126
127  public FHIRLexerException error(String msg) {
128    return error(msg, currentLocation.toString());
129  }
130
131  public FHIRLexerException error(String msg, String location) {
132    return new FHIRLexerException("Error at "+location+": "+msg);
133  }
134
135  public void next() throws FHIRLexerException {
136    current = null;
137    boolean last13 = false;
138    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
139      if (source.charAt(cursor) == '\r') {
140        currentLocation.setLine(currentLocation.getLine() + 1);
141        currentLocation.setColumn(1);
142        last13 = true;
143      } else if (!last13 && (source.charAt(cursor) == '\n')) {
144        currentLocation.setLine(currentLocation.getLine() + 1);
145        currentLocation.setColumn(1);
146        last13 = false;
147      } else {
148        last13 = false;
149        currentLocation.setColumn(currentLocation.getColumn() + 1);
150      }
151      cursor++;
152    }
153    currentStart = cursor;
154    currentStartLocation = currentLocation;
155    if (cursor < source.length()) {
156      char ch = source.charAt(cursor);
157      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
158        cursor++;
159        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-')) 
160          cursor++;
161        current = source.substring(currentStart, cursor);
162      } else if (ch == '.' ) {
163        cursor++;
164        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
165          cursor++;
166        current = source.substring(currentStart, cursor);
167      } else if (ch >= '0' && ch <= '9') {
168          cursor++;
169        boolean dotted = false;
170        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
171          if (source.charAt(cursor) == '.')
172            dotted = true;
173          cursor++;
174        }
175        if (source.charAt(cursor-1) == '.')
176          cursor--;
177        current = source.substring(currentStart, cursor);
178      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
179        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
180            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
181          cursor++;
182        current = source.substring(currentStart, cursor);
183      } else if (ch == '%') {
184        cursor++;
185        if (cursor < source.length() && (source.charAt(cursor) == '"')) {
186          cursor++;
187          while (cursor < source.length() && (source.charAt(cursor) != '"'))
188            cursor++;
189          cursor++;
190        } else
191        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
192            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
193          cursor++;
194        current = source.substring(currentStart, cursor);
195      } else if (ch == '/') {
196        cursor++;
197        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
198          cursor++;
199          while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 
200            cursor++;
201        }
202        current = source.substring(currentStart, cursor);
203      } else if (ch == '$') {
204        cursor++;
205        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
206          cursor++;
207        current = source.substring(currentStart, cursor);
208      } else if (ch == '{') {
209        cursor++;
210        ch = source.charAt(cursor);
211        if (ch == '}')
212          cursor++;
213        current = source.substring(currentStart, cursor);
214      } else if (ch == '"'){
215        cursor++;
216        boolean escape = false;
217        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
218          if (escape)
219            escape = false;
220          else 
221            escape = (source.charAt(cursor) == '\\');
222          cursor++;
223        }
224        if (cursor == source.length())
225          throw error("Unterminated string");
226        cursor++;
227        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
228      } else if (ch == '\''){
229        cursor++;
230        char ech = ch;
231        boolean escape = false;
232        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
233          if (escape)
234            escape = false;
235          else 
236            escape = (source.charAt(cursor) == '\\');
237          cursor++;
238        }
239        if (cursor == source.length())
240          throw error("Unterminated string");
241        cursor++;
242        current = source.substring(currentStart, cursor);
243        if (ech == '\'')
244          current = "\'"+current.substring(1, current.length() - 1)+"\'";
245      } else if (ch == '@'){
246        cursor++;
247        while (cursor < source.length() && isDateChar(source.charAt(cursor)))
248          cursor++;          
249        current = source.substring(currentStart, cursor);
250      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
251        cursor++;
252        current = source.substring(currentStart, cursor);
253      }
254    }
255  }
256
257
258  private boolean isDateChar(char ch) {
259    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch);
260  }
261  public boolean isOp() {
262    return ExpressionNode.Operation.fromCode(current) != null;
263  }
264  public boolean done() {
265    return currentStart >= source.length();
266  }
267  public int nextId() {
268    id++;
269    return id;
270  }
271  public SourceLocation getCurrentStartLocation() {
272    return currentStartLocation;
273  }
274  
275  // special case use
276  public void setCurrent(String current) {
277    this.current = current;
278  }
279
280  public boolean hasComment() {
281    return !done() && current.startsWith("//");
282  }
283  public boolean hasToken(String kw) {
284    return !done() && kw.equals(current);
285  }
286  public boolean hasToken(String... names) {
287    if (done()) 
288      return false;
289    for (String s : names)
290      if (s.equals(current))
291        return true;
292    return false;
293  }
294  
295  public void token(String kw) throws FHIRLexerException {
296    if (!kw.equals(current)) 
297      throw error("Found \""+current+"\" expecting \""+kw+"\"");
298    next();
299  }
300  
301  public String readConstant(String desc) throws FHIRLexerException {
302    if (!isStringConstant())
303      throw error("Found "+current+" expecting \"["+desc+"]\"");
304
305    return processConstant(take());
306  }
307
308  public String processConstant(String s) throws FHIRLexerException {
309    StringBuilder b = new StringBuilder();
310    int i = 1;
311    while (i < s.length()-1) {
312      char ch = s.charAt(i);
313      if (ch == '\\') {
314        i++;
315        switch (s.charAt(i)) {
316        case 't': 
317          b.append('\t');
318          break;
319        case 'r':
320          b.append('\r');
321          break;
322        case 'n': 
323          b.append('\n');
324          break;
325        case 'f': 
326          b.append('\f');
327          break;
328        case '\'':
329          b.append('\'');
330          break;
331        case '\\': 
332          b.append('\\');
333          break;
334        case '/': 
335          b.append('\\');
336          break;
337        case 'u':
338          i++;
339          int uc = Integer.parseInt(s.substring(i, i+4), 16);
340          b.append((char) uc);
341          i = i + 4;
342          break;
343        default:
344          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
345        }
346      } else {
347        b.append(ch);
348        i++;
349      }
350    }
351    return b.toString();
352
353  }
354  public void skipToken(String token) throws FHIRLexerException {
355    if (getCurrent().equals(token))
356      next();
357    
358  }
359  public String takeDottedToken() throws FHIRLexerException {
360    StringBuilder b = new StringBuilder();
361    b.append(take());
362    while (!done() && getCurrent().equals(".")) {
363      b.append(take());
364      b.append(take());
365    }
366    return b.toString();
367  }
368  
369  void skipComments() throws FHIRLexerException {
370    while (!done() && hasComment())
371      next();
372  }
373
374}