001package org.hl7.fhir.dstu3.fhirpath; 002 003import org.hl7.fhir.dstu3.fhirpath.ExpressionNode.SourceLocation; 004import org.hl7.fhir.exceptions.FHIRException; 005import org.hl7.fhir.utilities.Utilities; 006 007// shared lexer for concrete syntaxes 008// - FluentPath 009// - Mapping language 010 011public class FHIRLexer { 012 public class FHIRLexerException extends FHIRException { 013 014 public FHIRLexerException() { 015 super(); 016 } 017 018 public FHIRLexerException(String message, Throwable cause) { 019 super(message, cause); 020 } 021 022 public FHIRLexerException(String message) { 023 super(message); 024 } 025 026 public FHIRLexerException(Throwable cause) { 027 super(cause); 028 } 029 030 } 031 private String source; 032 private int cursor; 033 private int currentStart; 034 private String current; 035 private SourceLocation currentLocation; 036 private SourceLocation currentStartLocation; 037 private int id; 038 039 public FHIRLexer(String source) throws FHIRLexerException { 040 this.source = source; 041 currentLocation = new SourceLocation(1, 1); 042 next(); 043 } 044 public String getCurrent() { 045 return current; 046 } 047 public SourceLocation getCurrentLocation() { 048 return currentLocation; 049 } 050 051 public boolean isConstant(boolean incDoubleQuotes) { 052 return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 053 current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 054 current.equals("true") || current.equals("false") || current.equals("{}"); 055 } 056 057 public boolean isStringConstant() { 058 return current.charAt(0) == '\'' || current.charAt(0) == '"'; 059 } 060 061 public String take() throws FHIRLexerException { 062 String s = current; 063 next(); 064 return s; 065 } 066 067 public int takeInt() throws FHIRLexerException { 068 String s = current; 069 if (!Utilities.isInteger(s)) 070 throw error("Found "+current+" expecting an integer"); 071 next(); 072 return Integer.parseInt(s); 073 } 074 075 public boolean isToken() { 076 if (Utilities.noString(current)) 077 return false; 078 079 if (current.startsWith("$")) 080 return true; 081 082 if (current.equals("*") || current.equals("**")) 083 return true; 084 085 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 086 for (int i = 1; i < current.length(); i++) 087 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 088 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 089 return false; 090 return true; 091 } 092 return false; 093 } 094 095 public FHIRLexerException error(String msg) { 096 return error(msg, currentLocation.toString()); 097 } 098 099 public FHIRLexerException error(String msg, String location) { 100 return new FHIRLexerException("Error at "+location+": "+msg); 101 } 102 103 public void next() throws FHIRLexerException { 104 current = null; 105 boolean last13 = false; 106 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) { 107 if (source.charAt(cursor) == '\r') { 108 currentLocation.setLine(currentLocation.getLine() + 1); 109 currentLocation.setColumn(1); 110 last13 = true; 111 } else if (!last13 && (source.charAt(cursor) == '\n')) { 112 currentLocation.setLine(currentLocation.getLine() + 1); 113 currentLocation.setColumn(1); 114 last13 = false; 115 } else { 116 last13 = false; 117 currentLocation.setColumn(currentLocation.getColumn() + 1); 118 } 119 cursor++; 120 } 121 currentStart = cursor; 122 currentStartLocation = currentLocation; 123 if (cursor < source.length()) { 124 char ch = source.charAt(cursor); 125 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 126 cursor++; 127 if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-')) 128 cursor++; 129 current = source.substring(currentStart, cursor); 130 } else if (ch == '.' ) { 131 cursor++; 132 if (cursor < source.length() && (source.charAt(cursor) == '.')) 133 cursor++; 134 current = source.substring(currentStart, cursor); 135 } else if (ch >= '0' && ch <= '9') { 136 cursor++; 137 boolean dotted = false; 138 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) { 139 if (source.charAt(cursor) == '.') 140 dotted = true; 141 cursor++; 142 } 143 if (source.charAt(cursor-1) == '.') 144 cursor--; 145 current = source.substring(currentStart, cursor); 146 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 147 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 148 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 149 cursor++; 150 current = source.substring(currentStart, cursor); 151 } else if (ch == '%') { 152 cursor++; 153 if (cursor < source.length() && (source.charAt(cursor) == '"')) { 154 cursor++; 155 while (cursor < source.length() && (source.charAt(cursor) != '"')) 156 cursor++; 157 cursor++; 158 } else 159 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 160 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-')) 161 cursor++; 162 current = source.substring(currentStart, cursor); 163 } else if (ch == '/') { 164 cursor++; 165 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 166 cursor++; 167 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 168 cursor++; 169 } 170 current = source.substring(currentStart, cursor); 171 } else if (ch == '$') { 172 cursor++; 173 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 174 cursor++; 175 current = source.substring(currentStart, cursor); 176 } else if (ch == '{') { 177 cursor++; 178 ch = source.charAt(cursor); 179 if (ch == '}') 180 cursor++; 181 current = source.substring(currentStart, cursor); 182 } else if (ch == '"'){ 183 cursor++; 184 boolean escape = false; 185 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 186 if (escape) 187 escape = false; 188 else 189 escape = (source.charAt(cursor) == '\\'); 190 cursor++; 191 } 192 if (cursor == source.length()) 193 throw error("Unterminated string"); 194 cursor++; 195 current = "\""+source.substring(currentStart+1, cursor-1)+"\""; 196 } else if (ch == '\''){ 197 cursor++; 198 char ech = ch; 199 boolean escape = false; 200 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 201 if (escape) 202 escape = false; 203 else 204 escape = (source.charAt(cursor) == '\\'); 205 cursor++; 206 } 207 if (cursor == source.length()) 208 throw error("Unterminated string"); 209 cursor++; 210 current = source.substring(currentStart, cursor); 211 if (ech == '\'') 212 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 213 } else if (ch == '@'){ 214 cursor++; 215 while (cursor < source.length() && isDateChar(source.charAt(cursor))) 216 cursor++; 217 current = source.substring(currentStart, cursor); 218 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 219 cursor++; 220 current = source.substring(currentStart, cursor); 221 } 222 } 223 } 224 225 226 private boolean isDateChar(char ch) { 227 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch); 228 } 229 public boolean isOp() { 230 return ExpressionNode.Operation.fromCode(current) != null; 231 } 232 public boolean done() { 233 return currentStart >= source.length(); 234 } 235 public int nextId() { 236 id++; 237 return id; 238 } 239 public SourceLocation getCurrentStartLocation() { 240 return currentStartLocation; 241 } 242 243 // special case use 244 public void setCurrent(String current) { 245 this.current = current; 246 } 247 248 public boolean hasComment() { 249 return !done() && current.startsWith("//"); 250 } 251 public boolean hasToken(String kw) { 252 return !done() && kw.equals(current); 253 } 254 public boolean hasToken(String... names) { 255 if (done()) 256 return false; 257 for (String s : names) 258 if (s.equals(current)) 259 return true; 260 return false; 261 } 262 263 public void token(String kw) throws FHIRLexerException { 264 if (!kw.equals(current)) 265 throw error("Found \""+current+"\" expecting \""+kw+"\""); 266 next(); 267 } 268 269 public String readConstant(String desc) throws FHIRLexerException { 270 if (!isStringConstant()) 271 throw error("Found "+current+" expecting \"["+desc+"]\""); 272 273 return processConstant(take()); 274 } 275 276 public String processConstant(String s) throws FHIRLexerException { 277 StringBuilder b = new StringBuilder(); 278 int i = 1; 279 while (i < s.length()-1) { 280 char ch = s.charAt(i); 281 if (ch == '\\') { 282 i++; 283 switch (s.charAt(i)) { 284 case 't': 285 b.append('\t'); 286 break; 287 case 'r': 288 b.append('\r'); 289 break; 290 case 'n': 291 b.append('\n'); 292 break; 293 case 'f': 294 b.append('\f'); 295 break; 296 case '\'': 297 b.append('\''); 298 break; 299 case '\\': 300 b.append('\\'); 301 break; 302 case '/': 303 b.append('\\'); 304 break; 305 case 'u': 306 i++; 307 int uc = Integer.parseInt(s.substring(i, i+4), 16); 308 b.append((char) uc); 309 i = i + 4; 310 break; 311 default: 312 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 313 } 314 } else { 315 b.append(ch); 316 i++; 317 } 318 } 319 return b.toString(); 320 321 } 322 public void skipToken(String token) throws FHIRLexerException { 323 if (getCurrent().equals(token)) 324 next(); 325 326 } 327 public String takeDottedToken() throws FHIRLexerException { 328 StringBuilder b = new StringBuilder(); 329 b.append(take()); 330 while (!done() && getCurrent().equals(".")) { 331 b.append(take()); 332 b.append(take()); 333 } 334 return b.toString(); 335 } 336 337 public void skipComments() throws FHIRLexerException { 338 while (!done() && hasComment()) 339 next(); 340 } 341 342}