
package org.hl7.fhir.r5.terminologies.utilities;

import org.hl7.fhir.r5.formats.JsonParser;
import org.hl7.fhir.r5.model.*;
import org.hl7.fhir.r5.model.ValueSet.*;

import java.io.IOException;
import java.util.*;

/**
 * Parses a VCL expression into a {@link ValueSet} whose {@code compose} element
 * holds the includes and excludes described by the expression.
 *
 * <p>Parsing is done in two stages: {@link Lexer} turns the text into a flat list of
 * {@link Token}s, and {@link Parser} walks that list by recursive descent, adding
 * {@code ConceptSetComponent}s to the value set as it goes.
 *
 * <p>Entry points are {@link #parse(String)} and {@link #parseAndId(String)}.
 */
public class VCLParser {

  /** Raised for any lexical or syntactic problem in a VCL expression. */
  public static class VCLParseException extends Exception {
    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the problem
     */
    public VCLParseException(String message) {
      super(message);
    }

    /**
     * @param message  description of the problem
     * @param position character offset the problem was detected at; appended to the message
     */
    public VCLParseException(String message, int position) {
      super(message + " at position " + position);
    }
  }

  private enum TokenType {
    DASH, OPEN, CLOSE, SEMI, COMMA, DOT, STAR,
    EQ, IS_A, IS_NOT_A, DESC_OF, REGEX, IN, NOT_IN,
    GENERALIZES, CHILD_OF, DESC_LEAF, EXISTS,
    URI, SCODE, QUOTED_VALUE, EOF
  }

  /** One lexical unit: its type, its text and the offset at which it starts. */
  private static class Token {
    final TokenType type;
    final String value;
    final int position;

    Token(TokenType type, String value, int position) {
      this.type = type;
      this.value = value;
      this.position = position;
    }

    @Override
    public String toString() {
      return type + "(" + value + ")";
    }
  }

  /** Splits a VCL expression into {@link Token}s. */
  private static class Lexer {
    /** Punctuation that may appear inside a bare code or a URI, in addition to letters and digits. */
    private static final String NAME_PUNCTUATION = "?&%+-.@#$!{}_";

    private final String input;
    private int pos;

    /**
     * Leading and trailing characters {@code <= ' '} are ignored, exactly as
     * {@link String#trim()} would, but the leading ones are kept in {@link #input}
     * and merely skipped, so that every reported position is an index into the
     * string the caller supplied rather than into a trimmed copy.
     */
    public Lexer(String input) {
      int start = 0;
      int end = input.length();
      while (start < end && input.charAt(start) <= ' ') {
        start++;
      }
      while (end > start && input.charAt(end - 1) <= ' ') {
        end--;
      }
      this.input = input.substring(0, end);
      this.pos = start;
    }

    /**
     * Reads the whole input.
     *
     * @return the tokens in input order, always terminated by one {@code EOF} token
     * @throws VCLParseException on a character that cannot start a token, an incomplete
     *                           multi-character operator, or an unterminated quoted string
     */
    public List<Token> tokenize() throws VCLParseException {
      List<Token> tokens = new ArrayList<>();

      while (pos < input.length()) {
        skipWhitespace();
        if (pos >= input.length()) {
          break;
        }

        char ch = input.charAt(pos);
        if (lexSymbol(ch, tokens)) {
          continue;
        }

        if (Character.isLetter(ch)) {
          lexWord(tokens);
        } else if (Character.isDigit(ch)) {
          int startPos = pos;
          String value = readWhile(c -> Character.isLetterOrDigit(c) || c == '-' || c == '_');
          tokens.add(new Token(TokenType.SCODE, value, startPos));
        } else {
          throw unexpected(ch);
        }
      }

      tokens.add(new Token(TokenType.EOF, "", pos));
      return tokens;
    }

    /**
     * Handles punctuation, operators and quoted strings.
     *
     * @return {@code true} if a token was produced, {@code false} if {@code ch} does not
     *         start a symbol and the caller should try a word or number instead
     */
    private boolean lexSymbol(char ch, List<Token> tokens) throws VCLParseException {
      switch (ch) {
        case '-': emit(tokens, TokenType.DASH, "-"); return true;
        case '(': emit(tokens, TokenType.OPEN, "("); return true;
        case ')': emit(tokens, TokenType.CLOSE, ")"); return true;
        case ';': emit(tokens, TokenType.SEMI, ";"); return true;
        case ',': emit(tokens, TokenType.COMMA, ","); return true;
        case '.': emit(tokens, TokenType.DOT, "."); return true;
        case '*': emit(tokens, TokenType.STAR, "*"); return true;
        case '=': emit(tokens, TokenType.EQ, "="); return true;
        case '/': emit(tokens, TokenType.REGEX, "/"); return true;
        case '^': emit(tokens, TokenType.IN, "^"); return true;
        case '?': emit(tokens, TokenType.EXISTS, "?"); return true;
        case '>':
          // '>' is only valid as the first half of ">>"
          if (peek() != '>') {
            throw unexpected(ch);
          }
          emit(tokens, TokenType.GENERALIZES, ">>");
          return true;
        case '<':
          if (peek() == '<') {
            emit(tokens, TokenType.IS_A, "<<");
          } else if (peek() == '!') {
            emit(tokens, TokenType.CHILD_OF, "<!");
          } else {
            emit(tokens, TokenType.DESC_OF, "<");
          }
          return true;
        case '~':
          if (peek() == '<' && peek(1) == '<') {
            emit(tokens, TokenType.IS_NOT_A, "~<<");
          } else if (peek() == '^') {
            emit(tokens, TokenType.NOT_IN, "~^");
          } else {
            throw unexpected(ch);
          }
          return true;
        case '!':
          if (peek() == '!' && peek(1) == '<') {
            emit(tokens, TokenType.DESC_LEAF, "!!<");
          } else {
            throw unexpected(ch);
          }
          return true;
        case '"':
          tokens.add(readQuotedValue(pos));
          return true;
        default:
          return false;
      }
    }

    /**
     * Lexes a token that starts with a letter: a {@code URI} if the first run of
     * characters contains a colon, otherwise an {@code SCODE}.
     */
    private void lexWord(List<Token> tokens) {
      int startPos = pos;
      String value = readWhile(c -> isNameChar(c) || c == ':');

      if (!value.contains(":")) {
        tokens.add(new Token(TokenType.SCODE, value, startPos));
        return;
      }

      // The first run stops at '/', so pick up the remainder of the URI here.
      // Note that ':' is not accepted in this second run.
      value += readWhile(c -> isNameChar(c) || c == '/');

      // Optional "|version" suffix, running up to whitespace or a parenthesis.
      if (pos < input.length() && input.charAt(pos) == '|') {
        pos++;
        String version = readWhile(c -> c != '(' && c != ')' && !Character.isWhitespace(c));
        value += "|" + version;
      }
      tokens.add(new Token(TokenType.URI, value, startPos));
    }

    /** Letters, digits and the punctuation shared by bare codes and URIs. */
    private static boolean isNameChar(char c) {
      return Character.isLetterOrDigit(c) || NAME_PUNCTUATION.indexOf(c) >= 0;
    }

    /** Adds a fixed-text token starting at the current position and steps over its text. */
    private void emit(List<Token> tokens, TokenType type, String text) {
      tokens.add(new Token(type, text, pos));
      pos += text.length();
    }

    private VCLParseException unexpected(char ch) {
      return new VCLParseException("Unexpected character: " + ch, pos);
    }

    /**
     * Reads a double-quoted string starting at the current position.
     * {@code \"} and {@code \\} are unescaped; any other backslash sequence is kept verbatim.
     *
     * @param startPos offset of the opening quote, recorded on the token and used in errors
     * @throws VCLParseException if the input ends before the closing quote
     */
    private Token readQuotedValue(int startPos) throws VCLParseException {
      StringBuilder sb = new StringBuilder();
      pos++; // opening quote

      while (pos < input.length()) {
        char ch = input.charAt(pos);
        if (ch == '"') {
          pos++;
          return new Token(TokenType.QUOTED_VALUE, sb.toString(), startPos);
        }
        if (ch == '\\' && pos + 1 < input.length()) {
          pos++;
          char escaped = input.charAt(pos);
          if (escaped != '"' && escaped != '\\') {
            sb.append('\\');
          }
          sb.append(escaped);
        } else {
          sb.append(ch);
        }
        pos++;
      }

      throw new VCLParseException("Unterminated quoted string", startPos);
    }

    /** Consumes and returns the longest run of characters accepted by {@code predicate}. */
    private String readWhile(java.util.function.Predicate<Character> predicate) {
      int from = pos;
      while (pos < input.length() && predicate.test(input.charAt(pos))) {
        pos++;
      }
      return input.substring(from, pos);
    }

    /** The character immediately after the current one, or {@code '\0'} past the end. */
    private char peek() {
      return peek(0);
    }

    /** The character {@code offset + 1} places after the current one, or {@code '\0'} past the end. */
    private char peek(int offset) {
      int peekPos = pos + 1 + offset;
      return peekPos < input.length() ? input.charAt(peekPos) : '\0';
    }

    private void skipWhitespace() {
      while (pos < input.length() && Character.isWhitespace(input.charAt(pos))) {
        pos++;
      }
    }
  }

  /** Recursive-descent parser that turns a token list into a {@link ValueSet}. */
  private static class Parser {
    private static final Set<TokenType> FILTER_OPERATORS = EnumSet.of(
        TokenType.EQ, TokenType.IS_A, TokenType.IS_NOT_A, TokenType.DESC_OF,
        TokenType.REGEX, TokenType.IN, TokenType.NOT_IN, TokenType.GENERALIZES,
        TokenType.CHILD_OF, TokenType.DESC_LEAF, TokenType.EXISTS);

    private final List<Token> tokens;
    private int pos = 0;
    private final ValueSet valueSet;
    private final ValueSetComposeComponent compose;

    /** Starts from an empty value set with status {@code DRAFT} and an empty compose. */
    public Parser(List<Token> tokens) {
      this.tokens = tokens;
      this.valueSet = new ValueSet();
      this.valueSet.setStatus(Enumerations.PublicationStatus.DRAFT);
      this.compose = new ValueSetComposeComponent();
      this.valueSet.setCompose(compose);
    }

    /**
     * Parses one expression and requires that it is followed directly by end of input.
     *
     * @return the populated value set
     */
    public ValueSet parse() throws VCLParseException {
      parseExpr();
      expect(TokenType.EOF);
      return valueSet;
    }

    /**
     * Top-level expression: one sub-expression optionally followed by a single
     * conjunction (commas), disjunction (semicolons) or exclusion (dash).
     */
    private void parseExpr() throws VCLParseException {
      parseSubExpr(false);

      switch (current().type) {
        case COMMA:
          parseConjunction();
          break;
        case SEMI:
          parseDisjunction();
          break;
        case DASH:
          parseExclusion();
          break;
        default:
          break;
      }
    }

    /**
     * Parses an optional {@code (uri)} system prefix followed by either a simple
     * expression or a parenthesised group. Inside the parentheses a second
     * {@code (uri)} prefix may appear and, if present, replaces the first.
     *
     * @param isExclusion whether concept sets created here go to {@code compose.exclude}
     */
    private void parseSubExpr(boolean isExclusion) throws VCLParseException {
      String systemUri = parseOptionalSystem(null);

      if (current().type != TokenType.OPEN) {
        parseSimpleExpr(systemUri, isExclusion);
        return;
      }

      consume(TokenType.OPEN);
      systemUri = parseOptionalSystem(systemUri);

      if (isSimpleCodeList()) {
        parseSimpleCodeList(systemUri, isExclusion);
      } else {
        // NOTE(review): systemUri is not forwarded on this path, so a system prefix in
        // front of a parenthesised filter expression is not applied to the concept
        // sets created inside it. Confirm whether that is intended.
        parseExprWithinParentheses(isExclusion);
      }

      consume(TokenType.CLOSE);
    }

    /**
     * Consumes {@code ( URI )} if that is what comes next.
     *
     * @param fallback value to return when no system prefix is present
     * @return the URI text, or {@code fallback}
     */
    private String parseOptionalSystem(String fallback) throws VCLParseException {
      if (current().type != TokenType.OPEN || peek().type != TokenType.URI) {
        return fallback;
      }
      consume(TokenType.OPEN);
      String uri = current().value;
      consume(TokenType.URI);
      consume(TokenType.CLOSE);
      return uri;
    }

    /**
     * Looks ahead, without consuming, to decide whether the tokens up to the next
     * closing parenthesis form a plain list of codes. A nested {@code ( URI )} triple
     * is skipped; any other opening parenthesis, a dash, or a filter operator means
     * the content is a full expression instead.
     */
    private boolean isSimpleCodeList() {
      int lookahead = pos;
      while (lookahead < tokens.size()) {
        Token token = tokens.get(lookahead);

        if (token.type == TokenType.CLOSE) {
          return true;
        }

        if (token.type == TokenType.OPEN && lookahead + 2 < tokens.size()
            && tokens.get(lookahead + 1).type == TokenType.URI
            && tokens.get(lookahead + 2).type == TokenType.CLOSE) {
          lookahead += 3;
          continue;
        }

        if (token.type == TokenType.OPEN
            || token.type == TokenType.DASH
            || isFilterOperator(token.type)) {
          return false;
        }
        lookahead++;
      }
      return true;
    }

    /**
     * Parses the content of a parenthesised group that is not a simple code list:
     * a sub-expression followed by any number of conjunctions, disjunctions and
     * exclusions, in any mix.
     */
    private void parseExprWithinParentheses(boolean isExclusion) throws VCLParseException {
      parseSubExpr(isExclusion);

      while (true) {
        TokenType next = current().type;
        if (next == TokenType.COMMA) {
          parseConjunctionWithFlag(isExclusion);
        } else if (next == TokenType.SEMI) {
          parseDisjunctionWithFlag(isExclusion);
        } else if (next == TokenType.DASH) {
          parseExclusion();
        } else {
          return;
        }
      }
    }

    /**
     * Parses a comma- or semicolon-separated list of codes into one new concept set.
     * A leading {@code *} or {@code ^} ends the list immediately; later ones do not.
     *
     * <p>NOTE(review): {@link #isSimpleCodeList()} rejects any list containing
     * {@code ^}, so the two include branches below look unreachable from the current
     * caller. They are kept unchanged.
     */
    private void parseSimpleCodeList(String systemUri, boolean isExclusion) throws VCLParseException {
      ConceptSetComponent conceptSet = createConceptSet(systemUri, isExclusion);

      if (current().type == TokenType.STAR) {
        consume(TokenType.STAR);
        addAllCodesFilter(conceptSet);
        return;
      }
      if (current().type == TokenType.IN) {
        parseIncludeVs(conceptSet);
        return;
      }
      conceptSet.addConcept().setCode(parseCode());

      while (current().type == TokenType.SEMI || current().type == TokenType.COMMA) {
        consume(current().type);

        if (current().type == TokenType.STAR) {
          consume(TokenType.STAR);
          addAllCodesFilter(conceptSet);
        } else if (current().type == TokenType.IN) {
          parseIncludeVs(conceptSet);
        } else {
          conceptSet.addConcept().setCode(parseCode());
        }
      }
    }

    /**
     * Parses a single un-parenthesised item into a new concept set: {@code *},
     * a code, a filter ({@code property operator value}), or a {@code ^} include.
     */
    private void parseSimpleExpr(String systemUri, boolean isExclusion) throws VCLParseException {
      ConceptSetComponent conceptSet = createConceptSet(systemUri, isExclusion);
      TokenType type = current().type;

      if (type == TokenType.STAR) {
        consume(TokenType.STAR);
        addAllCodesFilter(conceptSet);
      } else if (type == TokenType.SCODE || type == TokenType.QUOTED_VALUE) {
        parseCodeOrFilter(conceptSet);
      } else if (type == TokenType.IN) {
        parseIncludeVs(conceptSet);
      } else {
        throw new VCLParseException("Expected code, filter, or include", current().position);
      }
    }

    /** Adds the filter {@code concept exists true}, which is what {@code *} is translated to. */
    private void addAllCodesFilter(ConceptSetComponent conceptSet) {
      conceptSet.addFilter()
          .setProperty("concept")
          .setOp(Enumerations.FilterOperator.EXISTS)
          .setValue("true");
    }

    /**
     * Reads a code; if a filter operator follows, the code is the filter's property
     * name, otherwise it is added to {@code conceptSet} as a concept.
     */
    private void parseCodeOrFilter(ConceptSetComponent conceptSet) throws VCLParseException {
      String code = parseCode();
      if (isFilterOperator(current().type)) {
        parseFilter(conceptSet, code);
      } else {
        conceptSet.addConcept().setCode(code);
      }
    }

    /**
     * Parses {@code operator value} and adds the resulting filter on {@code property}.
     * The regex operator requires a quoted value; {@code ^} and {@code ~^} accept a
     * parenthesised code list, a URI or a code; every other operator takes a code.
     */
    private void parseFilter(ConceptSetComponent conceptSet, String property) throws VCLParseException {
      ConceptSetFilterComponent filter = conceptSet.addFilter();
      filter.setProperty(property);

      TokenType op = current().type;
      consume(op);

      switch (op) {
        case EQ:
          filter.setOp(Enumerations.FilterOperator.EQUAL);
          filter.setValue(parseCode());
          break;
        case IS_A:
          filter.setOp(Enumerations.FilterOperator.ISA);
          filter.setValue(parseCode());
          break;
        case IS_NOT_A:
          filter.setOp(Enumerations.FilterOperator.ISNOTA);
          filter.setValue(parseCode());
          break;
        case DESC_OF:
          filter.setOp(Enumerations.FilterOperator.DESCENDENTOF);
          filter.setValue(parseCode());
          break;
        case REGEX:
          filter.setOp(Enumerations.FilterOperator.REGEX);
          filter.setValue(parseQuotedString());
          break;
        case IN:
          filter.setOp(Enumerations.FilterOperator.IN);
          filter.setValue(parseFilterValue());
          break;
        case NOT_IN:
          filter.setOp(Enumerations.FilterOperator.NOTIN);
          filter.setValue(parseFilterValue());
          break;
        case GENERALIZES:
          filter.setOp(Enumerations.FilterOperator.GENERALIZES);
          filter.setValue(parseCode());
          break;
        case CHILD_OF:
          filter.setOp(Enumerations.FilterOperator.CHILDOF);
          filter.setValue(parseCode());
          break;
        case DESC_LEAF:
          filter.setOp(Enumerations.FilterOperator.DESCENDENTLEAF);
          filter.setValue(parseCode());
          break;
        case EXISTS:
          filter.setOp(Enumerations.FilterOperator.EXISTS);
          filter.setValue(parseCode());
          break;
        default:
          throw new VCLParseException("Unexpected filter operator: " + op, current().position);
      }
    }

    /** Parses {@code ^uri} or {@code ^(uri)} and records the URI as a value set reference. */
    private void parseIncludeVs(ConceptSetComponent conceptSet) throws VCLParseException {
      consume(TokenType.IN);

      boolean parenthesised = current().type == TokenType.OPEN;
      if (!parenthesised && current().type != TokenType.URI) {
        throw new VCLParseException("Expected URI after ^", current().position);
      }

      if (parenthesised) {
        consume(TokenType.OPEN);
      }
      // Check the token type before recording it, so a non-URI is never added.
      String uri = current().value;
      consume(TokenType.URI);
      conceptSet.addValueSet(uri);
      if (parenthesised) {
        consume(TokenType.CLOSE);
      }
    }

    /** Top-level conjunction; always targets includes. */
    private void parseConjunction() throws VCLParseException {
      parseConjunctionWithFlag(false);
    }

    /**
     * Parses {@code , item} repeatedly. A bare code or filter is added to the concept
     * set that was last in the include (or exclude) list when this method was entered;
     * anything else is parsed as a sub-expression, which creates its own concept set.
     */
    private void parseConjunctionWithFlag(boolean isExclusion) throws VCLParseException {
      ConceptSetComponent currentConceptSet = getCurrentConceptSet(isExclusion);

      while (current().type == TokenType.COMMA) {
        consume(TokenType.COMMA);

        if (current().type == TokenType.SCODE || current().type == TokenType.QUOTED_VALUE) {
          parseCodeOrFilter(currentConceptSet);
        } else {
          parseSubExpr(isExclusion);
        }
      }
    }

    /** Top-level disjunction; always targets includes. */
    private void parseDisjunction() throws VCLParseException {
      parseDisjunctionWithFlag(false);
    }

    /** Parses {@code ; subExpr} repeatedly. */
    private void parseDisjunctionWithFlag(boolean isExclusion) throws VCLParseException {
      while (current().type == TokenType.SEMI) {
        consume(TokenType.SEMI);
        parseSubExpr(isExclusion);
      }
    }

    /** Parses {@code - subExpr}; everything in the sub-expression goes to {@code compose.exclude}. */
    private void parseExclusion() throws VCLParseException {
      consume(TokenType.DASH);
      parseSubExpr(true);
    }

    /**
     * Consumes a bare or quoted code.
     *
     * @return the code text (without quotes for a quoted value)
     */
    private String parseCode() throws VCLParseException {
      Token token = current();
      if (token.type != TokenType.SCODE && token.type != TokenType.QUOTED_VALUE) {
        throw new VCLParseException("Expected code", token.position);
      }
      consume(token.type);
      return token.value;
    }

    /** Consumes a quoted value and returns its unquoted text. */
    private String parseQuotedString() throws VCLParseException {
      Token token = current();
      if (token.type != TokenType.QUOTED_VALUE) {
        throw new VCLParseException("Expected quoted string", token.position);
      }
      consume(TokenType.QUOTED_VALUE);
      return token.value;
    }

    /**
     * Parses the operand of {@code ^} / {@code ~^} when used as a filter.
     *
     * @return for {@code (a, b, c)} the codes joined with commas; otherwise the URI
     *         or code text
     */
    private String parseFilterValue() throws VCLParseException {
      if (current().type == TokenType.OPEN) {
        consume(TokenType.OPEN);
        StringBuilder sb = new StringBuilder(parseCode());

        while (current().type == TokenType.COMMA) {
          consume(TokenType.COMMA);
          sb.append(",").append(parseCode());
        }

        consume(TokenType.CLOSE);
        return sb.toString();
      }

      if (current().type == TokenType.URI) {
        String uri = current().value;
        consume(TokenType.URI);
        return uri;
      }

      return parseCode();
    }

    /**
     * Creates a concept set and appends it to {@code compose.exclude} or
     * {@code compose.include}.
     *
     * @param systemUri   code system to set, or {@code null} to leave it unset
     * @param isExclusion {@code true} for an exclude, {@code false} for an include
     */
    private ConceptSetComponent createConceptSet(String systemUri, boolean isExclusion) {
      ConceptSetComponent conceptSet = new ConceptSetComponent();

      if (systemUri != null) {
        conceptSet.setSystem(systemUri);
      }

      if (isExclusion) {
        compose.addExclude(conceptSet);
      } else {
        compose.addInclude(conceptSet);
      }

      return conceptSet;
    }

    /** The most recently added exclude (or include), created on demand if there is none yet. */
    private ConceptSetComponent getCurrentConceptSet(boolean isExclusion) {
      List<ConceptSetComponent> sets = isExclusion ? compose.getExclude() : compose.getInclude();
      return sets.isEmpty() ? createConceptSet(null, isExclusion) : sets.get(sets.size() - 1);
    }

    private boolean isFilterOperator(TokenType type) {
      return FILTER_OPERATORS.contains(type);
    }

    /** The token at the cursor, or a synthetic {@code EOF} (position -1) past the end of the list. */
    private Token current() {
      return pos < tokens.size() ? tokens.get(pos) : new Token(TokenType.EOF, "", -1);
    }

    /** The token after the cursor, or a synthetic {@code EOF} (position -1) past the end of the list. */
    private Token peek() {
      return pos + 1 < tokens.size() ? tokens.get(pos + 1) : new Token(TokenType.EOF, "", -1);
    }

    /** Requires the current token to be {@code expected} and moves past it. */
    private void consume(TokenType expected) throws VCLParseException {
      expect(expected);
      pos++;
    }

    /** Requires the current token to be {@code expected} without moving past it. */
    private void expect(TokenType expected) throws VCLParseException {
      if (current().type != expected) {
        throw new VCLParseException("Expected " + expected + " but got " + current().type, current().position);
      }
    }
  }

  /**
   * Parses a VCL expression.
   *
   * @param vclExpression the expression text
   * @return a new value set with status {@code DRAFT} whose compose reflects the expression
   * @throws VCLParseException if the expression is {@code null}, blank, or malformed;
   *                           where a position is reported it is an offset into
   *                           {@code vclExpression}
   */
  public static ValueSet parse(String vclExpression) throws VCLParseException {
    if (vclExpression == null || vclExpression.trim().isEmpty()) {
      throw new VCLParseException("VCL expression cannot be empty");
    }

    List<Token> tokens = new Lexer(vclExpression).tokenize();
    return new Parser(tokens).parse();
  }

  /**
   * Parses a VCL expression and gives the result a URL derived from its content:
   * {@code "cid:"} followed by {@link String#hashCode()} of the value set's JSON
   * serialisation (so the number may be negative).
   *
   * @param vclExpression the expression text
   * @return the parsed value set with its URL set
   * @throws VCLParseException if the expression is {@code null}, blank, or malformed
   * @throws IOException       if the value set cannot be serialised to JSON
   */
  public static ValueSet parseAndId(String vclExpression) throws VCLParseException, IOException {
    ValueSet vs = parse(vclExpression);
    String json = new JsonParser().composeString(vs);
    vs.setUrl("cid:" + json.hashCode());
    return vs;
  }

}