001package org.hl7.fhir.r5.terminologies.utilities;
002
003import org.hl7.fhir.r5.formats.JsonParser;
004import org.hl7.fhir.r5.model.*;
005import org.hl7.fhir.r5.model.ValueSet.*;
006
007import java.io.IOException;
008import java.util.*;
009
010public class VCLParser {
011
012  public static class VCLParseException extends Exception {
013    public VCLParseException(String message) {
014      super(message);
015    }
016
017    public VCLParseException(String message, int position) {
018      super(message + " at position " + position);
019    }
020  }
021
022  private enum TokenType {
023    DASH, OPEN, CLOSE, SEMI, COMMA, DOT, STAR,
024    EQ, IS_A, IS_NOT_A, DESC_OF, REGEX, IN, NOT_IN,
025    GENERALIZES, CHILD_OF, DESC_LEAF, EXISTS,
026    URI, SCODE, QUOTED_VALUE, EOF
027  }
028
029  private static class Token {
030    TokenType type;
031    String value;
032    int position;
033
034    Token(TokenType type, String value, int position) {
035      this.type = type;
036      this.value = value;
037      this.position = position;
038    }
039
040    @Override
041    public String toString() {
042      return type + "(" + value + ")";
043    }
044  }
045
046  private static class Lexer {
047    private String input;
048    private int pos = 0;
049
050    public Lexer(String input) {
051      this.input = input.trim();
052    }
053
054    public List<Token> tokenize() throws VCLParseException {
055      List<Token> tokens = new ArrayList<>();
056
057      while (pos < input.length()) {
058        skipWhitespace();
059        if (pos >= input.length()) break;
060
061        int startPos = pos;
062        char ch = input.charAt(pos);
063
064        switch (ch) {
065          case '-': tokens.add(new Token(TokenType.DASH, "-", startPos)); pos++; continue;
066          case '(': tokens.add(new Token(TokenType.OPEN, "(", startPos)); pos++; continue;
067          case ')': tokens.add(new Token(TokenType.CLOSE, ")", startPos)); pos++; continue;
068          case ';': tokens.add(new Token(TokenType.SEMI, ";", startPos)); pos++; continue;
069          case ',': tokens.add(new Token(TokenType.COMMA, ",", startPos)); pos++; continue;
070          case '.': tokens.add(new Token(TokenType.DOT, ".", startPos)); pos++; continue;
071          case '*': tokens.add(new Token(TokenType.STAR, "*", startPos)); pos++; continue;
072          case '=': tokens.add(new Token(TokenType.EQ, "=", startPos)); pos++; continue;
073          case '/': tokens.add(new Token(TokenType.REGEX, "/", startPos)); pos++; continue;
074          case '^': tokens.add(new Token(TokenType.IN, "^", startPos)); pos++; continue;
075          case '>':
076            if (peek() == '>') {
077              tokens.add(new Token(TokenType.GENERALIZES, ">>", startPos));
078              pos += 2;
079            } else {
080              throw new VCLParseException("Unexpected character: " + ch, pos);
081            }
082            continue;
083          case '<':
084            if (peek() == '<') {
085              tokens.add(new Token(TokenType.IS_A, "<<", startPos));
086              pos += 2;
087            } else if (peek() == '!') {
088              tokens.add(new Token(TokenType.CHILD_OF, "<!", startPos));
089              pos += 2;
090            } else {
091              tokens.add(new Token(TokenType.DESC_OF, "<", startPos));
092              pos++;
093            }
094            continue;
095          case '~':
096            if (peek() == '<' && peek(1) == '<') {
097              tokens.add(new Token(TokenType.IS_NOT_A, "~<<", startPos));
098              pos += 3;
099            } else if (peek() == '^') {
100              tokens.add(new Token(TokenType.NOT_IN, "~^", startPos));
101              pos += 2;
102            } else {
103              throw new VCLParseException("Unexpected character: " + ch, pos);
104            }
105            continue;
106          case '!':
107            if (peek() == '!' && peek(1) == '<') {
108              tokens.add(new Token(TokenType.DESC_LEAF, "!!<", startPos));
109              pos += 3;
110            } else {
111              throw new VCLParseException("Unexpected character: " + ch, pos);
112            }
113            continue;
114          case '?': tokens.add(new Token(TokenType.EXISTS, "?", startPos)); pos++; continue;
115          case '"':
116            tokens.add(readQuotedValue(startPos));
117            continue;
118        }
119
120        if (Character.isLetter(ch)) {
121          String value = readWhile(c -> Character.isLetterOrDigit(c) || c == ':' || c == '?' ||
122            c == '&' || c == '%' || c == '+' || c == '-' || c == '.' || c == '@' ||
123            c == '#' || c == '$' || c == '!' || c == '{' || c == '}' || c == '_');
124
125          if (value.contains(":")) {
126            String restOfUri = readWhile(c -> Character.isLetterOrDigit(c) || c == '?' ||
127              c == '&' || c == '%' || c == '+' || c == '-' || c == '.' || c == '@' ||
128              c == '#' || c == '$' || c == '!' || c == '{' || c == '}' || c == '_' || c == '/');
129            value += restOfUri;
130
131            if (pos < input.length() && input.charAt(pos) == '|') {
132              pos++;
133              String version = readWhile(c -> c != '(' && c != ')' && !Character.isWhitespace(c));
134              value += "|" + version;
135            }
136            tokens.add(new Token(TokenType.URI, value, startPos));
137          } else {
138            tokens.add(new Token(TokenType.SCODE, value, startPos));
139          }
140        } else if (Character.isDigit(ch)) {
141          String value = readWhile(c -> Character.isLetterOrDigit(c) || c == '-' || c == '_');
142          tokens.add(new Token(TokenType.SCODE, value, startPos));
143        } else {
144          throw new VCLParseException("Unexpected character: " + ch, pos);
145        }
146      }
147
148      tokens.add(new Token(TokenType.EOF, "", pos));
149      return tokens;
150    }
151
152    private Token readQuotedValue(int startPos) throws VCLParseException {
153      StringBuilder sb = new StringBuilder();
154      pos++;
155
156      while (pos < input.length()) {
157        char ch = input.charAt(pos);
158        if (ch == '"') {
159          pos++;
160          return new Token(TokenType.QUOTED_VALUE, sb.toString(), startPos);
161        } else if (ch == '\\' && pos + 1 < input.length()) {
162          pos++;
163          char escaped = input.charAt(pos);
164          if (escaped == '"' || escaped == '\\') {
165            sb.append(escaped);
166          } else {
167            sb.append('\\').append(escaped);
168          }
169          pos++;
170        } else {
171          sb.append(ch);
172          pos++;
173        }
174      }
175
176      throw new VCLParseException("Unterminated quoted string", startPos);
177    }
178
179    private String readWhile(java.util.function.Predicate<Character> predicate) {
180      StringBuilder sb = new StringBuilder();
181      while (pos < input.length() && predicate.test(input.charAt(pos))) {
182        sb.append(input.charAt(pos));
183        pos++;
184      }
185      return sb.toString();
186    }
187
188    private char peek() {
189      return peek(0);
190    }
191
192    private char peek(int offset) {
193      int peekPos = pos + 1 + offset;
194      return peekPos < input.length() ? input.charAt(peekPos) : '\0';
195    }
196
197    private void skipWhitespace() {
198      while (pos < input.length() && Character.isWhitespace(input.charAt(pos))) {
199        pos++;
200      }
201    }
202  }
203
204  private static class Parser {
205    private List<Token> tokens;
206    private int pos = 0;
207    private ValueSet valueSet;
208    private ValueSetComposeComponent compose;
209
210    public Parser(List<Token> tokens) {
211      this.tokens = tokens;
212      this.valueSet = new ValueSet();
213      this.valueSet.setStatus(Enumerations.PublicationStatus.DRAFT);
214      this.compose = new ValueSetComposeComponent();
215      this.valueSet.setCompose(compose);
216    }
217
218    public ValueSet parse() throws VCLParseException {
219      parseExpr();
220      expect(TokenType.EOF);
221      return valueSet;
222    }
223
224    private void parseExpr() throws VCLParseException {
225      parseSubExpr(false);
226
227      if (current().type == TokenType.COMMA) {
228        parseConjunction();
229      } else if (current().type == TokenType.SEMI) {
230        parseDisjunction();
231      } else if (current().type == TokenType.DASH) {
232        parseExclusion();
233      }
234    }
235
236    private void parseSubExpr(boolean isExclusion) throws VCLParseException {
237      String systemUri = null;
238
239      if (current().type == TokenType.OPEN && peek().type == TokenType.URI) {
240        consume(TokenType.OPEN);
241        systemUri = current().value;
242        consume(TokenType.URI);
243        consume(TokenType.CLOSE);
244      }
245
246      if (current().type == TokenType.OPEN) {
247        consume(TokenType.OPEN);
248
249        if (current().type == TokenType.OPEN && peek().type == TokenType.URI) {
250          consume(TokenType.OPEN);
251          systemUri = current().value;
252          consume(TokenType.URI);
253          consume(TokenType.CLOSE);
254        }
255
256        if (isSimpleCodeList()) {
257          parseSimpleCodeList(systemUri, isExclusion);
258        } else {
259          parseExprWithinParentheses(isExclusion);
260        }
261
262        consume(TokenType.CLOSE);
263      } else {
264        parseSimpleExpr(systemUri, isExclusion);
265      }
266    }
267
268    private boolean isSimpleCodeList() {
269      int lookahead = pos;
270      while (lookahead < tokens.size()) {
271        Token token = tokens.get(lookahead);
272
273        if (token.type == TokenType.CLOSE) {
274          return true;
275        }
276
277        if (token.type == TokenType.OPEN && lookahead + 2 < tokens.size()) {
278          Token nextToken = tokens.get(lookahead + 1);
279          Token tokenAfterNext = tokens.get(lookahead + 2);
280          if (nextToken.type == TokenType.URI && tokenAfterNext.type == TokenType.CLOSE) {
281            lookahead += 3;
282            continue;
283          }
284        }
285
286        if (token.type == TokenType.OPEN ||
287          token.type == TokenType.DASH ||
288          isFilterOperator(token.type)) {
289          return false;
290        }
291        lookahead++;
292      }
293      return true;
294    }
295
296    private void parseExprWithinParentheses(boolean isExclusion) throws VCLParseException {
297      parseSubExpr(isExclusion);
298
299      while (current().type == TokenType.COMMA || current().type == TokenType.SEMI || current().type == TokenType.DASH) {
300        if (current().type == TokenType.COMMA) {
301          parseConjunctionWithFlag(isExclusion);
302        } else if (current().type == TokenType.SEMI) {
303          parseDisjunctionWithFlag(isExclusion);
304        } else if (current().type == TokenType.DASH) {
305          parseExclusion();
306        }
307      }
308    }
309
310    private void parseSimpleCodeList(String systemUri, boolean isExclusion) throws VCLParseException {
311      ConceptSetComponent conceptSet = createConceptSet(systemUri, isExclusion);
312
313      if (current().type == TokenType.STAR) {
314        consume(TokenType.STAR);
315        conceptSet.addFilter()
316          .setProperty("concept")
317          .setOp(Enumerations.FilterOperator.EXISTS)
318          .setValue("true");
319        return;
320      } else if (current().type == TokenType.IN) {
321        parseIncludeVs(conceptSet);
322        return;
323      } else {
324        String code = parseCode();
325        conceptSet.addConcept().setCode(code);
326      }
327
328      while (current().type == TokenType.SEMI || current().type == TokenType.COMMA) {
329        consume(current().type);
330
331        if (current().type == TokenType.STAR) {
332          consume(TokenType.STAR);
333          conceptSet.addFilter()
334            .setProperty("concept")
335            .setOp(Enumerations.FilterOperator.EXISTS)
336            .setValue("true");
337        } else if (current().type == TokenType.IN) {
338          parseIncludeVs(conceptSet);
339        } else {
340          String code = parseCode();
341          conceptSet.addConcept().setCode(code);
342        }
343      }
344    }
345
346    private void parseSimpleExpr(String systemUri, boolean isExclusion) throws VCLParseException {
347      ConceptSetComponent conceptSet = createConceptSet(systemUri, isExclusion);
348
349      if (current().type == TokenType.STAR) {
350        consume(TokenType.STAR);
351        conceptSet.addFilter()
352          .setProperty("concept")
353          .setOp(Enumerations.FilterOperator.EXISTS)
354          .setValue("true");
355      } else if (current().type == TokenType.SCODE || current().type == TokenType.QUOTED_VALUE) {
356        String code = parseCode();
357
358        if (isFilterOperator(current().type)) {
359          parseFilter(conceptSet, code);
360        } else {
361          conceptSet.addConcept().setCode(code);
362        }
363      } else if (current().type == TokenType.IN) {
364        parseIncludeVs(conceptSet);
365      } else {
366        throw new VCLParseException("Expected code, filter, or include", current().position);
367      }
368    }
369
370    private void parseFilter(ConceptSetComponent conceptSet, String property) throws VCLParseException {
371      ConceptSetFilterComponent filter = conceptSet.addFilter();
372      filter.setProperty(property);
373
374      TokenType op = current().type;
375      consume(op);
376
377      switch (op) {
378        case EQ:
379          filter.setOp(Enumerations.FilterOperator.EQUAL);
380          filter.setValue(parseCode());
381          break;
382        case IS_A:
383          filter.setOp(Enumerations.FilterOperator.ISA);
384          filter.setValue(parseCode());
385          break;
386        case IS_NOT_A:
387          filter.setOp(Enumerations.FilterOperator.ISNOTA);
388          filter.setValue(parseCode());
389          break;
390        case DESC_OF:
391          filter.setOp(Enumerations.FilterOperator.DESCENDENTOF);
392          filter.setValue(parseCode());
393          break;
394        case REGEX:
395          filter.setOp(Enumerations.FilterOperator.REGEX);
396          filter.setValue(parseQuotedString());
397          break;
398        case IN:
399          filter.setOp(Enumerations.FilterOperator.IN);
400          filter.setValue(parseFilterValue());
401          break;
402        case NOT_IN:
403          filter.setOp(Enumerations.FilterOperator.NOTIN);
404          filter.setValue(parseFilterValue());
405          break;
406        case GENERALIZES:
407          filter.setOp(Enumerations.FilterOperator.GENERALIZES);
408          filter.setValue(parseCode());
409          break;
410        case CHILD_OF:
411          filter.setOp(Enumerations.FilterOperator.CHILDOF);
412          filter.setValue(parseCode());
413          break;
414        case DESC_LEAF:
415          filter.setOp(Enumerations.FilterOperator.DESCENDENTLEAF);
416          filter.setValue(parseCode());
417          break;
418        case EXISTS:
419          filter.setOp(Enumerations.FilterOperator.EXISTS);
420          filter.setValue(parseCode());
421          break;
422        default:
423          throw new VCLParseException("Unexpected filter operator: " + op, current().position);
424      }
425    }
426
427    private void parseIncludeVs(ConceptSetComponent conceptSet) throws VCLParseException {
428      consume(TokenType.IN);
429
430      if (current().type == TokenType.URI) {
431        conceptSet.addValueSet(current().value);
432        consume(TokenType.URI);
433      } else if (current().type == TokenType.OPEN) {
434        consume(TokenType.OPEN);
435        conceptSet.addValueSet(current().value);
436        consume(TokenType.URI);
437        consume(TokenType.CLOSE);
438      } else {
439        throw new VCLParseException("Expected URI after ^", current().position);
440      }
441    }
442
443    private void parseConjunction() throws VCLParseException {
444      ConceptSetComponent currentConceptSet = getCurrentConceptSet(false);
445
446      while (current().type == TokenType.COMMA) {
447        consume(TokenType.COMMA);
448
449        if (current().type == TokenType.SCODE || current().type == TokenType.QUOTED_VALUE) {
450          String code = parseCode();
451          if (isFilterOperator(current().type)) {
452            parseFilter(currentConceptSet, code);
453          } else {
454            currentConceptSet.addConcept().setCode(code);
455          }
456        } else {
457          parseSubExpr(false);
458        }
459      }
460    }
461
462    private void parseConjunctionWithFlag(boolean isExclusion) throws VCLParseException {
463      ConceptSetComponent currentConceptSet = getCurrentConceptSet(isExclusion);
464
465      while (current().type == TokenType.COMMA) {
466        consume(TokenType.COMMA);
467
468        if (current().type == TokenType.SCODE || current().type == TokenType.QUOTED_VALUE) {
469          String code = parseCode();
470          if (isFilterOperator(current().type)) {
471            parseFilter(currentConceptSet, code);
472          } else {
473            currentConceptSet.addConcept().setCode(code);
474          }
475        } else {
476          parseSubExpr(isExclusion);
477        }
478      }
479    }
480
481    private void parseDisjunction() throws VCLParseException {
482      while (current().type == TokenType.SEMI) {
483        consume(TokenType.SEMI);
484        parseSubExpr(false);
485      }
486    }
487
488    private void parseDisjunctionWithFlag(boolean isExclusion) throws VCLParseException {
489      while (current().type == TokenType.SEMI) {
490        consume(TokenType.SEMI);
491        parseSubExpr(isExclusion);
492      }
493    }
494
495    private void parseExclusion() throws VCLParseException {
496      consume(TokenType.DASH);
497      parseSubExpr(true);
498    }
499
500    private String parseCode() throws VCLParseException {
501      if (current().type == TokenType.SCODE) {
502        String code = current().value;
503        consume(TokenType.SCODE);
504        return code;
505      } else if (current().type == TokenType.QUOTED_VALUE) {
506        String code = current().value;
507        consume(TokenType.QUOTED_VALUE);
508        return code;
509      } else {
510        throw new VCLParseException("Expected code", current().position);
511      }
512    }
513
514    private String parseQuotedString() throws VCLParseException {
515      if (current().type == TokenType.QUOTED_VALUE) {
516        String value = current().value;
517        consume(TokenType.QUOTED_VALUE);
518        return value;
519      } else {
520        throw new VCLParseException("Expected quoted string", current().position);
521      }
522    }
523
524    private String parseFilterValue() throws VCLParseException {
525      if (current().type == TokenType.OPEN) {
526        consume(TokenType.OPEN);
527        StringBuilder sb = new StringBuilder();
528        sb.append(parseCode());
529
530        while (current().type == TokenType.COMMA) {
531          consume(TokenType.COMMA);
532          sb.append(",").append(parseCode());
533        }
534
535        consume(TokenType.CLOSE);
536        return sb.toString();
537      } else if (current().type == TokenType.URI) {
538        String uri = current().value;
539        consume(TokenType.URI);
540        return uri;
541      } else {
542        return parseCode();
543      }
544    }
545
546    private ConceptSetComponent createConceptSet(String systemUri, boolean isExclusion) {
547      ConceptSetComponent conceptSet = new ConceptSetComponent();
548
549      if (systemUri != null) {
550        conceptSet.setSystem(systemUri);
551      }
552
553      if (isExclusion) {
554        compose.addExclude(conceptSet);
555      } else {
556        compose.addInclude(conceptSet);
557      }
558
559      return conceptSet;
560    }
561
562    private ConceptSetComponent getCurrentConceptSet(boolean isExclusion) {
563      if (isExclusion) {
564        List<ConceptSetComponent> excludes = compose.getExclude();
565        return excludes.isEmpty() ? createConceptSet(null, true) : excludes.get(excludes.size() - 1);
566      } else {
567        List<ConceptSetComponent> includes = compose.getInclude();
568        return includes.isEmpty() ? createConceptSet(null, false) : includes.get(includes.size() - 1);
569      }
570    }
571
572    private boolean isFilterOperator(TokenType type) {
573      return type == TokenType.EQ || type == TokenType.IS_A || type == TokenType.IS_NOT_A ||
574        type == TokenType.DESC_OF || type == TokenType.REGEX || type == TokenType.IN ||
575        type == TokenType.NOT_IN || type == TokenType.GENERALIZES || type == TokenType.CHILD_OF ||
576        type == TokenType.DESC_LEAF || type == TokenType.EXISTS;
577    }
578
579    private Token current() {
580      return pos < tokens.size() ? tokens.get(pos) : new Token(TokenType.EOF, "", -1);
581    }
582
583    private Token peek() {
584      return pos + 1 < tokens.size() ? tokens.get(pos + 1) : new Token(TokenType.EOF, "", -1);
585    }
586
587    private void consume(TokenType expected) throws VCLParseException {
588      if (current().type != expected) {
589        throw new VCLParseException("Expected " + expected + " but got " + current().type, current().position);
590      }
591      pos++;
592    }
593
594    private void expect(TokenType expected) throws VCLParseException {
595      if (current().type != expected) {
596        throw new VCLParseException("Expected " + expected + " but got " + current().type, current().position);
597      }
598    }
599  }
600
601  public static ValueSet parse(String vclExpression) throws VCLParseException {
602    if (vclExpression == null || vclExpression.trim().isEmpty()) {
603      throw new VCLParseException("VCL expression cannot be empty");
604    }
605
606    Lexer lexer = new Lexer(vclExpression);
607    List<Token> tokens = lexer.tokenize();
608
609    Parser parser = new Parser(tokens);
610    return parser.parse();
611  }
612
613  public static ValueSet parseAndId(String vclExpression) throws VCLParseException, IOException {
614    ValueSet vs = parse(vclExpression);
615    String json = new JsonParser().composeString(vs);
616    vs.setUrl("cid:" + json.hashCode());
617    return vs;
618  }
619
620}