From 67991f291866faedf8ae31ac39dbd27f96f98327 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Tue, 2 May 2023 12:48:37 +0200 Subject: [PATCH] Add auto-generated filter `Parser` --- internal/filter/parser.go | 1963 ++++++++++++++++++++++++++++++++----- 1 file changed, 1717 insertions(+), 246 deletions(-) diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 1ce642bf7..025e3e5a9 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -1,357 +1,1828 @@ +// Code generated by pigeon; DO NOT EDIT. + package filter import ( + "bytes" + "errors" "fmt" + "io" + "io/ioutil" + "math" "net/url" + "os" + "sort" + "strconv" "strings" + "unicode" + "unicode/utf8" ) -type Parser struct { - tag string - pos, length, openParenthesis int +const LogicalAnd = "&" + +// ParseFilter wraps the auto generated filter.Parse function. +// It parses the given filter string and returns on success a Filter instance. +func ParseFilter(expr string, opts ...Option) (Filter, error) { + filter, err := Parse("", []byte(expr), opts...) + if err != nil { + return nil, err + } + + return filter.(Filter), nil } -// Parse parses an object filter expression. -func Parse(expression string) (Filter, error) { - parser := &Parser{tag: expression, length: len(expression)} - if parser.length == 0 { - return &All{}, nil +func appendToNewChain(currentChain Filter, operator string, rules ...Filter) (Filter, error) { + newChain, err := NewChain(currentChain, operator) + if err != nil { + return nil, err } - return parser.readFilter(0, "", nil) + newChain.Add(rules...) + + return newChain, nil } -// readFilter reads the entire filter from the Parser.tag and derives a filter.Filter from it. -// Returns an error on parsing failure. -func (p *Parser) readFilter(nestingLevel int, operator string, rules []Filter) (Filter, error) { - negate := false - for p.pos < p.length { - condition, err := p.readCondition() - if err != nil { - return nil, err +// mergeChains merges the given filter with the new one (initiated with the provided operator). +// When the specified Filter is not of type *Any or the given operator is not a LogicalAnd, this is a no-op. +// Otherwise, it will pop the last rule of that chain and append it the new *And chain. +// +// Example: `foo=bar|bar~foo&col!~val` +// The argument `rule` is the filter *Any chain contains the first two conditions, and what this function +// will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. +func mergeChains(rule Filter, logicalOp string, rules []interface{}) (Chainable, bool) { + if chain, ok := rule.(*Any); ok && logicalOp == LogicalAnd { + andChain := &All{} + // Retrieve the last pushed condition and append it to the new "And" chain instead + andChain.Add(chain.Pop()) + + for _, filterRule := range rules { + andChain.Add(filterRule.(Filter)) } - next := p.readChar() - if condition == nil { - if next == "!" { - negate = true - continue - } + chain.Add(andChain) - if operator == "" && len(rules) > 0 && (next == "&" || next == "|") { - operator = next - continue - } + return chain, true + } - if next == "" { - break - } + return nil, false +} - if next == ")" { - p.openParenthesis-- +var g = &grammar{ + rules: []*rule{ + { + name: "FilterRule", + pos: position{line: 56, col: 1, offset: 1681}, + expr: &actionExpr{ + pos: position{line: 56, col: 15, offset: 1695}, + run: (*parser).callonFilterRule1, + expr: &seqExpr{ + pos: position{line: 56, col: 15, offset: 1695}, + exprs: []interface{}{ + &labeledExpr{ + pos: position{line: 56, col: 15, offset: 1695}, + label: "groups", + expr: &ruleRefExpr{ + pos: position{line: 56, col: 22, offset: 1702}, + name: "FilterRuleGroups", + }, + }, + &ruleRefExpr{ + pos: position{line: 56, col: 39, offset: 1719}, + name: "EOF", + }, + }, + }, + }, + }, + { + name: "FilterRuleGroups", + pos: position{line: 59, col: 1, offset: 1750}, + expr: &choiceExpr{ + pos: position{line: 59, col: 21, offset: 1770}, + alternatives: []interface{}{ + &actionExpr{ + pos: position{line: 59, col: 21, offset: 1770}, + run: (*parser).callonFilterRuleGroups2, + expr: &seqExpr{ + pos: position{line: 59, col: 21, offset: 1770}, + exprs: []interface{}{ + &labeledExpr{ + pos: position{line: 59, col: 21, offset: 1770}, + label: "negateGroup", + expr: &zeroOrOneExpr{ + pos: position{line: 59, col: 33, offset: 1782}, + expr: &ruleRefExpr{ + pos: position{line: 59, col: 33, offset: 1782}, + name: "Negate", + }, + }, + }, + &labeledExpr{ + pos: position{line: 59, col: 41, offset: 1790}, + label: "group", + expr: &ruleRefExpr{ + pos: position{line: 59, col: 47, offset: 1796}, + name: "FilterRuleGroup", + }, + }, + &labeledExpr{ + pos: position{line: 59, col: 63, offset: 1812}, + label: "logicalOp", + expr: &ruleRefExpr{ + pos: position{line: 59, col: 73, offset: 1822}, + name: "LogicalAndOrOperator", + }, + }, + &labeledExpr{ + pos: position{line: 59, col: 94, offset: 1843}, + label: "groups", + expr: &ruleRefExpr{ + pos: position{line: 59, col: 101, offset: 1850}, + name: "FilterRuleGroup", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 72, col: 5, offset: 2184}, + run: (*parser).callonFilterRuleGroups13, + expr: &labeledExpr{ + pos: position{line: 72, col: 5, offset: 2184}, + label: "group", + expr: &ruleRefExpr{ + pos: position{line: 72, col: 11, offset: 2190}, + name: "FilterRuleGroup", + }, + }, + }, + }, + }, + }, + { + name: "FilterRuleGroup", + pos: position{line: 75, col: 1, offset: 2234}, + expr: &choiceExpr{ + pos: position{line: 75, col: 20, offset: 2253}, + alternatives: []interface{}{ + &actionExpr{ + pos: position{line: 75, col: 20, offset: 2253}, + run: (*parser).callonFilterRuleGroup2, + expr: &seqExpr{ + pos: position{line: 75, col: 20, offset: 2253}, + exprs: []interface{}{ + &labeledExpr{ + pos: position{line: 75, col: 20, offset: 2253}, + label: "chainsAndOp", + expr: &zeroOrOneExpr{ + pos: position{line: 75, col: 32, offset: 2265}, + expr: &seqExpr{ + pos: position{line: 75, col: 33, offset: 2266}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 75, col: 33, offset: 2266}, + name: "FilterChains", + }, + &ruleRefExpr{ + pos: position{line: 75, col: 46, offset: 2279}, + name: "LogicalAndOrOperator", + }, + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 75, col: 69, offset: 2302}, + label: "negateGroups", + expr: &zeroOrOneExpr{ + pos: position{line: 75, col: 82, offset: 2315}, + expr: &ruleRefExpr{ + pos: position{line: 75, col: 82, offset: 2315}, + name: "Negate", + }, + }, + }, + &litMatcher{ + pos: position{line: 75, col: 90, offset: 2323}, + val: "(", + ignoreCase: false, + want: "\"(\"", + }, + &labeledExpr{ + pos: position{line: 75, col: 94, offset: 2327}, + label: "groups", + expr: &ruleRefExpr{ + pos: position{line: 75, col: 101, offset: 2334}, + name: "FilterRuleGroups", + }, + }, + &litMatcher{ + pos: position{line: 75, col: 118, offset: 2351}, + val: ")", + ignoreCase: false, + want: "\")\"", + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 94, col: 5, offset: 2823}, + run: (*parser).callonFilterRuleGroup16, + expr: &labeledExpr{ + pos: position{line: 94, col: 5, offset: 2823}, + label: "chain", + expr: &ruleRefExpr{ + pos: position{line: 94, col: 11, offset: 2829}, + name: "FilterChains", + }, + }, + }, + }, + }, + }, + { + name: "FilterChains", + pos: position{line: 97, col: 1, offset: 2870}, + expr: &choiceExpr{ + pos: position{line: 97, col: 17, offset: 2886}, + alternatives: []interface{}{ + &actionExpr{ + pos: position{line: 97, col: 17, offset: 2886}, + run: (*parser).callonFilterChains2, + expr: &seqExpr{ + pos: position{line: 97, col: 17, offset: 2886}, + exprs: []interface{}{ + &labeledExpr{ + pos: position{line: 97, col: 17, offset: 2886}, + label: "chain", + expr: &ruleRefExpr{ + pos: position{line: 97, col: 23, offset: 2892}, + name: "FilterChainExpr", + }, + }, + &labeledExpr{ + pos: position{line: 97, col: 39, offset: 2908}, + label: "logicalOp", + expr: &ruleRefExpr{ + pos: position{line: 97, col: 49, offset: 2918}, + name: "LogicalAndOrOperator", + }, + }, + &labeledExpr{ + pos: position{line: 97, col: 70, offset: 2939}, + label: "chains", + expr: &ruleRefExpr{ + pos: position{line: 97, col: 77, offset: 2946}, + name: "FilterChainExpr", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 100, col: 5, offset: 3053}, + run: (*parser).callonFilterChains10, + expr: &labeledExpr{ + pos: position{line: 100, col: 5, offset: 3053}, + label: "chain", + expr: &ruleRefExpr{ + pos: position{line: 100, col: 11, offset: 3059}, + name: "FilterChainExpr", + }, + }, + }, + }, + }, + }, + { + name: "FilterChainExpr", + pos: position{line: 103, col: 1, offset: 3105}, + expr: &actionExpr{ + pos: position{line: 103, col: 20, offset: 3124}, + run: (*parser).callonFilterChainExpr1, + expr: &seqExpr{ + pos: position{line: 103, col: 20, offset: 3124}, + exprs: []interface{}{ + &labeledExpr{ + pos: position{line: 103, col: 20, offset: 3124}, + label: "rule", + expr: &ruleRefExpr{ + pos: position{line: 103, col: 25, offset: 3129}, + name: "FilterChainOrCondExpr", + }, + }, + &labeledExpr{ + pos: position{line: 103, col: 47, offset: 3151}, + label: "condAndOp", + expr: &zeroOrOneExpr{ + pos: position{line: 103, col: 57, offset: 3161}, + expr: &seqExpr{ + pos: position{line: 103, col: 58, offset: 3162}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 103, col: 58, offset: 3162}, + name: "LogicalAndOrOperator", + }, + &ruleRefExpr{ + pos: position{line: 103, col: 79, offset: 3183}, + name: "ConditionExpr", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "FilterChainOrCondExpr", + pos: position{line: 116, col: 1, offset: 3530}, + expr: &actionExpr{ + pos: position{line: 116, col: 26, offset: 3555}, + run: (*parser).callonFilterChainOrCondExpr1, + expr: &seqExpr{ + pos: position{line: 116, col: 26, offset: 3555}, + exprs: []interface{}{ + &labeledExpr{ + pos: position{line: 116, col: 26, offset: 3555}, + label: "negate", + expr: &zeroOrOneExpr{ + pos: position{line: 116, col: 33, offset: 3562}, + expr: &ruleRefExpr{ + pos: position{line: 116, col: 33, offset: 3562}, + name: "Negate", + }, + }, + }, + &labeledExpr{ + pos: position{line: 116, col: 41, offset: 3570}, + label: "cond", + expr: &ruleRefExpr{ + pos: position{line: 116, col: 46, offset: 3575}, + name: "ConditionExpr", + }, + }, + &labeledExpr{ + pos: position{line: 116, col: 60, offset: 3589}, + label: "condAndOp", + expr: &zeroOrOneExpr{ + pos: position{line: 116, col: 70, offset: 3599}, + expr: &seqExpr{ + pos: position{line: 116, col: 71, offset: 3600}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 116, col: 71, offset: 3600}, + name: "LogicalAndOrOperator", + }, + &zeroOrOneExpr{ + pos: position{line: 116, col: 92, offset: 3621}, + expr: &ruleRefExpr{ + pos: position{line: 116, col: 92, offset: 3621}, + name: "Negate", + }, + }, + &ruleRefExpr{ + pos: position{line: 116, col: 100, offset: 3629}, + name: "ConditionExpr", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "ConditionExpr", + pos: position{line: 142, col: 1, offset: 4174}, + expr: &choiceExpr{ + pos: position{line: 142, col: 18, offset: 4191}, + alternatives: []interface{}{ + &actionExpr{ + pos: position{line: 142, col: 18, offset: 4191}, + run: (*parser).callonConditionExpr2, + expr: &seqExpr{ + pos: position{line: 142, col: 18, offset: 4191}, + exprs: []interface{}{ + &labeledExpr{ + pos: position{line: 142, col: 18, offset: 4191}, + label: "col", + expr: &ruleRefExpr{ + pos: position{line: 142, col: 22, offset: 4195}, + name: "Identifier", + }, + }, + &labeledExpr{ + pos: position{line: 142, col: 33, offset: 4206}, + label: "op", + expr: &ruleRefExpr{ + pos: position{line: 142, col: 36, offset: 4209}, + name: "BinaryOperator", + }, + }, + &labeledExpr{ + pos: position{line: 142, col: 51, offset: 4224}, + label: "val", + expr: &ruleRefExpr{ + pos: position{line: 142, col: 55, offset: 4228}, + name: "Identifier", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 155, col: 5, offset: 4504}, + run: (*parser).callonConditionExpr10, + expr: &labeledExpr{ + pos: position{line: 155, col: 5, offset: 4504}, + label: "expr", + expr: &ruleRefExpr{ + pos: position{line: 155, col: 10, offset: 4509}, + name: "ExistsExpr", + }, + }, + }, + }, + }, + }, + { + name: "ExistsExpr", + pos: position{line: 158, col: 1, offset: 4547}, + expr: &actionExpr{ + pos: position{line: 158, col: 15, offset: 4561}, + run: (*parser).callonExistsExpr1, + expr: &labeledExpr{ + pos: position{line: 158, col: 15, offset: 4561}, + label: "col", + expr: &choiceExpr{ + pos: position{line: 158, col: 20, offset: 4566}, + alternatives: []interface{}{ + &seqExpr{ + pos: position{line: 158, col: 20, offset: 4566}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 158, col: 20, offset: 4566}, + name: "Identifier", + }, + ¬Expr{ + pos: position{line: 158, col: 31, offset: 4577}, + expr: &ruleRefExpr{ + pos: position{line: 158, col: 32, offset: 4578}, + name: "BinaryOperator", + }, + }, + ¬Expr{ + pos: position{line: 158, col: 47, offset: 4593}, + expr: &litMatcher{ + pos: position{line: 158, col: 48, offset: 4594}, + val: "(", + ignoreCase: false, + want: "\"(\"", + }, + }, + }, + }, + &seqExpr{ + pos: position{line: 158, col: 54, offset: 4600}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 158, col: 54, offset: 4600}, + name: "Identifier", + }, + &andExpr{ + pos: position{line: 158, col: 65, offset: 4611}, + expr: &ruleRefExpr{ + pos: position{line: 158, col: 66, offset: 4612}, + name: "EOF", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "Negate", + pos: position{line: 162, col: 1, offset: 4696}, + expr: &actionExpr{ + pos: position{line: 162, col: 11, offset: 4706}, + run: (*parser).callonNegate1, + expr: &litMatcher{ + pos: position{line: 162, col: 11, offset: 4706}, + val: "!", + ignoreCase: false, + want: "\"!\"", + }, + }, + }, + { + name: "LogicalAndOrOperator", + pos: position{line: 165, col: 1, offset: 4745}, + expr: &actionExpr{ + pos: position{line: 165, col: 25, offset: 4769}, + run: (*parser).callonLogicalAndOrOperator1, + expr: &choiceExpr{ + pos: position{line: 165, col: 26, offset: 4770}, + alternatives: []interface{}{ + &litMatcher{ + pos: position{line: 165, col: 26, offset: 4770}, + val: "&", + ignoreCase: false, + want: "\"&\"", + }, + &litMatcher{ + pos: position{line: 165, col: 32, offset: 4776}, + val: "|", + ignoreCase: false, + want: "\"|\"", + }, + }, + }, + }, + }, + { + name: "BinaryOperator", + pos: position{line: 168, col: 1, offset: 4816}, + expr: &actionExpr{ + pos: position{line: 168, col: 19, offset: 4834}, + run: (*parser).callonBinaryOperator1, + expr: &choiceExpr{ + pos: position{line: 168, col: 21, offset: 4836}, + alternatives: []interface{}{ + &litMatcher{ + pos: position{line: 168, col: 21, offset: 4836}, + val: "!~", + ignoreCase: false, + want: "\"!~\"", + }, + &litMatcher{ + pos: position{line: 168, col: 28, offset: 4843}, + val: "~", + ignoreCase: false, + want: "\"~\"", + }, + &litMatcher{ + pos: position{line: 168, col: 34, offset: 4849}, + val: "<=", + ignoreCase: false, + want: "\"<=\"", + }, + &litMatcher{ + pos: position{line: 168, col: 41, offset: 4856}, + val: ">=", + ignoreCase: false, + want: "\">=\"", + }, + &litMatcher{ + pos: position{line: 168, col: 48, offset: 4863}, + val: "!=", + ignoreCase: false, + want: "\"!=\"", + }, + &litMatcher{ + pos: position{line: 168, col: 55, offset: 4870}, + val: "=", + ignoreCase: false, + want: "\"=\"", + }, + &litMatcher{ + pos: position{line: 168, col: 61, offset: 4876}, + val: "<", + ignoreCase: false, + want: "\"<\"", + }, + &litMatcher{ + pos: position{line: 168, col: 67, offset: 4882}, + val: ">", + ignoreCase: false, + want: "\">\"", + }, + }, + }, + }, + }, + { + name: "Identifier", + displayName: "\"column or value\"", + pos: position{line: 171, col: 1, offset: 4922}, + expr: &actionExpr{ + pos: position{line: 171, col: 33, offset: 4954}, + run: (*parser).callonIdentifier1, + expr: &oneOrMoreExpr{ + pos: position{line: 171, col: 33, offset: 4954}, + expr: &charClassMatcher{ + pos: position{line: 171, col: 33, offset: 4954}, + val: "[^!&|~<>=()]", + chars: []rune{'!', '&', '|', '~', '<', '>', '=', '(', ')'}, + ignoreCase: false, + inverted: true, + }, + }, + }, + }, + { + name: "EOF", + pos: position{line: 174, col: 1, offset: 5003}, + expr: ¬Expr{ + pos: position{line: 174, col: 8, offset: 5010}, + expr: &anyMatcher{ + line: 174, col: 9, offset: 5011, + }, + }, + }, + }, +} - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } +func (c *current) onFilterRule1(groups interface{}) (interface{}, error) { + return groups, nil +} - break - } +func (p *parser) callonFilterRule1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterRule1(stack["groups"]) +} - return nil, p.parseError(next, "") - } +func (c *current) onFilterRuleGroups2(negateGroup, group, logicalOp, groups interface{}) (interface{}, error) { + rule := group.(Filter) + if negateGroup != nil { + rule = &None{rules: []Filter{rule}} + } - if next == "(" { - if p.nextChar() == "&" || p.nextChar() == "|" { - // When a logical operator follows directly after the opening parenthesis "(", - // this can't be a valid expression. E.g. "!(&" - next = p.readChar() + chain, merged := mergeChains(rule, logicalOp.(string), []interface{}{groups}) + if merged { + return chain, nil + } - return nil, p.parseError(next, "") - } + return appendToNewChain(rule, logicalOp.(string), groups.(Filter)) - p.openParenthesis++ +} - op := "" - if negate { - op = "!" - } +func (p *parser) callonFilterRuleGroups2() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterRuleGroups2(stack["negateGroup"], stack["group"], stack["logicalOp"], stack["groups"]) +} - rule, err := p.readFilter(nestingLevel+1, op, nil) - if err != nil { - return nil, err - } +func (c *current) onFilterRuleGroups13(group interface{}) (interface{}, error) { + return group, nil - rules = append(rules, rule) - negate = false - continue - } +} - if next == operator { - continue - } +func (p *parser) callonFilterRuleGroups13() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterRuleGroups13(stack["group"]) +} - // When the current operator is a "!", the next one can't be a logical operator. - if operator != "!" && (next == "&" || next == "|") { - if operator == "&" { - if len(rules) > 1 { - rules = []Filter{&All{rules: rules}} - } - - operator = next - } else if operator == "|" || (operator == "!" && next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] - - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } - - rules = append(rules, rule) - } +func (c *current) onFilterRuleGroup2(chainsAndOp, negateGroups, groups interface{}) (interface{}, error) { + if negateGroups != nil { + groups = &None{rules: []Filter{groups.(Filter)}} + } - continue - } + if chainsAndOp == nil { + return groups, nil + } - return nil, p.parseError(next, fmt.Sprintf("operator level %d", nestingLevel)) - } else { - if negate { - negate = false - rules = append(rules, &None{rules: []Filter{condition}}) - } else { - rules = append(rules, condition) - } + rule := chainsAndOp.([]interface{})[0].(Filter) + logicalOp := chainsAndOp.([]interface{})[1].(string) - if next == "" { - break - } + chain, merged := mergeChains(rule, logicalOp, []interface{}{groups.(Filter)}) + if merged { + return chain, nil + } - if next == ")" { - p.openParenthesis-- + return appendToNewChain(rule, logicalOp, groups.(Filter)) - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } +} - break - } +func (p *parser) callonFilterRuleGroup2() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterRuleGroup2(stack["chainsAndOp"], stack["negateGroups"], stack["groups"]) +} - return nil, p.parseError(next, "") - } +func (c *current) onFilterRuleGroup16(chain interface{}) (interface{}, error) { + return chain, nil - if next == operator { - continue - } +} - if next == "&" || next == "|" { - if operator == "" || operator == "&" { - if operator == "&" && len(rules) > 1 { - all := &All{rules: rules} - rules = []Filter{all} - } - - operator = next - } else if operator == "" || (operator == "!" && next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] - - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } - - rules = append(rules, rule) - } +func (p *parser) callonFilterRuleGroup16() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterRuleGroup16(stack["chain"]) +} - continue - } +func (c *current) onFilterChains2(chain, logicalOp, chains interface{}) (interface{}, error) { + return appendToNewChain(chain.(Filter), logicalOp.(string), chains.(Filter)) - return nil, p.parseError(next, "") - } +} + +func (p *parser) callonFilterChains2() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterChains2(stack["chain"], stack["logicalOp"], stack["chains"]) +} + +func (c *current) onFilterChains10(chain interface{}) (interface{}, error) { + return chain, nil + +} + +func (p *parser) callonFilterChains10() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterChains10(stack["chain"]) +} + +func (c *current) onFilterChainExpr1(rule, condAndOp interface{}) (interface{}, error) { + if condAndOp == nil { + return rule, nil } - if nestingLevel == 0 && p.pos < p.length { - return nil, p.parseError(operator, "Did not read full filter") + conditions := condAndOp.([]interface{}) + chain, merged := mergeChains(rule.(Filter), conditions[0].(string), conditions[1:]) + if merged { + return chain, nil } - if nestingLevel == 0 && p.openParenthesis > 0 { - return nil, fmt.Errorf("invalid filter '%s', missing %d closing ')' at pos %d", p.tag, p.openParenthesis, p.pos) + return appendToNewChain(rule.(Filter), conditions[0].(string), conditions[1].(Filter)) +} + +func (p *parser) callonFilterChainExpr1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterChainExpr1(stack["rule"], stack["condAndOp"]) +} + +func (c *current) onFilterChainOrCondExpr1(negate, cond, condAndOp interface{}) (interface{}, error) { + rule := cond.(Filter) + if negate != nil { + rule = &None{rules: []Filter{rule}} } - if nestingLevel == 0 && p.openParenthesis < 0 { - return nil, fmt.Errorf("invalid filter '%s', unexpected closing ')' at pos %d", p.tag, p.pos) + if condAndOp == nil { + return rule, nil } - var chain Filter - switch operator { - case "&": - chain = &All{rules: rules} - case "|": - chain = &Any{rules: rules} - case "!": - chain = &None{rules: rules} - case "": - if nestingLevel == 0 && rules != nil { - // There is only one filter tag, no chain - return rules[0], nil - } + conditions := condAndOp.([]interface{}) + logicalOp := conditions[0].(string) + condition := conditions[2].(Filter) + if conditions[1] != nil { + condition = &None{rules: []Filter{condition}} + } - chain = &All{rules: rules} - default: - return nil, p.parseError(operator, "") + chain, err := NewChain(rule, logicalOp) + if err != nil { + return nil, err } + chain.Add(condition.(Filter)) + return chain, nil } -// readCondition reads the next filter.Filter. -// returns nil if there is no char to read and an error on parsing failure. -func (p *Parser) readCondition() (Filter, error) { - column, err := p.readColumn() - if err != nil || column == "" { +func (p *parser) callonFilterChainOrCondExpr1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFilterChainOrCondExpr1(stack["negate"], stack["cond"], stack["condAndOp"]) +} + +func (c *current) onConditionExpr2(col, op, val interface{}) (interface{}, error) { + column, err := url.QueryUnescape(col.(string)) + if err != nil { return nil, err } - operator := "" - if strings.Contains("=> 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. +func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +type resultTuple struct { + v interface{} + b bool + end savepoint +} + +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. + ChoiceAltCnt map[string]map[string]int +} + +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]interface{} + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string - return &Equal{column: column, value: value}, nil - case "!=": - if strings.Contains(value, "*") { - return &Unlike{column: column, value: value}, nil + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]interface{} +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]interface{}) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. +func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr interface{}) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]interface{}, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] } - return &UnEqual{column: column, value: value}, nil - case ">": - return &GreaterThan{column: column, value: value}, nil - case ">=": - return &GreaterThanOrEqual{column: column, value: value}, nil - case "<": - return &LessThan{column: column, value: value}, nil - case "<=": - return &LessThanOrEqual{column: column, value: value}, nil + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +func (p *parser) parse(g *grammar) (val interface{}, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. + defer func() { + if e := recover(); e != nil { + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRule(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] default: - return nil, fmt.Errorf("invalid operator %s provided", operator) + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] } } -// readColumn reads a column name from the Parser.tag. -// returns empty string if there is no char to read. -func (p *Parser) readColumn() (string, error) { - return url.QueryUnescape(p.readUntil("=()&|> p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val interface{} + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (interface{}, bool) { + start := p.pt + val, ok := p.parseExpr(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + + val = actVal + } + return val, ok +} + +func (p *parser) parseAndCodeExpr(and *andCodeExpr) (interface{}, bool) { + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (interface{}, bool) { + pt := p.pt + p.pushV() + _, ok := p.parseExpr(and.expr) + p.popV() + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (interface{}, bool) { + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (interface{}, bool) { + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false } -// readValue reads a single value from the Parser.tag. -// returns empty string and a parsing error on invalid filter -func (p *Parser) readValue() (string, error) { - value := p.readUntil("()&|><") - if value == "" { - return "", nil +func (p *parser) parseChoiceExpr(ch *choiceExpr) (interface{}, bool) { + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + p.pushV() + val, ok := p.parseExpr(alt) + p.popV() + if ok { + return val, ok + } } + return nil, false +} - return url.QueryUnescape(value) +func (p *parser) parseLabeledExpr(lab *labeledExpr) (interface{}, bool) { + p.pushV() + val, ok := p.parseExpr(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok } -// readUntil reads chars until any of the given characters -// May return empty string if there is no char to read -func (p *Parser) readUntil(chars string) string { - var buffer string - for char := p.readChar(); char != ""; char = p.readChar() { - if strings.Contains(chars, char) { - p.pos-- - break +func (p *parser) parseLitMatcher(lit *litMatcher) (interface{}, bool) { + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} - buffer += char +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (interface{}, bool) { + ok, err := not.run(p) + if err != nil { + p.addErr(err) } - return buffer + return nil, !ok } -// readChar peeks the next char of the Parser.tag and increments the Parser.pos by one -// returns empty if there is no char to read -func (p *Parser) readChar() string { - if p.pos < p.length { - pos := p.pos - p.pos++ +func (p *parser) parseNotExpr(not *notExpr) (interface{}, bool) { + pt := p.pt + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExpr(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restore(pt) + + return nil, !ok +} - return string(p.tag[pos]) +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (interface{}, bool) { + var vals []interface{} + + for { + p.pushV() + val, ok := p.parseExpr(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (interface{}, bool) { + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExpr(recover.expr) + p.popRecovery() - return "" + return val, ok } -// nextChar peeks the next char from the parser tag -// returns empty string if there is no char to read -func (p *Parser) nextChar() string { - if p.pos < p.length { - return string(p.tag[p.pos]) +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (interface{}, bool) { + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) } - return "" + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRule(rule) } -// parseError returns a formatted and detailed parser error. -// If you don't provide the char that causes the parser to fail, the char at `p.pos` is automatically used. -// By specifying the `msg` arg you can provide additional err hints that can help debugging. -func (p *Parser) parseError(invalidChar string, msg string) error { - if invalidChar == "" { - pos := p.pos - if p.pos == p.length { - pos-- +func (p *parser) parseSeqExpr(seq *seqExpr) (interface{}, bool) { + vals := make([]interface{}, 0, len(seq.exprs)) + + pt := p.pt + for _, expr := range seq.exprs { + val, ok := p.parseExpr(expr) + if !ok { + p.restore(pt) + return nil, false } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (interface{}, bool) { - invalidChar = string(p.tag[pos]) + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExpr(recoverExpr); ok { + return val, ok + } + } } - if msg != "" { - msg = ": " + msg + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (interface{}, bool) { + var vals []interface{} + + for { + p.pushV() + val, ok := p.parseExpr(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) } +} - return fmt.Errorf("invalid filter '%s', unexpected %s at pos %d%s", p.tag, invalidChar, p.pos, msg) +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (interface{}, bool) { + p.pushV() + val, _ := p.parseExpr(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true }