diff --git a/.gitignore b/.gitignore index 84c048a7..3b9f9945 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /build/ + +# Exclude the autogenerated parser.output file +parser.output diff --git a/internal/filter/filter_test.go b/internal/filter/filter_test.go new file mode 100644 index 00000000..aa7048ed --- /dev/null +++ b/internal/filter/filter_test.go @@ -0,0 +1,140 @@ +package filter + +import ( + "errors" + "github.com/stretchr/testify/assert" + "regexp" + "strings" + "testing" +) + +const unknown string = "unknown" + +var evalError = errors.New("evaluation error") + +func TestFilter(t *testing.T) { + t.Parallel() + + filterable := &filterableType{ + key: "domain", + value: "example.com", + } + + t.Run("InvalidOperator", func(t *testing.T) { + chain, err := NewChain("unknown", nil) + assert.Nil(t, chain) + assert.EqualError(t, err, "invalid logical operator provided: \"unknown\"") + + condition, err := NewCondition("column", "unknown", "value") + assert.Nil(t, condition) + assert.EqualError(t, err, "invalid comparison operator provided: \"unknown\"") + }) + + t.Run("EvaluationError", func(t *testing.T) { + t.Parallel() + + testInvalidData := []struct { + Expression string + }{ + {"domain=" + unknown}, + {"domain!=" + unknown}, + {"domain<" + unknown}, + {"domain<=" + unknown}, + {"domain>" + unknown}, + {"domain>=" + unknown}, + {"domain~" + unknown}, + {"domain!~" + unknown}, + {"!(domain!=" + unknown + ")"}, + {"domain=" + unknown + "&domain<=test.example.com"}, + {"domain<=" + unknown + "|domain<=test.example.com"}, + } + + for _, td := range testInvalidData { + f, err := Parse(td.Expression) + assert.NoError(t, err) + + matched, err := f.Eval(filterable) + assert.EqualError(t, err, evalError.Error()) + assert.Equal(t, matched, false, "unexpected filter result for %q", td.Expression) + } + }) + + t.Run("EvaluateFilter", func(t *testing.T) { + t.Parallel() + + testdata := []struct { + Expression string + Expected bool + }{ + {"domain=example.com", 
true}, + {"domain!=example.com", false}, + {"domain=test.example.com", false}, + {"name!=example.com", false}, + {"domain", true}, + {"name", false}, + {"display_name", false}, + {"!name", true}, + {"domain~example*", true}, + {"domain!~example*", false}, + {"domain~example*&!domain", false}, + {"domain>a", true}, + {"domainz", false}, + {"domain=example&domain<=test.example.com", true}, + {"domain<=example|domain<=test.example.com", true}, + {"domain<=example|domain>=test.example.com", false}, + } + + for _, td := range testdata { + f, err := Parse(td.Expression) + if assert.NoError(t, err, "parsing %q should not return an error", td.Expression) { + matched, err := f.Eval(filterable) + assert.NoError(t, err) + assert.Equal(t, td.Expected, matched, "unexpected filter result for %q", td.Expression) + } + } + }) +} + +type filterableType struct { + key string + value string +} + +func (f *filterableType) EvalEqual(_ string, value string) (bool, error) { + if value == unknown { + return false, evalError + } + + return strings.EqualFold(f.value, value), nil +} + +func (f *filterableType) EvalLess(_ string, value string) (bool, error) { + if value == unknown { + return false, evalError + } + + return f.value < value, nil +} + +func (f *filterableType) EvalLike(_ string, value string) (bool, error) { + if value == unknown { + return false, evalError + } + + regex := regexp.MustCompile("^example.*$") + return regex.MatchString(f.value), nil +} + +func (f *filterableType) EvalLessOrEqual(_ string, value string) (bool, error) { + if value == unknown { + return false, evalError + } + + return f.value <= value, nil +} + +func (f *filterableType) EvalExists(key string) bool { + return f.key == key +} diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go new file mode 100644 index 00000000..ff846c44 --- /dev/null +++ b/internal/filter/lexer.go @@ -0,0 +1,177 @@ +//go:generate goyacc -l -v parser.output -o parser.go parser.y + +package filter + +import ( + "errors" + 
"fmt" + "regexp" + "strings" + "text/scanner" +) + +// identifiersMatcher contains a compiled regexp and is used by the Lexer to match filter identifiers. +// Currently, it allows to match any character except a LogicalOp and CompOperator. +var identifiersMatcher = regexp.MustCompile("[^!&|~<>=()]") + +// tokenDisplayNames contains a list of all the defined parser tokens and their respective +// friendly names used to output in error messages. +var tokenDisplayNames = map[string]string{ + "$unk": `"unknown"`, + "T_EQUAL": `"="`, + "T_UNEQUAL": `"!="`, + "T_LIKE": `"~"`, + "T_UNLIKE": `"!~"`, + "T_LESS_THAN": `"<"`, + "T_GREATER_THAN": `">"`, + "T_LESS_THAN_OR_EQUAL": `"<="`, + "T_GREATER_THAN_OR_EQUAL": `">="`, + "T_IDENTIFIER": `"column or value"`, +} + +// init just sets the global yyErrorVerbose variable to true. +func init() { + // Enable parsers error verbose to get more context of the parsing failures + yyErrorVerbose = true +} + +// Parse wraps the auto generated yyParse function. +// It parses the given filter string and returns on success a Filter instance. +func Parse(expr string) (rule Filter, err error) { + lex := new(Lexer) + lex.IsIdentRune = isIdentRune + lex.Init(strings.NewReader(expr)) + + // Set the scanner mode to recognize only identifiers. This way all unrecognized tokens will be returned + // just as they are, and our Lexer#Lex() method will then recognize whatever valid input is required. + // Note: This is in fact not necessary, as our custom function `isIdentRune` accepts any token that matches the + // regex pattern `identifiersMatcher`, so the scanner would never match all the scanner.GoTokens except ScanIdents. + lex.Mode = scanner.ScanIdents + + // scanner.Init sets the error function to nil, therefore, we have to register + // our error function after the scanner initialization. 
+ lex.Scanner.Error = lex.ScanError + + defer func() { + // All the grammar rules panics when encountering any errors while reducing the filter rules, so try + // to recover from it and return an error instead. Since we're using a named return values, we can set + // the err value even in deferred function. See https://go.dev/blog/defer-panic-and-recover + if r := recover(); r != nil { + err = errors.New(fmt.Sprint(r)) + } + + if err != nil { + // The lexer may contain some incomplete filter rules constructed before the parser panics, so reset it. + rule = nil + } + }() + + yyParse(lex) + + rule, err = lex.rule, lex.err + return +} + +// Lexer is used to tokenize the filter input into a set of literals. +// This is just a wrapper around the Scanner type and implements the yyLexer interface used by the parser. +type Lexer struct { + scanner.Scanner + + rule Filter + err error +} + +func (l *Lexer) Lex(yyval *yySymType) int { + token := l.Scan() + lit := l.TokenText() + yyval.text = lit + if token == scanner.Ident { + return T_IDENTIFIER + } + + switch lit { + case "&": + return '&' + case "|": + return '|' + case "~": + return T_LIKE + case "=": + return T_EQUAL + case "(": + return '(' + case ")": + return ')' + case "!": + next := l.Peek() + switch next { + case '=', '~': + yyval.text = "!" + string(next) + // Since we manually picked the next char input, we also need to advance the internal scanner + // states by calling Scan. Otherwise, the same rune will be scanned multiple times. + l.Scan() + + if next == '~' { + return T_UNLIKE + } else { + return T_UNEQUAL + } + default: + return '!' + } + case "<": + if next := l.Peek(); next == '=' { + yyval.text = "<=" + // Since we manually picked the next char input, we also need to advance the internal scanner + // states by calling Scan. Otherwise, the same rune will be scanned multiple times. 
+ l.Scan() + + return T_LESS_THAN_OR_EQUAL + } + + return T_LESS_THAN + case ">": + if next := l.Peek(); next == '=' { + yyval.text = ">=" + // Since we manually picked the next char input, we also need to advance the internal scanner + // states by calling Scan. Otherwise, the same rune will be scanned multiple times. + l.Scan() + + return T_GREATER_THAN_OR_EQUAL + } + + return T_GREATER_THAN + } + + // No more inputs to scan that we are interested in. + // Scan returns EOF as well if there's no more token to stream, but we just want to be explicit. + return scanner.EOF +} + +// Error receives any syntax/semantic errors produced by the parser. +// +// The parser never returns an error when it fails to parse, but will forward the errors to our lexer with some +// additional context instead. This function then wraps the provided err and adds line, column number and offset +// to the error string. Error is equivalent to "yyerror" in the original yacc. +func (l *Lexer) Error(s string) { + // Replace all parser token names by their corresponding friendly names. + for token, name := range tokenDisplayNames { + s = strings.ReplaceAll(s, token, name) + } + + l.err = fmt.Errorf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s) + + // Always reset the current filter rule when encountering an error. + l.rule = nil +} + +// ScanError is used to capture all errors the Scanner encounters. +// +// It's a rare case that the scanner actually will fail to scan the input string, but in these cases it will just +// output to std.Err and we won't be able to notice this. Hence, this function is registered by the filter.Parse +// function after the Lexer initialisation. +func (l *Lexer) ScanError(_ *scanner.Scanner, msg string) { l.Error(msg) } + +// isIdentRune provides custom implementation of scanner.IsIdentRune. +// This function determines whether a given character is allowed to be part of an identifier. 
+func isIdentRune(ch rune, _ int) bool { return identifiersMatcher.MatchString(string(ch)) } diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 71dfe6b4..0c833f2a 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -1,357 +1,614 @@ +// Code generated by goyacc -l -v parser.output -o parser.go parser.y. DO NOT EDIT. + package filter +import __yyfmt__ "fmt" + import ( - "fmt" "net/url" - "strings" + "slices" ) -type Parser struct { - tag string - pos, length, openParenthesis int -} - -// Parse parses an object filter expression. -func Parse(expression string) (Filter, error) { - parser := &Parser{tag: expression, length: len(expression)} - if parser.length == 0 { - return &Chain{op: All}, nil - } +// reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). +// When the operator type of the first argument (Filter) is not of type filter.Any or the given operator is not +// of type filter.All, this will just create a new chain with the new op and append all the filter rules to it. +// Otherwise, it will pop the last pushed rule of that chain (first argument) and append it to the new *And chain. +// +// Example: `foo=bar|bar~foo&col!~val` +// The first argument `left` is supposed to be a filter.Any Chain containing the first two conditions. +// We then call this function when the parser is processing the logical `&` op and the Unlike condition, +// and what this function will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. 
+func reduceFilter(left Filter, op string, right Filter) Filter { + chain, ok := left.(*Chain) + if ok && chain.op == Any && LogicalOp(op) == All { + // Retrieve the last pushed filter Condition and append it to the new "And" chain instead + back := chain.pop() + // Chain#pop can return a filter Chain, and since we are only allowed to regroup two filter conditions, + // we must traverse the last element of every single popped Chain till we reach a filter condition. + for back != nil { + if backChain, ok := back.(*Chain); !ok || backChain.grouped { + // If the popped element is not of type filter Chain or the filter chain is parenthesized, + // we don't need to continue here, so break out of the loop. + break + } - return parser.readFilter(0, "", nil) -} + // Re-add the just popped item before stepping into it and popping its last item. + chain.add(back) -// readFilter reads the entire filter from the Parser.tag and derives a filter.Filter from it. -// Returns an error on parsing failure. -func (p *Parser) readFilter(nestingLevel int, operator string, rules []Filter) (Filter, error) { - negate := false - for p.pos < p.length { - condition, err := p.readCondition() - if err != nil { - return nil, err + chain = back.(*Chain) + back = chain.pop() } - next := p.readChar() - if condition == nil { - if next == "!" { - negate = true - continue - } + andChain, _ := NewChain(All, back) + // We don't need to regroup an already grouped filter chain, since braces gain + // a higher precedence than any logical operators. 
+ if anyChain, ok := right.(*Chain); ok && anyChain.op == Any && !chain.grouped && !anyChain.grouped { + andChain.add(anyChain.top()) + // Prepend the newly created All chain + anyChain.rules = slices.Insert[[]Filter, Filter](anyChain.rules, 0, andChain) + chain.add(anyChain) + } else { + andChain.add(right) + chain.add(andChain) + } - if operator == "" && len(rules) > 0 && (next == "&" || next == "|") { - operator = next - continue - } + return left + } - if next == "" { - break - } + // If the given operator is the same as the already existing chains operator (*chain), + // we don't need to create another chain of the same operator type. Avoids something + // like &Chain{op: All, &Chain{op: All, ...}} + if chain == nil || chain.op != LogicalOp(op) { + var err error + chain, err = NewChain(LogicalOp(op), left) + if err != nil { + // Just panic, filter.Parse will try to recover from this. + panic(err) + } + } - if next == ")" { - p.openParenthesis-- + chain.add(right) - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } + return chain +} - break - } +type yySymType struct { + yys int + expr Filter + text string +} - return nil, p.parseError(next, "") - } +const T_EQUAL = 57346 +const T_UNEQUAL = 57347 +const T_LIKE = 57348 +const T_UNLIKE = 57349 +const T_LESS_THAN = 57350 +const T_GREATER_THAN = 57351 +const T_LESS_THAN_OR_EQUAL = 57352 +const T_GREATER_THAN_OR_EQUAL = 57353 +const T_IDENTIFIER = 57354 +const PREFER_SHIFTING_LOGICAL_OP = 57355 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "T_EQUAL", + "T_UNEQUAL", + "T_LIKE", + "T_UNLIKE", + "T_LESS_THAN", + "T_GREATER_THAN", + "T_LESS_THAN_OR_EQUAL", + "T_GREATER_THAN_OR_EQUAL", + "T_IDENTIFIER", + "\"|\"", + "\"&\"", + "\"!\"", + "PREFER_SHIFTING_LOGICAL_OP", + "\"(\"", + "\")\"", +} - if next == "(" { - if p.nextChar() == "&" || p.nextChar() == "|" { - // When a 
logical operator follows directly after the opening parenthesis "(", - // this can't be a valid expression. E.g. "!(&" - next = p.readChar() +var yyStatenames = [...]string{} - return nil, p.parseError(next, "") - } +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 - p.openParenthesis++ +var yyExca = [...]int8{ + -1, 1, + 1, -1, + -2, 0, +} - op := "" - if negate { - op = "!" - } +const yyPrivate = 57344 - rule, err := p.readFilter(nestingLevel+1, op, nil) - if err != nil { - return nil, err - } +const yyLast = 37 - rules = append(rules, rule) - negate = false - continue - } +var yyAct = [...]int8{ + 15, 9, 8, 17, 6, 1, 32, 24, 25, 26, + 27, 28, 30, 29, 31, 4, 17, 5, 9, 8, + 22, 14, 2, 23, 33, 16, 13, 20, 21, 3, + 18, 7, 0, 19, 10, 11, 12, +} - if next == operator { - continue - } +var yyPact = [...]int16{ + -11, 5, 5, 5, 5, 4, -1000, -11, -1000, -1000, + -11, -11, -11, -1000, -11, 3, -1000, -1000, -1000, -1000, + -1000, -1000, -12, -9, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, -1000, +} - // When the current operator is a "!", the next one can't be a logical operator. - if operator != "!" && (next == "&" || next == "|") { - if operator == "&" { - if len(rules) > 1 { - rules = []Filter{&Chain{op: All, rules: rules}} - } +var yyPgo = [...]int8{ + 0, 5, 22, 29, 15, 26, 25, 23, 17, 0, + 31, +} - operator = next - } else if operator == "|" || (operator == "!" 
&& next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] +var yyR1 = [...]int8{ + 0, 1, 1, 1, 2, 2, 3, 3, 4, 5, + 5, 5, 6, 9, 8, 8, 10, 10, 7, 7, + 7, 7, 7, 7, 7, 7, +} - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } +var yyR2 = [...]int8{ + 0, 3, 1, 3, 3, 1, 3, 1, 2, 3, + 3, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +} - rules = append(rules, rule) - } +var yyChk = [...]int16{ + -1000, -1, -2, -3, -4, -8, 15, -10, 14, 13, + -10, -10, -10, -5, 17, -9, -6, 12, -2, -2, + -4, -4, -1, -7, 4, 5, 6, 7, 8, 10, + 9, 11, 18, -9, +} - continue - } +var yyDef = [...]int8{ + 14, -2, 2, 5, 7, 0, 15, 14, 16, 17, + 14, 14, 14, 8, 14, 12, 11, 13, 3, 1, + 4, 6, 0, 0, 18, 19, 20, 21, 22, 23, + 24, 25, 9, 10, +} - return nil, p.parseError(next, fmt.Sprintf("operator level %d", nestingLevel)) - } else { - if negate { - negate = false - rules = append(rules, &Chain{op: None, rules: []Filter{condition}}) - } else { - rules = append(rules, condition) - } +var yyTok1 = [...]int8{ + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 15, 3, 3, 3, 3, 14, 3, + 17, 18, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 13, +} - if next == "" { - break - } +var yyTok2 = [...]int8{ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 16, +} - if next == ")" { - p.openParenthesis-- +var yyTok3 = [...]int8{ + 0, +} - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } +var yyErrorMessages = [...]struct { + state int + token int + msg string +}{} - break - } +/* 
parser for yacc output */ - return nil, p.parseError(next, "") - } +var ( + yyDebug = 0 + yyErrorVerbose = false +) - if next == operator { - continue - } +type yyLexer interface { + Lex(lval *yySymType) int + Error(s string) +} - if next == "&" || next == "|" { - if operator == "" || operator == "&" { - if operator == "&" && len(rules) > 1 { - all := &Chain{op: All, rules: rules} - rules = []Filter{all} - } +type yyParser interface { + Parse(yyLexer) int + Lookahead() int +} - operator = next - } else if operator == "" || (operator == "!" && next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] +type yyParserImpl struct { + lval yySymType + stack [yyInitialStackSize]yySymType + char int +} - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } +func (p *yyParserImpl) Lookahead() int { + return p.char +} - rules = append(rules, rule) - } +func yyNewParser() yyParser { + return &yyParserImpl{} +} - continue - } +const yyFlag = -1000 - return nil, p.parseError(next, "") +func yyTokname(c int) string { + if c >= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] } } + return __yyfmt__.Sprintf("tok-%v", c) +} - if nestingLevel == 0 && p.pos < p.length { - return nil, p.parseError(operator, "Did not read full filter") +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } } + return __yyfmt__.Sprintf("state-%v", s) +} - if nestingLevel == 0 && p.openParenthesis > 0 { - return nil, fmt.Errorf("invalid filter '%s', missing %d closing ')' at pos %d", p.tag, p.openParenthesis, p.pos) - } +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 - if nestingLevel == 0 && p.openParenthesis < 0 { - return nil, fmt.Errorf("invalid filter '%s', unexpected closing ')' at pos %d", p.tag, p.pos) + if 
!yyErrorVerbose { + return "syntax error" } - var chain Filter - switch operator { - case "&": - chain = &Chain{op: All, rules: rules} - case "|": - chain = &Chain{op: Any, rules: rules} - case "!": - chain = &Chain{op: None, rules: rules} - case "": - if nestingLevel == 0 && rules != nil { - // There is only one filter tag, no chain - return rules[0], nil + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg } - - chain = &Chain{op: All, rules: rules} - default: - return nil, p.parseError(operator, "") } - return chain, nil -} + res := "syntax error: unexpected " + yyTokname(lookAhead) -// readCondition reads the next filter.Filter. -// returns nil if there is no char to read and an error on parsing failure. -func (p *Parser) readCondition() (Filter, error) { - column, err := p.readColumn() - if err != nil || column == "" { - return nil, err - } + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) - operator := "" - if strings.Contains("=>= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } } - if operator == "" { - return NewExists(column), nil - } + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || int(yyExca[i+1]) != state { + i += 2 + } - if strings.Contains(">= 0; i += 2 { + tok := int(yyExca[i]) + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) } - } - value, err := p.readValue() - if err != nil { - return nil, err + // If the default action is to accept or reduce, give up. 
+ if yyExca[i+1] != 0 { + return res + } } - condition, err := p.createCondition(column, operator, value) - if err != nil { - return nil, err + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) } - - return condition, nil + return res } -// createCondition creates a filter.Filter based on the given operator. -// returns nil when invalid operator is given. -func (p *Parser) createCondition(column string, operator string, value string) (Filter, error) { - column = strings.TrimSpace(column) - switch operator { - case "=": - if strings.Contains(value, "*") { - return &Condition{op: Like, column: column, value: value}, nil +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = int(yyTok1[0]) + goto out + } + if char < len(yyTok1) { + token = int(yyTok1[char]) + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = int(yyTok2[char-yyPrivate]) + goto out } - - return &Condition{op: Equal, column: column, value: value}, nil - case "!=": - if strings.Contains(value, "*") { - return &Condition{op: UnLike, column: column, value: value}, nil + } + for i := 0; i < len(yyTok3); i += 2 { + token = int(yyTok3[i+0]) + if token == char { + token = int(yyTok3[i+1]) + goto out } + } - return &Condition{op: UnEqual, column: column, value: value}, nil - case ">": - return &Condition{op: GreaterThan, column: column, value: value}, nil - case ">=": - return &Condition{op: GreaterThanEqual, column: column, value: value}, nil - case "<": - return &Condition{op: LessThan, column: column, value: value}, nil - case "<=": - return &Condition{op: LessThanEqual, column: column, value: value}, nil - default: - return nil, fmt.Errorf("invalid operator %s provided", operator) +out: + if token == 0 { + token = int(yyTok2[1]) /* unknown char */ } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) 
+ } + return char, token } -// readColumn reads a column name from the Parser.tag. -// returns empty string if there is no char to read. -func (p *Parser) readColumn() (string, error) { - return url.QueryUnescape(p.readUntil("=()&|><") - if value == "" { - return "", nil +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. + yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) } - return url.QueryUnescape(value) -} + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate -// readUntil reads chars until any of the given characters -// May return empty string if there is no char to read -func (p *Parser) readUntil(chars string) string { - var buffer string - for char := p.readChar(); char != ""; char = p.readChar() { - if strings.Contains(chars, char) { - p.pos-- - break +yynewstate: + yyn = int(yyPact[yystate]) + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = int(yyAct[yyn]) + if int(yyChk[yyn]) == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- } - - buffer += char + goto yystack } - return buffer -} - -// readChar peeks the 
next char of the Parser.tag and increments the Parser.pos by one -// returns empty if there is no char to read -func (p *Parser) readChar() string { - if p.pos < p.length { - pos := p.pos - p.pos++ +yydefault: + /* default state action */ + yyn = int(yyDef[yystate]) + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } - return string(p.tag[pos]) + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = int(yyExca[xi+0]) + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = int(yyExca[xi+1]) + if yyn < 0 { + goto ret0 + } } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = int(yyAct[yyn]) /* simulate a shift of "error" */ + if int(yyChk[yystate]) == yyErrCode { + goto yystack + } + } - return "" -} + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... 
abort */ + goto ret1 -// nextChar peeks the next char from the parser tag -// returns empty string if there is no char to read -func (p *Parser) nextChar() string { - if p.pos < p.length { - return string(p.tag[p.pos]) + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } } - return "" -} + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } -// parseError returns a formatted and detailed parser error. -// If you don't provide the char that causes the parser to fail, the char at `p.pos` is automatically used. -// By specifying the `msg` arg you can provide additional err hints that can help debugging. -func (p *Parser) parseError(invalidChar string, msg string) error { - if invalidChar == "" { - pos := p.pos - if p.pos == p.length { - pos-- + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= int(yyR2[yyn]) + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. 
+ if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = int(yyR1[yyn]) + yyg := int(yyPgo[yyn]) + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = int(yyAct[yyg]) + } else { + yystate = int(yyAct[yyj]) + if int(yyChk[yystate]) != -yyn { + yystate = int(yyAct[yyg]) } - - invalidChar = string(p.tag[pos]) } + // dummy call; replaced with literal code + switch yynt { + + case 1: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr + } + case 2: + yyDollar = yyS[yypt-1 : yypt+1] + { + yylex.(*Lexer).rule = yyVAL.expr + } + case 3: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr + } + case 4: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) + } + case 6: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) + } + case 8: + yyDollar = yyS[yypt-2 : yypt+1] + { + if yyDollar[1].text != "" { + // NewChain is only going to return an error if an invalid operator is specified, and since + // we explicitly provide the None operator, we don't expect an error to be returned. + yyVAL.expr, _ = NewChain(None, yyDollar[2].expr) + } else { + yyVAL.expr = yyDollar[2].expr + } + } + case 9: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.expr = yyDollar[2].expr + if chain, ok := yyVAL.expr.(*Chain); ok { + chain.grouped = true + } + } + case 10: + yyDollar = yyS[yypt-3 : yypt+1] + { + cond, err := NewCondition(yyDollar[1].text, CompOperator(yyDollar[2].text), yyDollar[3].text) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. 
+ panic(err) + } - if msg != "" { - msg = ": " + msg - } + yyVAL.expr = cond + } + case 12: + yyDollar = yyS[yypt-1 : yypt+1] + { + yyVAL.expr = NewExists(yyDollar[1].text) + } + case 13: + yyDollar = yyS[yypt-1 : yypt+1] + { + column, err := url.QueryUnescape(yyDollar[1].text) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. + panic(err) + } - return fmt.Errorf("invalid filter '%s', unexpected %s at pos %d%s", p.tag, invalidChar, p.pos, msg) + yyVAL.text = column + } + case 14: + yyDollar = yyS[yypt-0 : yypt+1] + { + yyVAL.text = "" + } + } + goto yystack /* stack new state and value */ } diff --git a/internal/filter/parser.y b/internal/filter/parser.y new file mode 100644 index 00000000..c9bd14f1 --- /dev/null +++ b/internal/filter/parser.y @@ -0,0 +1,216 @@ +%{ + +package filter + +import ( + "net/url" + "slices" +) + +// reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). +// When the operator type of the first argument (Filter) is not of type filter.Any or the given operator is not +// of type filter.All, this will just create a new chain with the new op and append all the filter rules to it. +// Otherwise, it will pop the last pushed rule of that chain (first argument) and append it to the new *And chain. +// +// Example: `foo=bar|bar~foo&col!~val` +// The first argument `left` is supposed to be a filter.Any Chain containing the first two conditions. +// We then call this function when the parser is processing the logical `&` op and the Unlike condition, +// and what this function will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. 
+func reduceFilter(left Filter, op string, right Filter) Filter {
+	chain, ok := left.(*Chain)
+	// Only an unparenthesized Any chain may hand its last operand over to the following `&`. A grouped chain
+	// is a single operand: `(a|b)&c` has to stay `(a|b)&c` and must never be turned into `a|(b&c)`.
+	if ok && chain.op == Any && !chain.grouped && LogicalOp(op) == All {
+		// Retrieve the last pushed filter Condition and append it to the new "And" chain instead
+		back := chain.pop()
+		// Chain#pop can return a filter Chain, and since we are only allowed to regroup two filter conditions,
+		// we must traverse the last element of every single popped Chain till we reach a filter condition.
+		for back != nil {
+			backChain, isChain := back.(*Chain)
+			if !isChain || backChain.grouped || backChain.op == None {
+				// Plain conditions, parenthesized chains and negations (`!x` is an ungrouped None chain) are
+				// complete operands. Stepping into a negation would turn `a|!b&c` into `a|!(b&c)`.
+				break
+			}
+
+			// Re-add the just popped item before stepping into it and popping its last item.
+			chain.add(back)
+
+			chain = backChain
+			back = chain.pop()
+		}
+
+		// NewChain only fails for unknown operators, so the error can safely be ignored for the All constant.
+		andChain, _ := NewChain(All, back)
+		// We don't need to regroup an already grouped filter chain, since braces gain
+		// a higher precedence than any logical operators.
+		if anyChain, ok := right.(*Chain); ok && anyChain.op == Any && !chain.grouped && !anyChain.grouped {
+			andChain.add(anyChain.top())
+			// Prepend the newly created All chain
+			anyChain.rules = slices.Insert[[]Filter, Filter](anyChain.rules, 0, andChain)
+			chain.add(anyChain)
+		} else {
+			andChain.add(right)
+			chain.add(andChain)
+		}
+
+		return left
+	}
+
+	// If the given operator is the same as the already existing chains operator (*chain),
+	// we don't need to create another chain of the same operator type. Avoids something
+	// like &Chain{op: All, &Chain{op: All, ...}}
+	if chain == nil || chain.op != LogicalOp(op) {
+		var err error
+		chain, err = NewChain(LogicalOp(op), left)
+		if err != nil {
+			// Just panic, filter.Parse will try to recover from this.
+			panic(err)
+		}
+	} else {
+		// The chain gets extended beyond its closing parenthesis (e.g. `(a|b)|c`), so it no longer represents
+		// exactly one parenthesized group, and a following `&` may regroup the operand appended below.
+		chain.grouped = false
+	}
+
+	chain.add(right)
+
+	return chain
+}
+%}
+
+%union {
+	expr Filter
+	text string
+}
+
+%type <expr> filter_rule
+%type <expr> filter_chain_list
+%type <expr> filter_chain
+%type <expr> maybe_negated_condition_expr
+%type <expr> condition_expr
+%type <expr> exists_expr
+
+%type <text> comparison_op
+%type <text> optional_negation
+%type <text> identifier
+%type <text> logical_op
+
+%token <text> T_EQUAL
+%token <text> T_UNEQUAL
+%token <text> T_LIKE
+%token <text> T_UNLIKE
+%token <text> T_LESS_THAN
+%token <text> T_GREATER_THAN
+%token <text> T_LESS_THAN_OR_EQUAL
+%token <text> T_GREATER_THAN_OR_EQUAL
+%token <text> T_IDENTIFIER
+
+%type <text> "|" "&"
+%type <text> "!"
+
+// This is just used for declaring explicit precedence and resolves shift/reduce conflicts
+// in `filter_chain_list` and `filter_chain` rules.
+%nonassoc PREFER_SHIFTING_LOGICAL_OP
+
+%nonassoc T_EQUAL T_UNEQUAL T_LIKE T_UNLIKE
+%nonassoc T_LESS_THAN T_LESS_THAN_OR_EQUAL T_GREATER_THAN T_GREATER_THAN_OR_EQUAL
+
+%left "|" "&"
+%left "!"
+%left "("
+%right ")"
+
+%%
+
+filter_rule: filter_chain_list logical_op filter_chain_list
+	{
+		$$ = reduceFilter($1, $2, $3)
+		yylex.(*Lexer).rule = $$
+	}
+	| filter_chain_list %prec PREFER_SHIFTING_LOGICAL_OP
+	{
+		yylex.(*Lexer).rule = $$
+	}
+	| filter_rule logical_op filter_chain_list
+	{
+		$$ = reduceFilter($1, $2, $3)
+		yylex.(*Lexer).rule = $$
+	}
+	;
+
+filter_chain_list: filter_chain logical_op maybe_negated_condition_expr
+	{
+		$$ = reduceFilter($1, $2, $3)
+	}
+	| filter_chain %prec PREFER_SHIFTING_LOGICAL_OP
+	;
+
+filter_chain: maybe_negated_condition_expr logical_op maybe_negated_condition_expr
+	{
+		$$ = reduceFilter($1, $2, $3)
+	}
+	| maybe_negated_condition_expr %prec PREFER_SHIFTING_LOGICAL_OP
+	;
+
+maybe_negated_condition_expr: optional_negation condition_expr
+	{
+		if $1 != "" {
+			// NewChain is only going to return an error if an invalid operator is specified, and since
+			// we explicitly provide the None operator, we don't expect an error to be returned.
+ $$, _ = NewChain(None, $2) + } else { + $$ = $2 + } + } + ; + +condition_expr: "(" filter_rule ")" + { + $$ = $2 + if chain, ok := $$.(*Chain); ok { + chain.grouped = true + } + } + | identifier comparison_op identifier + { + cond, err := NewCondition($1, CompOperator($2), $3) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. + panic(err) + } + + $$ = cond + } + | exists_expr + ; + +exists_expr: identifier + { + $$ = NewExists($1) + } + ; + +identifier: T_IDENTIFIER + { + column, err := url.QueryUnescape($1) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. + panic(err) + } + + $$ = column + } + ; + +optional_negation: /* empty */ { $$ = "" } + | "!" + ; + +logical_op: "&" + | "|" + ; + +comparison_op: T_EQUAL + | T_UNEQUAL + | T_LIKE + | T_UNLIKE + | T_LESS_THAN + | T_LESS_THAN_OR_EQUAL + | T_GREATER_THAN + | T_GREATER_THAN_OR_EQUAL + ; + +%% diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index ca386f67..45250362 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -9,57 +9,86 @@ import ( func TestParser(t *testing.T) { t.Parallel() - t.Run("MissingLogicalOperatorsAfterConditionsAreDetected", func(t *testing.T) { - _, err := Parse("(a=b|c=d)e=f") - - expected := "invalid filter '(a=b|c=d)e=f', unexpected e at pos 10: Expected logical operator" - assert.EqualError(t, err, expected, "Errors should be the same") - }) + t.Run("ParseInvalidFilters", func(t *testing.T) { + t.Parallel() - t.Run("MissingLogicalOperatorsAfterOperatorsAreDetected", func(t *testing.T) { - _, err := Parse("(a=b|c=d|)e=f") + _, err := Parse("(a=b|c=d)e=f") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \"column or value\", expecting \"|\" or \"&\"") - expected := "invalid filter '(a=b|c=d|)e=f', unexpected e at pos 11: Expected logical operator" - assert.EqualError(t, err, expected, "Errors should be 
the same") - }) + _, err = Parse("(a=b|c=d|)e=f") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting \"column or value\" or \"(\"") - t.Run("ParserIdentifiesInvalidExpression", func(t *testing.T) { - _, err := Parse("col=(") - assert.EqualError(t, err, "invalid filter 'col=(', unexpected ( at pos 5", "Errors should be the same") + _, err = Parse("col=(") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"(\", expecting \"column or value\"") _, err = Parse("(((x=a)&y=b") - assert.EqualError(t, err, "invalid filter '(((x=a)&y=b', missing 2 closing ')' at pos 11", "Errors should be the same") + assert.EqualError(t, err, "1:12 (11): syntax error: unexpected $end, expecting \"|\" or \"&\" or \")\"") _, err = Parse("(x=a)&y=b)") - assert.EqualError(t, err, "invalid filter '(x=a)&y=b)', unexpected ) at pos 10", "Errors should be the same") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting \"|\" or \"&\"") _, err = Parse("!(&") - assert.EqualError(t, err, "invalid filter '!(&', unexpected & at pos 3", "Errors should be the same") - - _, err = Parse("!(!&") - assert.EqualError(t, err, "invalid filter '!(!&', unexpected & at pos 4: operator level 1", "Errors should be the same") - - _, err = Parse("!(|test") - assert.EqualError(t, err, "invalid filter '!(|test', unexpected | at pos 3", "Errors should be the same") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") _, err = Parse("foo&bar=(te(st)") - assert.EqualError(t, err, "invalid filter 'foo&bar=(te(st)', unexpected ( at pos 9", "Errors should be the same") + assert.EqualError(t, err, "1:9 (8): syntax error: unexpected \"(\", expecting \"column or value\"") _, err = Parse("foo&bar=te(st)") - assert.EqualError(t, err, "invalid filter 'foo&bar=te(st)', unexpected ( at pos 11", "Errors should be the same") + assert.EqualError(t, err, "1:11 (10): syntax error: unexpected \"(\", expecting 
\"|\" or \"&\"") _, err = Parse("foo&bar=test)") - assert.EqualError(t, err, "invalid filter 'foo&bar=test)', unexpected ) at pos 13", "Errors should be the same") + assert.EqualError(t, err, "1:13 (12): syntax error: unexpected \")\", expecting \"|\" or \"&\"") _, err = Parse("!()|&()&)") - assert.EqualError(t, err, "invalid filter '!()|&()&)', unexpected closing ')' at pos 9", "Errors should be the same") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \")\", expecting \"column or value\" or \"(\"") + + _, err = Parse("=foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"=\", expecting \"column or value\" or \"(\"") + + _, err = Parse("foo>") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected $end, expecting \"column or value\"") + + _, err = Parse("foo==") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"=\", expecting \"column or value\"") + + _, err = Parse("=>foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"=\", expecting \"column or value\" or \"(\"") + + _, err = Parse("&foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") + + _, err = Parse("&&foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") + + _, err = Parse("(&foo=bar)") + assert.EqualError(t, err, "1:2 (1): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") + + _, err = Parse("(foo=bar|)") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting \"column or value\" or \"(\"") + + _, err = Parse("((((((") + assert.EqualError(t, err, "1:7 (6): syntax error: unexpected $end, expecting \"column or value\" or \"(\"") + + _, err = Parse("foo&bar&col=val!=val") + assert.EqualError(t, err, "1:17 (16): syntax error: unexpected \"!=\", expecting \"|\" or \"&\"") + + _, err = Parse("col%7umn") + assert.EqualError(t, err, "invalid URL escape \"%7u\"") + + 
_, err = Parse("((0&((((((((((((((((((((((0=0)") + assert.EqualError(t, err, "1:31 (30): syntax error: unexpected $end, expecting \"|\" or \"&\" or \")\"") + + // IPL web filter parser accepts such invalid strings, but our Lexer doesn't. + _, err = Parse("foo\x00") + assert.EqualError(t, err, "1:1 (0): invalid character NUL") + + _, err = Parse("\xff") + assert.EqualError(t, err, "0:0 (0): invalid UTF-8 encoding") }) -} -func TestFilter(t *testing.T) { - t.Parallel() + t.Run("ParseAllKindOfSimpleFilters", func(t *testing.T) { + t.Parallel() - t.Run("ParserIdentifiesAllKindOfFilters", func(t *testing.T) { rule, err := Parse("foo=bar") assert.Nil(t, err, "There should be no errors but got: %s", err) expected := &Condition{op: Equal, column: "foo", value: "bar"} @@ -70,12 +99,12 @@ func TestFilter(t *testing.T) { expected = &Condition{op: UnEqual, column: "foo", value: "bar"} assert.Equal(t, expected, rule) - rule, err = Parse("foo=bar*") + rule, err = Parse("foo~bar*") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Condition{op: Like, column: "foo", value: "bar*"} assert.Equal(t, expected, rule) - rule, err = Parse("foo!=bar*") + rule, err = Parse("foo!~bar*") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Condition{op: UnLike, column: "foo", value: "bar*"} assert.Equal(t, expected, rule) @@ -119,42 +148,288 @@ func TestFilter(t *testing.T) { rule, err = Parse("foo") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.Equal(t, &Exists{column: "foo"}, rule) + }) + + t.Run("ParseChain", func(t *testing.T) { + t.Parallel() - rule, err = Parse("!(foo=bar|bar=foo)&(foo=bar|bar=foo)") + var expected Filter + rule, err := Parse("!foo=bar") + expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} assert.Nil(t, err, "There should be no errors but got: %s", err) + assert.Equal(t, expected, rule) - expectedChain := &Chain{op: All, rules: []Filter{ - 
&Chain{op: None, rules: []Filter{ + rule, err = Parse("foo=bar&bar=foo") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar&bar=foo|col=val") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: Any, rules: []Filter{ + &Chain{op: All, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, - &Chain{op: Any, rules: []Filter{ + &Condition{op: Equal, column: "col", value: "val"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar|bar=foo") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: Any, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("(foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Condition{op: Equal, column: "foo", value: "bar"} + assert.Equal(t, expected, rule) + + rule, err = Parse("(!foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, grouped: true, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: None, grouped: true, rules: []Filter{ + &Condition{op: Equal, column: "foo", 
value: "bar"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar|bar=foo)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: Any, grouped: true, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, }} - assert.Equal(t, expectedChain, rule) - }) + assert.Equal(t, expected, rule) - t.Run("ParserIdentifiesSingleCondition", func(t *testing.T) { - rule, err := Parse("foo=bar") + rule, err = Parse("((!foo=bar)&bar!=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, grouped: true, rules: []Filter{ + &Chain{op: None, grouped: true, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) - expected := &Condition{op: Equal, column: "foo", value: "bar"} - assert.Equal(t, expected, rule, "Parser does not parse single condition correctly") + rule, err = Parse("!foo&!bar") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Chain{op: None, rules: []Filter{&Exists{column: "foo"}}}, + &Chain{op: None, rules: []Filter{&Exists{column: "bar"}}}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!foo|bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: Any, grouped: true, rules: []Filter{ + &Chain{op: None, rules: []Filter{&Exists{column: "foo"}}}, + &Exists{column: "bar"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!(foo|bar))") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: None, grouped: true, rules: []Filter{ + &Chain{op: Any, grouped: true, rules: []Filter{ + &Exists{column: "foo"}, + 
&Exists{column: "bar"}}, + }, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar&bar!=foo") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar|bar=foo)&(foo!=bar|bar!=foo)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: All, rules: []Filter{ + &Chain{op: None, rules: []Filter{ + &Chain{op: Any, grouped: true, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }}, + }}, + &Chain{op: Any, grouped: true, rules: []Filter{ + &Condition{op: UnEqual, column: "foo", value: "bar"}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar&bar!=foo&john>doe|doedoe|doedoe|doedoe|doebar") @@ -205,10 +486,14 @@ func FuzzParser(f *testing.F) { f.Add("col%29umn>val%29ue") f.Fuzz(func(t *testing.T, expr string) { - _, err := Parse(expr) + rule, err := Parse(expr) + t.Logf("Parsing filter expression %q - ERROR: %v", expr, err) if strings.Count(expr, "(") != strings.Count(expr, ")") { assert.Error(t, err) + assert.Nil(t, rule) + } else if err == nil && !strings.ContainsAny(expr, "!&|!>~<=") { + assert.IsType(t, new(Exists), rule) } }) } diff --git a/internal/filter/types.go b/internal/filter/types.go index 4c1104d0..4aedba6d 100644 --- a/internal/filter/types.go +++ b/internal/filter/types.go @@ -22,6 +22,19 @@ const ( type Chain struct { op LogicalOp // The filter chain operator to be used to evaluate the rules rules []Filter + + // grouped indicates whether a given filter chain is wrapped within parentheses `(foo&bar)`. + // You don't and won't ever need to access this property, it's purely meant to be used by the parser. 
+ grouped bool +} + +func NewChain(op LogicalOp, rules ...Filter) (*Chain, error) { + switch op { + case None, All, Any: + return &Chain{rules: rules, op: op}, nil + default: + return nil, fmt.Errorf("invalid logical operator provided: %q", op) + } } // Eval evaluates the filter rule sets recursively based on their operator type. @@ -80,6 +93,31 @@ func (c *Chain) ExtractConditions() []*Condition { return conditions } +// pop pops the last filter from the rules slice (if not empty) and returns it. +func (c *Chain) pop() Filter { + var rule Filter + if l := len(c.rules); l > 0 { + rule, c.rules = c.rules[l-1], c.rules[:l-1] + } + + return rule +} + +// top picks and erases the first element from its rules and returns it. +func (c *Chain) top() Filter { + var rule Filter + if len(c.rules) > 0 { + rule, c.rules = c.rules[0], c.rules[1:] + } + + return rule +} + +// add adds the given filter rules to the current chain. +func (c *Chain) add(rules ...Filter) { + c.rules = append(c.rules, rules...) +} + // CompOperator is a type used for grouping the individual comparison operators of a filter string. type CompOperator string @@ -105,6 +143,17 @@ type Condition struct { value string } +// NewCondition initiates a new Condition instance from the given data. +// Returns error if invalid CompOperator is provided. +func NewCondition(column string, op CompOperator, value string) (Filter, error) { + switch op { + case Equal, UnEqual, Like, UnLike, LessThan, LessThanEqual, GreaterThan, GreaterThanEqual: + return &Condition{op: op, column: column, value: value}, nil + default: + return nil, fmt.Errorf("invalid comparison operator provided: %q", op) + } +} + // Eval evaluates this Condition based on its operator. // Returns true when the filter evaluates to true false otherwise. 
func (c *Condition) Eval(filterable Filterable) (bool, error) { diff --git a/internal/object/object_test.go b/internal/object/object_test.go index ee6f250f..1989adce 100644 --- a/internal/object/object_test.go +++ b/internal/object/object_test.go @@ -29,8 +29,8 @@ func TestFilter(t *testing.T) { {"Host", false}, {"service", false}, {"!service", true}, - {"host=*.example.com&hostgroup/database-server", true}, - {"host=*.example.com&!hostgroup/database-server", false}, + {"host~*.example.com&hostgroup/database-server", true}, + {"host~*.example.com&!hostgroup/database-server", false}, {"!service&(country=DE&hostgroup/database-server)", true}, {"!service&!(country=AT|country=CH)", true}, {"hostgroup/Nuremberg %28Germany%29", true},