From 0b806878a2a9668b87534a642d37879cce5fba6f Mon Sep 17 00:00:00 2001 From: Bradley Kemp Date: Fri, 30 Aug 2024 15:48:03 +0100 Subject: [PATCH 1/5] mvp --- evaluator/evaluate.go | 177 ++++++++++++++++++++++++++++++- evaluator/evaluate_search.go | 26 +++-- evaluator/evaluate_test.go | 60 +++++++++++ evaluator/indexes.go | 6 +- evaluator/modifiers/modifiers.go | 50 +++++---- evaluator/options.go | 2 + go.mod | 3 +- go.sum | 2 + 8 files changed, 288 insertions(+), 38 deletions(-) diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go index 745b7e9..06101c7 100644 --- a/evaluator/evaluate.go +++ b/evaluator/evaluate.go @@ -4,8 +4,10 @@ import ( "context" "encoding/json" "fmt" - "github.com/bradleyjkemp/sigma-go" + "github.com/bradleyjkemp/sigma-go/evaluator/modifiers" + aho_corasick "github.com/pgavlin/aho-corasick" + "unsafe" ) type RuleEvaluator struct { @@ -17,6 +19,7 @@ type RuleEvaluator struct { expandPlaceholder func(ctx context.Context, placeholderName string) ([]string, error) caseSensitive bool + comparators map[string]modifiers.Comparator count func(ctx context.Context, gb GroupedByValues) (float64, error) average func(ctx context.Context, gb GroupedByValues, value float64) (float64, error) @@ -30,6 +33,7 @@ type RuleEvaluator struct { // For example, if a Sigma rule has a condition like this (attempting to detect login brute forcing) // // detection: +// // login_attempt: // # something here // condition: @@ -40,6 +44,7 @@ type RuleEvaluator struct { // Each different GroupedByValues points to a different box. 
// // GroupedByValues +// // || // ___↓↓___ ________ // | User A | | User B | @@ -72,6 +77,64 @@ func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator { return e } +// ForRules compiles a set of rule evaluators which are evaluated together allowing for use of +// more efficient string matching algorithms +func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle { + bundle := RuleEvaluatorBundle{ + ahocorasick: map[string]ahocorasickSearcher{}, + } + + values := map[string][]string{} + + for _, rule := range rules { + e := &RuleEvaluator{Rule: rule} + for _, option := range options { + option(e) + } + + bundle.evaluators = append(bundle.evaluators, e) + + for _, search := range rule.Detection.Searches { + for _, matcher := range search.EventMatchers { + for _, fieldMatcher := range matcher { + for _, value := range fieldMatcher.Values { + values[fieldMatcher.Field] = append(values[fieldMatcher.Field], value.(string)) // todo use coerceString + } + } + } + } + } + + caseSensitive := false + if len(bundle.evaluators) > 0 { + caseSensitive = bundle.evaluators[0].caseSensitive + } + + for field, fieldValues := range values { + builder := aho_corasick.NewAhoCorasickBuilder(aho_corasick.Opts{ + AsciiCaseInsensitive: caseSensitive, // TODO: parse this out from the options + MatchOnlyWholeWords: false, + MatchKind: aho_corasick.StandardMatch, + DFA: false, // TODO: benchmark + }) + bundle.ahocorasick[field] = ahocorasickSearcher{ + AhoCorasick: builder.Build(fieldValues), + patterns: fieldValues, + } + } + return bundle +} + +type RuleEvaluatorBundle struct { + ahocorasick map[string]ahocorasickSearcher + evaluators []*RuleEvaluator +} + +type ahocorasickSearcher struct { + aho_corasick.AhoCorasick + patterns []string +} + type Result struct { Match bool // whether this event matches the Sigma rule SearchResults map[string]bool // For each Search, whether it matched the event @@ -92,6 +155,116 @@ func eventValue(e Event, key string) interface{} { } 
} +type ahocorasickSearch struct { + field string + haystack *byte +} + +type RuleResult struct { + Result + sigma.Rule +} + +func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) { + if len(bundle.evaluators) == 0 { + fmt.Println("no evaluators in bundle!") + return nil, nil + } + + // copy the current rule comparators + comparators := map[string]modifiers.Comparator{} + for name, comparator := range bundle.evaluators[0].comparators { + comparators[name] = comparator + } + + c := &ahocorasickContains{ + //Comparator: comparators["contains"], // fall back to the normal contains comparator for non MatchField calls + matchers: bundle.ahocorasick, + results: map[ahocorasickSearch]map[string]bool{}, + } + // override the contains comparator to use our custom one + comparators["contains"] = c + + ruleresults := []RuleResult{} + for _, rule := range bundle.evaluators { + result := Result{ + Match: false, + SearchResults: map[string]bool{}, + ConditionResults: make([]bool, len(rule.Detection.Conditions)), + } + for identifier, search := range rule.Detection.Searches { + var err error + result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, comparators) + if err != nil { + return nil, fmt.Errorf("error evaluating search %s: %w", identifier, err) + } + } + + for conditionIndex, condition := range rule.Detection.Conditions { + searchMatches := rule.evaluateSearchExpression(condition.Search, result.SearchResults) + + switch { + // Event didn't match filters + case !searchMatches: + result.ConditionResults[conditionIndex] = false + continue + + // Simple query without any aggregation + case searchMatches && condition.Aggregation == nil: + result.ConditionResults[conditionIndex] = true + result.Match = true + continue // need to continue in case other conditions contain aggregations that need to be evaluated + + // Search expression matched but still need to see if the aggregation returns true + case 
searchMatches && condition.Aggregation != nil: + aggregationMatches, err := rule.evaluateAggregationExpression(ctx, conditionIndex, condition.Aggregation, event) + if err != nil { + return nil, err + } + if aggregationMatches { + result.Match = true + result.ConditionResults[conditionIndex] = true + } + continue + } + } + + ruleresults = append(ruleresults, RuleResult{ + Rule: rule.Rule, + Result: result, + }) + } + return ruleresults, nil +} + +type ahocorasickContains struct { + runCount int + modifiers.Comparator + matchers map[string]ahocorasickSearcher + results map[ahocorasickSearch]map[string]bool +} + +func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) { + haystack := modifiers.CoerceString(actual) + search := ahocorasickSearch{ + field: field, + haystack: unsafe.StringData(haystack), + } + //search := haystack + existingResult, ok := a.results[search] + if !ok { + a.runCount++ + a.results[search] = map[string]bool{} + matcher := a.matchers[field] + for _, match := range matcher.FindAll(haystack) { + a.results[search][matcher.patterns[match.Pattern()]] = true + } + existingResult = a.results[search] + } + + return existingResult[modifiers.CoerceString(expected)], nil +} + func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) { result := Result{ Match: false, @@ -100,7 +273,7 @@ func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, err } for identifier, search := range rule.Detection.Searches { var err error - result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event) + result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, rule.comparators) if err != nil { return Result{}, fmt.Errorf("error evaluating search %s: %w", identifier, err) } diff --git a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go index 844e506..aeb2a0b 100644 --- a/evaluator/evaluate_search.go +++ b/evaluator/evaluate_search.go @@ -4,14 
+4,13 @@ import ( "context" "encoding/json" "fmt" + "github.com/PaesslerAG/jsonpath" + "github.com/bradleyjkemp/sigma-go" "github.com/bradleyjkemp/sigma-go/evaluator/modifiers" "path" "reflect" "regexp" "strings" - - "github.com/PaesslerAG/jsonpath" - "github.com/bradleyjkemp/sigma-go" ) func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, searchResults map[string]bool) bool { @@ -84,7 +83,7 @@ func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, sear panic(fmt.Sprintf("unhandled node type %T", search)) } -func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event) (bool, error) { +func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event, comparators map[string]modifiers.Comparator) (bool, error) { if len(search.Keywords) > 0 { return false, fmt.Errorf("keywords unsupported") } @@ -112,11 +111,7 @@ eventMatcher: // field matchers can specify modifiers (FieldName|modifier1|modifier2) which change the matching behaviour var comparator modifiers.ComparatorFunc var err error - if rule.caseSensitive { - comparator, err = modifiers.GetComparatorCaseSensitive(fieldModifiers...) - } else { - comparator, err = modifiers.GetComparator(fieldModifiers...) - } + comparator, err = modifiers.GetComparator(fieldMatcher.Field, comparators, fieldModifiers...) if err != nil { return false, err } @@ -196,9 +191,22 @@ func (rule *RuleEvaluator) GetFieldValuesFromEvent(field string, event Event) ([ return nil, err } + //values := toGenericSlice(v) + //for _, value := range values { + // if stringValue, ok := value.(string); ok && intern != nil { + // interned, ok := intern[stringValue] + // if !ok { + // intern[stringValue] = stringValue + // interned = stringValue + // } + // value = interned + // } + // actualValues = append(actualValues, value) + //} actualValues = append(actualValues, toGenericSlice(v)...) 
} } + return actualValues, nil } diff --git a/evaluator/evaluate_test.go b/evaluator/evaluate_test.go index 797aea1..0b2593c 100644 --- a/evaluator/evaluate_test.go +++ b/evaluator/evaluate_test.go @@ -2,6 +2,7 @@ package evaluator import ( "context" + "fmt" "testing" "github.com/bradleyjkemp/sigma-go" @@ -92,6 +93,65 @@ func TestRuleEvaluator_Matches(t *testing.T) { } } +func TestRuleEvaluatorBundle_Matches(t *testing.T) { + r1 := sigma.Rule{ + Detection: sigma.Detection{ + Searches: map[string]sigma.Search{ + "foo": { + EventMatchers: []sigma.EventMatcher{ + { + { + Field: "field", + Modifiers: []string{"contains"}, + Values: []interface{}{ + "foo", + }, + }, + }, + }, + }, + }, + Conditions: []sigma.Condition{{ + Search: sigma.AllOfThem{}, + }, + }, + }, + } + r2 := sigma.Rule{ + Detection: sigma.Detection{ + Searches: map[string]sigma.Search{ + "foo": { + EventMatchers: []sigma.EventMatcher{ + { + { + Field: "field", + Modifiers: []string{"contains"}, + Values: []interface{}{ + "bar", + }, + }, + }, + }, + }, + }, + Conditions: []sigma.Condition{{ + Search: sigma.AllOfThem{}, + }, + }, + }, + } + + bundle := ForRules([]sigma.Rule{r1, r2}) + + results, err := bundle.Matches(context.Background(), map[string]interface{}{ + "field": "foobar", + }) + if err != nil { + t.Fatal(err) + } + fmt.Println(results) +} + func TestRuleEvaluator_Matches_WithPlaceholder(t *testing.T) { rule := ForRule(sigma.Rule{ Detection: sigma.Detection{ diff --git a/evaluator/indexes.go b/evaluator/indexes.go index 5b3c698..1f45ad5 100644 --- a/evaluator/indexes.go +++ b/evaluator/indexes.go @@ -64,8 +64,8 @@ func (rule RuleEvaluator) Indexes() []string { } // RelevantToEvent calculates whether a rule is applicable to an event based on: -// * Whether the rule has been configured with a config file that matches the eventIndex -// * Whether the event matches the conditions from the config file +// - Whether the rule has been configured with a config file that matches the eventIndex +// - 
Whether the event matches the conditions from the config file func (rule RuleEvaluator) RelevantToEvent(ctx context.Context, eventIndex string, event Event) (bool, error) { matchedIndex := false for _, index := range rule.indexes { @@ -82,7 +82,7 @@ func (rule RuleEvaluator) RelevantToEvent(ctx context.Context, eventIndex string // need to check for any value constraints that have been specified // TODO: this doesn't yet support the logsourcemerging option to choose between ANDing/ORing these conditions for _, condition := range rule.indexConditions { - searchMatches, err := rule.evaluateSearch(ctx, condition, event) + searchMatches, err := rule.evaluateSearch(ctx, condition, event, rule.comparators) if err != nil { return false, fmt.Errorf("failed to evaluate index condition: %w", err) } diff --git a/evaluator/modifiers/modifiers.go b/evaluator/modifiers/modifiers.go index 58be472..b63f29f 100644 --- a/evaluator/modifiers/modifiers.go +++ b/evaluator/modifiers/modifiers.go @@ -11,15 +11,10 @@ import ( "gopkg.in/yaml.v3" ) -func GetComparator(modifiers ...string) (ComparatorFunc, error) { - return getComparator(Comparators, modifiers...) -} - -func GetComparatorCaseSensitive(modifiers ...string) (ComparatorFunc, error) { - return getComparator(ComparatorsCaseSensitive, modifiers...) 
-} - -func getComparator(comparators map[string]Comparator, modifiers ...string) (ComparatorFunc, error) { +func GetComparator(field string, comparators map[string]Comparator, modifiers ...string) (ComparatorFunc, error) { + if comparators == nil { + comparators = Comparators + } if len(modifiers) == 0 { return baseComparator{}.Matches, nil } @@ -69,7 +64,11 @@ func getComparator(comparators map[string]Comparator, modifiers ...string) (Comp } } - return comparator.Matches(actual, expected) + if fieldComparator, ok := comparator.(FieldComparator); ok { + return fieldComparator.MatchesField(field, actual, expected) + } else { + return comparator.Matches(actual, expected) + } }, nil } @@ -79,6 +78,11 @@ type Comparator interface { Matches(actual any, expected any) (bool, error) } +// FieldComparator is an optional extension to Comparator which also passes the field name +type FieldComparator interface { + MatchesField(field string, actual any, expected any) (bool, error) +} + type ComparatorFunc func(actual, expected any) (bool, error) // ValueModifier modifies the expected value before it is passed to the comparator. 
@@ -127,7 +131,7 @@ func (baseComparator) Matches(actual, expected any) (bool, error) { return true, nil default: // The Sigma spec defines that by default comparisons are case-insensitive - return strings.EqualFold(coerceString(actual), coerceString(expected)), nil + return strings.EqualFold(CoerceString(actual), CoerceString(expected)), nil } } @@ -135,67 +139,67 @@ type contains struct{} func (contains) Matches(actual, expected any) (bool, error) { // The Sigma spec defines that by default comparisons are case-insensitive - return strings.Contains(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil + return strings.Contains(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil } type endswith struct{} func (endswith) Matches(actual, expected any) (bool, error) { // The Sigma spec defines that by default comparisons are case-insensitive - return strings.HasSuffix(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil + return strings.HasSuffix(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil } type startswith struct{} func (startswith) Matches(actual, expected any) (bool, error) { // The Sigma spec defines that by default comparisons are case-insensitive - return strings.HasPrefix(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil + return strings.HasPrefix(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil } type containsCS struct{} func (containsCS) Matches(actual, expected any) (bool, error) { - return strings.Contains(coerceString(actual), coerceString(expected)), nil + return strings.Contains(CoerceString(actual), CoerceString(expected)), nil } type endswithCS struct{} func (endswithCS) Matches(actual, expected any) (bool, error) { - return strings.HasSuffix(coerceString(actual), coerceString(expected)), nil + return strings.HasSuffix(CoerceString(actual), 
CoerceString(expected)), nil } type startswithCS struct{} func (startswithCS) Matches(actual, expected any) (bool, error) { - return strings.HasPrefix(coerceString(actual), coerceString(expected)), nil + return strings.HasPrefix(CoerceString(actual), CoerceString(expected)), nil } type b64 struct{} func (b64) Modify(value any) (any, error) { - return base64.StdEncoding.EncodeToString([]byte(coerceString(value))), nil + return base64.StdEncoding.EncodeToString([]byte(CoerceString(value))), nil } type re struct{} func (re) Matches(actual any, expected any) (bool, error) { - re, err := regexp.Compile(coerceString(expected)) + re, err := regexp.Compile(CoerceString(expected)) if err != nil { return false, err } - return re.MatchString(coerceString(actual)), nil + return re.MatchString(CoerceString(actual)), nil } type cidr struct{} func (cidr) Matches(actual any, expected any) (bool, error) { - _, cidr, err := net.ParseCIDR(coerceString(expected)) + _, cidr, err := net.ParseCIDR(CoerceString(expected)) if err != nil { return false, err } - ip := net.ParseIP(coerceString(actual)) + ip := net.ParseIP(CoerceString(actual)) return cidr.Contains(ip), nil } @@ -227,7 +231,7 @@ func (lte) Matches(actual any, expected any) (bool, error) { return lte, err } -func coerceString(v interface{}) string { +func CoerceString(v interface{}) string { switch vv := v.(type) { case string: return vv diff --git a/evaluator/options.go b/evaluator/options.go index 4684e54..208a2d9 100644 --- a/evaluator/options.go +++ b/evaluator/options.go @@ -2,6 +2,7 @@ package evaluator import ( "context" + "github.com/bradleyjkemp/sigma-go/evaluator/modifiers" "github.com/bradleyjkemp/sigma-go" ) @@ -45,4 +46,5 @@ func WithConfig(config ...sigma.Config) Option { // This can increase performance (especially for larger events) by skipping expensive calls to strings.ToLower func CaseSensitive(e *RuleEvaluator) { e.caseSensitive = true + e.comparators = modifiers.ComparatorsCaseSensitive } diff --git 
a/go.mod b/go.mod index af8cb26..e98c581 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/bradleyjkemp/sigma-go -go 1.18 +go 1.21 require ( github.com/PaesslerAG/jsonpath v0.1.1 @@ -13,5 +13,6 @@ require ( require ( github.com/PaesslerAG/gval v1.0.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pgavlin/aho-corasick v0.5.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect ) diff --git a/go.sum b/go.sum index 3c7dd53..2654f87 100644 --- a/go.sum +++ b/go.sum @@ -13,6 +13,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/pgavlin/aho-corasick v0.5.0 h1:gcEz9/z7CDs/KqZrdSJm6FQw4/dj2/mOho6+p77yZsw= +github.com/pgavlin/aho-corasick v0.5.0/go.mod h1:UyKgVsAp5Un59BCpzrpFkPyETFMn1tGjdbRYvoq0l2g= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= From 3afe61028196c619215f2e653430be46daf4f0c8 Mon Sep 17 00:00:00 2001 From: Bradley Kemp Date: Mon, 2 Sep 2024 14:19:33 +0100 Subject: [PATCH 2/5] Fuzz, move to new, correct library --- evaluator/evaluate.go | 79 +++++++++++++------ evaluator/evaluate_search.go | 2 + evaluator/evaluate_test.go | 4 +- evaluator/fuzz_test.go | 79 +++++++++++++++++++ evaluator/index_test.go | 3 - .../FuzzRuleBundleMatches/1907e011ae8a6fdd | 6 ++ .../FuzzRuleBundleMatches/1b692dbec8c613de | 6 ++ .../FuzzRuleBundleMatches/3a94d65bc4acc663 | 6 ++ .../FuzzRuleBundleMatches/59d99db21bdb3323 | 6 ++ .../FuzzRuleBundleMatches/6450716b6258ade2 | 6 ++ .../FuzzRuleBundleMatches/754aab3cbb754e99 | 6 ++ 
.../FuzzRuleBundleMatches/75e97febfc5feb9e | 6 ++ .../FuzzRuleBundleMatches/85bf2132f746b224 | 6 ++ .../FuzzRuleBundleMatches/89bbe22b303a3d8a | 6 ++ .../FuzzRuleBundleMatches/90c72819f91d52a6 | 6 ++ .../FuzzRuleBundleMatches/9fab5927760a02ef | 6 ++ .../FuzzRuleBundleMatches/ef6ebca9ff3c502d | 6 ++ go.mod | 2 +- go.sum | 4 +- 19 files changed, 214 insertions(+), 31 deletions(-) create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go index 06101c7..537b2e2 100644 --- a/evaluator/evaluate.go +++ b/evaluator/evaluate.go @@ -4,9 +4,10 @@ import ( "context" "encoding/json" "fmt" + aho_corasick "github.com/BobuSumisu/aho-corasick" "github.com/bradleyjkemp/sigma-go" "github.com/bradleyjkemp/sigma-go/evaluator/modifiers" - aho_corasick "github.com/pgavlin/aho-corasick" + "strings" "unsafe" ) @@ -70,7 +71,7 @@ func (a GroupedByValues) Key() string { } func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator { - e := &RuleEvaluator{Rule: rule} + e := &RuleEvaluator{Rule: rule, comparators: 
modifiers.Comparators} for _, option := range options { option(e) } @@ -80,6 +81,10 @@ func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator { // ForRules compiles a set of rule evaluators which are evaluated together allowing for use of // more efficient string matching algorithms func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle { + if len(rules) == 0 { + return RuleEvaluatorBundle{} + } + bundle := RuleEvaluatorBundle{ ahocorasick: map[string]ahocorasickSearcher{}, } @@ -87,7 +92,7 @@ func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle { values := map[string][]string{} for _, rule := range rules { - e := &RuleEvaluator{Rule: rule} + e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators} for _, option := range options { option(e) } @@ -97,8 +102,20 @@ func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle { for _, search := range rule.Detection.Searches { for _, matcher := range search.EventMatchers { for _, fieldMatcher := range matcher { + contains := false + for _, modifier := range fieldMatcher.Modifiers { + if modifier == "contains" { + contains = true + } + } + if !contains { + continue + } for _, value := range fieldMatcher.Values { - values[fieldMatcher.Field] = append(values[fieldMatcher.Field], value.(string)) // todo use coerceString + if value == nil { + continue + } + values[fieldMatcher.Field] = append(values[fieldMatcher.Field], modifiers.CoerceString(value)) } } } @@ -111,27 +128,31 @@ func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle { } for field, fieldValues := range values { - builder := aho_corasick.NewAhoCorasickBuilder(aho_corasick.Opts{ - AsciiCaseInsensitive: caseSensitive, // TODO: parse this out from the options - MatchOnlyWholeWords: false, - MatchKind: aho_corasick.StandardMatch, - DFA: false, // TODO: benchmark - }) + if !caseSensitive { + // when operating in case-insensitive mode, ahocorasick only returns matches for the 
*first* match + // so we have to canonicalise our needles to lowercase. + // otherwise if we have both "A" and "a", we're not sure which will be returned as the match + // see: go test -run="FuzzRuleBundleMatches/1b692dbec8c613de" + for i, value := range fieldValues { + fieldValues[i] = strings.ToLower(value) + } + } bundle.ahocorasick[field] = ahocorasickSearcher{ - AhoCorasick: builder.Build(fieldValues), - patterns: fieldValues, + Trie: aho_corasick.NewTrieBuilder().AddStrings(fieldValues).Build(), + patterns: fieldValues, } } return bundle } type RuleEvaluatorBundle struct { - ahocorasick map[string]ahocorasickSearcher - evaluators []*RuleEvaluator + ahocorasick map[string]ahocorasickSearcher + evaluators []*RuleEvaluator + caseSensitive bool } type ahocorasickSearcher struct { - aho_corasick.AhoCorasick + *aho_corasick.Trie patterns []string } @@ -167,7 +188,6 @@ type RuleResult struct { func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) { if len(bundle.evaluators) == 0 { - fmt.Println("no evaluators in bundle!") return nil, nil } @@ -238,31 +258,46 @@ func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]R } type ahocorasickContains struct { - runCount int + caseSensitive bool modifiers.Comparator matchers map[string]ahocorasickSearcher results map[ahocorasickSearch]map[string]bool } func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) { + if actual == nil && expected == "" { + // compatability with old |contains behaviour + // possibly a bug? 
+ return true, nil + } haystack := modifiers.CoerceString(actual) search := ahocorasickSearch{ field: field, haystack: unsafe.StringData(haystack), } - //search := haystack + existingResult, ok := a.results[search] - if !ok { - a.runCount++ + if !ok { // haven't already computed this + if !a.caseSensitive { + haystack = strings.ToLower(haystack) + } a.results[search] = map[string]bool{} matcher := a.matchers[field] - for _, match := range matcher.FindAll(haystack) { + for _, match := range matcher.MatchString(haystack) { + // TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]? a.results[search][matcher.patterns[match.Pattern()]] = true } existingResult = a.results[search] } - return existingResult[modifiers.CoerceString(expected)], nil + needle := modifiers.CoerceString(expected) + if !a.caseSensitive { + // when operating in case-insensitive mode, search strings must be canonicalised + // (this is ok because search strings are much smaller than the haystack) + // TODO: should we just modify the rules in this case? 
(saving the lower-casing every time) + needle = strings.ToLower(needle) + } + return existingResult[needle], nil } func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) { diff --git a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go index aeb2a0b..598c15b 100644 --- a/evaluator/evaluate_search.go +++ b/evaluator/evaluate_search.go @@ -149,6 +149,8 @@ func (rule *RuleEvaluator) getMatcherValues(ctx context.Context, matcher sigma.F case int, float32, float64, bool: value = fmt.Sprintf("%v", abstractValue) default: + // TODO: temporary hack + return nil, nil return nil, fmt.Errorf("expected scalar field matching value got: %v (%T)", abstractValue, abstractValue) } diff --git a/evaluator/evaluate_test.go b/evaluator/evaluate_test.go index 0b2593c..cc7cbc5 100644 --- a/evaluator/evaluate_test.go +++ b/evaluator/evaluate_test.go @@ -2,7 +2,6 @@ package evaluator import ( "context" - "fmt" "testing" "github.com/bradleyjkemp/sigma-go" @@ -143,13 +142,12 @@ func TestRuleEvaluatorBundle_Matches(t *testing.T) { bundle := ForRules([]sigma.Rule{r1, r2}) - results, err := bundle.Matches(context.Background(), map[string]interface{}{ + _, err := bundle.Matches(context.Background(), map[string]interface{}{ "field": "foobar", }) if err != nil { t.Fatal(err) } - fmt.Println(results) } func TestRuleEvaluator_Matches_WithPlaceholder(t *testing.T) { diff --git a/evaluator/fuzz_test.go b/evaluator/fuzz_test.go index 5a99f54..cce0e8d 100644 --- a/evaluator/fuzz_test.go +++ b/evaluator/fuzz_test.go @@ -3,6 +3,9 @@ package evaluator import ( "context" "encoding/json" + "fmt" + "reflect" + "sync" "testing" "github.com/bradleyjkemp/sigma-go" @@ -49,3 +52,79 @@ func FuzzRuleMatches(f *testing.F) { }) } + +func FuzzRuleBundleMatches(f *testing.F) { + f.Add(testRule, testRule, testConfig, `{"foo": "bar", "bar": "baz"}`, false) + f.Fuzz(func(t *testing.T, rule1, rule2, config, payload string, caseSensitive bool) { + var r1, r2 sigma.Rule + var c 
sigma.Config + var err error + wg := sync.WaitGroup{} + wg.Add(1) + + go func() { + defer func() { + wg.Done() + if r := recover(); r != nil { + err = fmt.Errorf("panic in parsing") + } + }() + r1, err = sigma.ParseRule([]byte(rule1)) + if err != nil || len(r1.Detection.Searches) == 0 || len(r1.Detection.Conditions) == 0 { + return + } + r2, err = sigma.ParseRule([]byte(rule2)) + if err != nil || len(r2.Detection.Searches) == 0 || len(r2.Detection.Conditions) == 0 { + return + } + c, err = sigma.ParseConfig([]byte(config)) + if err != nil { + return + } + }() + wg.Wait() + if err != nil { + return + } + + var e Event + if err := json.Unmarshal([]byte(payload), &e); err != nil { + return + } + if reflect.TypeOf(e).Kind() != reflect.Map { + return + } + + options := []Option{WithConfig(c)} + if caseSensitive { + options = append(options, CaseSensitive) + } + + eval1 := ForRule(r1, WithConfig(c)) + eval2 := ForRule(r2, WithConfig(c)) + match1, err1 := eval1.Matches(context.Background(), e) + if err1 != nil { + return + } + match2, err2 := eval2.Matches(context.Background(), e) + if err2 != nil { + return + } + + bundle := ForRules([]sigma.Rule{r1, r2}, WithConfig(c)) + matches, errs := bundle.Matches(context.Background(), e) + if errs != nil { + panic(errs) + } + if len(matches) != 2 { + panic(fmt.Sprint("didn't get 2 matches, got", len(matches), err)) + } + + if !reflect.DeepEqual(matches[0].Result, match1) { + panic(fmt.Sprint("difference in match1\nbundle: ", matches[0].Result, "\nstandalone: ", match1)) + } + if !reflect.DeepEqual(matches[1].Result, match2) { + panic(fmt.Sprint("difference in match2\nbundle: ", matches[1].Result, "\nstandalone: ", match2)) + } + }) +} diff --git a/evaluator/index_test.go b/evaluator/index_test.go index 2c1d259..adc66d5 100644 --- a/evaluator/index_test.go +++ b/evaluator/index_test.go @@ -2,7 +2,6 @@ package evaluator import ( "context" - "fmt" "testing" "github.com/bradleyjkemp/sigma-go" @@ -44,8 +43,6 @@ func 
TestRuleEvaluator_RelevantToEvent_LogsourceRewriting(t *testing.T) { DefaultIndex: "", })) - fmt.Println(rule.Indexes()) - relevant := []string{ "just-category", "category-rewritten-index", diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd new file mode 100644 index 0000000..1427fc2 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n Foo: BA\n condition: A") +string("detection:\n 0:\n Foo|contains: A\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"BA\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de new file mode 100644 index 0000000..3ae40e6 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n Foo|contains: A\n condition: A") +string("detection:\n 0:\n Foo|contains: a\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"A\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663 new file mode 100644 index 0000000..62b3328 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0:") +string("detection:\n 0:\n 0:") +string("") +string("{}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323 new file mode 100644 index 0000000..4991c8e --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("<<:\n? 
0:") +string("0") +string("0") +string("0") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2 new file mode 100644 index 0000000..8e08926 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("\nid: TEST_RULE\ndetection:\n a:\n Foo|contains: bar\n b:\n Bar|endswith: baz\n condition: a and b\n") +string("\nid: TEST_RULE\ndetection:\n :\n Foo|contains: bar\n b:\n Bar|endswith: baz\n condition: a and b\n") +string("\ntitle: Test\nlogsMMources:\n tes Bar: product: tes \n\nfieldmappings:\n tFoo: $.foo\n Bar: $.foobar.baz\n") +string("{\"foo\": \"bar\", \"bar\": \"baz\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99 new file mode 100644 index 0000000..bbf62ac --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection: \n 00:\n 00:") +string("detection: \n 0: \n 0:") +string("") +string("0") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e new file mode 100644 index 0000000..cd1b85e --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0:") +string("detection:\n 0:\n 0:") +string("") +string("A") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224 new file mode 100644 index 0000000..737b75d --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0: \n condition: A") +string("detection:\n 0:\n 0|contains: >") +string("0") +string("{}") 
+bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a new file mode 100644 index 0000000..b369b59 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection: \n 0: \n 0:") +string("detection: \n 00:\n 00|:") +string("") +string("{}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6 new file mode 100644 index 0000000..adfed68 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("\nid: TEST_RULE\ndetection:\n a:\n Foo|contains: bar\n b:\n Bar|endswith: baz\n condition: a and b\n") +string("\nid: TEST_RULE\ndetection:\n a:\n Foo|contains: bar\n b:\n Bar|endswith: baz\n condition: a and b\n") +string("\ntitle: Test\nlogsources:\n test:\n product: test\n\nfieldmappings:\n Foo: $.foo\n Bar: $.foobar.baz\n") +string("{\"foo\": \"bAr\", \"000\": \"000\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef new file mode 100644 index 0000000..2c34913 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n Foo|contains: BA\n condition: A") +string("detection:\n 0:\n Foo|contains: B\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"BA\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d new file mode 100644 index 0000000..311973c --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 00:\n 1:") +string("detection:\n 0:\n 0:") 
+string("") +string("{}") +bool(false) diff --git a/go.mod b/go.mod index e98c581..21e923d 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/bradleyjkemp/sigma-go go 1.21 require ( + github.com/BobuSumisu/aho-corasick v1.0.3 github.com/PaesslerAG/jsonpath v0.1.1 github.com/alecthomas/participle v0.7.1 github.com/bradleyjkemp/cupaloy/v2 v2.6.0 @@ -13,6 +14,5 @@ require ( require ( github.com/PaesslerAG/gval v1.0.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/pgavlin/aho-corasick v0.5.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect ) diff --git a/go.sum b/go.sum index 2654f87..7b18aa4 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g= +github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE= github.com/PaesslerAG/gval v1.0.0 h1:GEKnRwkWDdf9dOmKcNrar9EA1bz1z9DqPIO1+iLzhd8= github.com/PaesslerAG/gval v1.0.0/go.mod h1:y/nm5yEyTeX6av0OfKJNp9rBNj2XrGhAf5+v24IBN1I= github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8= @@ -13,8 +15,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/pgavlin/aho-corasick v0.5.0 h1:gcEz9/z7CDs/KqZrdSJm6FQw4/dj2/mOho6+p77yZsw= -github.com/pgavlin/aho-corasick v0.5.0/go.mod h1:UyKgVsAp5Un59BCpzrpFkPyETFMn1tGjdbRYvoq0l2g= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= From 0b38dbae8fa851b517b3c124192c9a97c05da05a 
Mon Sep 17 00:00:00 2001 From: Bradley Kemp Date: Mon, 2 Sep 2024 15:53:17 +0100 Subject: [PATCH 3/5] Use same technique for regexes --- evaluator/bundle.go | 258 ++++++++ evaluator/evaluate.go | 205 ------- evaluator/evaluate_search.go | 2 +- evaluator/fuzz_test.go | 11 + evaluator/restring.go | 563 ++++++++++++++++++ .../FuzzRuleBundleMatches/5767f35675911705 | 6 + .../FuzzRuleBundleMatches/8ef99a169708daef | 6 + 7 files changed, 845 insertions(+), 206 deletions(-) create mode 100644 evaluator/bundle.go create mode 100644 evaluator/restring.go create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef diff --git a/evaluator/bundle.go b/evaluator/bundle.go new file mode 100644 index 0000000..0c842c9 --- /dev/null +++ b/evaluator/bundle.go @@ -0,0 +1,258 @@ +package evaluator + +import ( + "context" + "fmt" + aho_corasick "github.com/BobuSumisu/aho-corasick" + "github.com/bradleyjkemp/sigma-go" + "github.com/bradleyjkemp/sigma-go/evaluator/modifiers" + "regexp" + "strings" + "unsafe" +) + +// ForRules compiles a set of rule evaluators which are evaluated together allowing for use of +// more efficient string matching algorithms +func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle { + if len(rules) == 0 { + return RuleEvaluatorBundle{} + } + + bundle := RuleEvaluatorBundle{ + ahocorasick: map[string]ahocorasickSearcher{}, + } + + values := map[string][]string{} + + for _, rule := range rules { + e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators} + for _, option := range options { + option(e) + } + + bundle.evaluators = append(bundle.evaluators, e) + bundle.caseSensitive = e.caseSensitive + + for _, search := range rule.Detection.Searches { + for _, matcher := range search.EventMatchers { + for _, fieldMatcher := range matcher { + contains := false + regex := false + for _, modifier := range fieldMatcher.Modifiers { + if 
modifier == "contains" { + contains = true + } + if modifier == "re" { + regex = true + } + } + switch { + case contains: // add all values to the needle set + for _, value := range fieldMatcher.Values { + if value == nil { + continue + } + stringValue := modifiers.CoerceString(value) + if !bundle.caseSensitive { + stringValue = strings.ToLower(stringValue) + } + values[fieldMatcher.Field] = append(values[fieldMatcher.Field], stringValue) + } + case regex: // get "necessary" substrings and add to the needle set + for _, value := range fieldMatcher.Values { + ss, caseInsensitive, _ := regexStrings(modifiers.CoerceString(value)) // todo: benchmark this, should save the result? + for _, s := range ss { + if caseInsensitive { + s = strings.ToLower(s) + } + values[fieldMatcher.Field] = append(values[fieldMatcher.Field], s) + } + } + } + + } + } + } + } + + for field, fieldValues := range values { + bundle.ahocorasick[field] = ahocorasickSearcher{ + Trie: aho_corasick.NewTrieBuilder().AddStrings(fieldValues).Build(), + patterns: fieldValues, + results: map[*byte]map[string]bool{}, // used for caching results + } + } + return bundle +} + +type RuleEvaluatorBundle struct { + ahocorasick map[string]ahocorasickSearcher + evaluators []*RuleEvaluator + caseSensitive bool +} + +type ahocorasickSearcher struct { + *aho_corasick.Trie + patterns []string + results map[*byte]map[string]bool +} + +func (as ahocorasickSearcher) getResults(s string, caseSensitive bool) map[string]bool { + key := unsafe.StringData(s) // using the underlying []byte pointer means we only compute results once per interned string + result, ok := as.results[key] + if ok { + return result + } + + // haven't already computed this + if !caseSensitive { + s = strings.ToLower(s) + } + results := map[string]bool{} + as.results[key] = results + for _, match := range as.MatchString(s) { + // TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]? 
+ as.results[key][match.MatchString()] = true + } + return results +} + +type RuleResult struct { + Result + sigma.Rule +} + +func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) { + if len(bundle.evaluators) == 0 { + return nil, nil + } + + // copy the current rule comparators + comparators := map[string]modifiers.Comparator{} + for name, comparator := range bundle.evaluators[0].comparators { + comparators[name] = comparator + } + + // override the contains comparator to use our custom one + comparators["contains"] = &ahocorasickContains{ + matchers: bundle.ahocorasick, + caseSensitive: bundle.caseSensitive, + } + comparators["re"] = &ahocorasickRe{ + matchers: bundle.ahocorasick, + } + + ruleresults := []RuleResult{} + for _, rule := range bundle.evaluators { + result := Result{ + Match: false, + SearchResults: map[string]bool{}, + ConditionResults: make([]bool, len(rule.Detection.Conditions)), + } + for identifier, search := range rule.Detection.Searches { + var err error + result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, comparators) + if err != nil { + return nil, fmt.Errorf("error evaluating search %s: %w", identifier, err) + } + } + + for conditionIndex, condition := range rule.Detection.Conditions { + searchMatches := rule.evaluateSearchExpression(condition.Search, result.SearchResults) + + switch { + // Event didn't match filters + case !searchMatches: + result.ConditionResults[conditionIndex] = false + continue + + // Simple query without any aggregation + case searchMatches && condition.Aggregation == nil: + result.ConditionResults[conditionIndex] = true + result.Match = true + continue // need to continue in case other conditions contain aggregations that need to be evaluated + + // Search expression matched but still need to see if the aggregation returns true + case searchMatches && condition.Aggregation != nil: + aggregationMatches, err := 
rule.evaluateAggregationExpression(ctx, conditionIndex, condition.Aggregation, event) + if err != nil { + return nil, err + } + if aggregationMatches { + result.Match = true + result.ConditionResults[conditionIndex] = true + } + continue + } + } + + ruleresults = append(ruleresults, RuleResult{ + Rule: rule.Rule, + Result: result, + }) + } + return ruleresults, nil +} + +type ahocorasickContains struct { + caseSensitive bool + modifiers.Comparator + matchers map[string]ahocorasickSearcher +} + +func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) { + if expected == "" { + // compatability with old |contains behaviour + // possibly a bug? + return true, nil + } + + results := a.matchers[field].getResults(modifiers.CoerceString(actual), a.caseSensitive) + + needle := modifiers.CoerceString(expected) + if !a.caseSensitive { + // when operating in case-insensitive mode, search strings must be canonicalised + // (this is ok because search strings are much smaller than the haystack) + // TODO: should we just modify the rules in this case? (saving the lower-casing every time) + needle = strings.ToLower(needle) + } + return results[needle], nil +} + +type ahocorasickRe struct { + modifiers.Comparator + matchers map[string]ahocorasickSearcher +} + +func (a *ahocorasickRe) MatchesField(field string, actual any, expected any) (bool, error) { + stringRe := modifiers.CoerceString(expected) + re, err := regexp.Compile(stringRe) // todo: cache this? 
+ if err != nil { + return false, err + } + + // this function returns a set of simple strings + // which necessarily appear if the regex matches + // If none are present in `actual`, we don't need to run the regex + ss, caseInsensitive, err := regexStrings(stringRe) + if err != nil { + return false, err + } + + haystack := modifiers.CoerceString(actual) + results := a.matchers[field].getResults(haystack, !caseInsensitive) + found := false + for _, s := range ss { + if results[s] { + found = true + break + } + } + if !found { + return false, nil + } + + // our cheap heuristic says the regex *might* match the string, + // so we have to now run the full regex + return re.MatchString(haystack), nil +} diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go index 537b2e2..99fafcb 100644 --- a/evaluator/evaluate.go +++ b/evaluator/evaluate.go @@ -4,11 +4,8 @@ import ( "context" "encoding/json" "fmt" - aho_corasick "github.com/BobuSumisu/aho-corasick" "github.com/bradleyjkemp/sigma-go" "github.com/bradleyjkemp/sigma-go/evaluator/modifiers" - "strings" - "unsafe" ) type RuleEvaluator struct { @@ -78,84 +75,6 @@ func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator { return e } -// ForRules compiles a set of rule evaluators which are evaluated together allowing for use of -// more efficient string matching algorithms -func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle { - if len(rules) == 0 { - return RuleEvaluatorBundle{} - } - - bundle := RuleEvaluatorBundle{ - ahocorasick: map[string]ahocorasickSearcher{}, - } - - values := map[string][]string{} - - for _, rule := range rules { - e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators} - for _, option := range options { - option(e) - } - - bundle.evaluators = append(bundle.evaluators, e) - - for _, search := range rule.Detection.Searches { - for _, matcher := range search.EventMatchers { - for _, fieldMatcher := range matcher { - contains := false - for _, modifier := range 
fieldMatcher.Modifiers { - if modifier == "contains" { - contains = true - } - } - if !contains { - continue - } - for _, value := range fieldMatcher.Values { - if value == nil { - continue - } - values[fieldMatcher.Field] = append(values[fieldMatcher.Field], modifiers.CoerceString(value)) - } - } - } - } - } - - caseSensitive := false - if len(bundle.evaluators) > 0 { - caseSensitive = bundle.evaluators[0].caseSensitive - } - - for field, fieldValues := range values { - if !caseSensitive { - // when operating in case-insensitive mode, ahocorasick only returns matches for the *first* match - // so we have to canonicalise our needles to lowercase. - // otherwise if we have both "A" and "a", we're not sure which will be returned as the match - // see: go test -run="FuzzRuleBundleMatches/1b692dbec8c613de" - for i, value := range fieldValues { - fieldValues[i] = strings.ToLower(value) - } - } - bundle.ahocorasick[field] = ahocorasickSearcher{ - Trie: aho_corasick.NewTrieBuilder().AddStrings(fieldValues).Build(), - patterns: fieldValues, - } - } - return bundle -} - -type RuleEvaluatorBundle struct { - ahocorasick map[string]ahocorasickSearcher - evaluators []*RuleEvaluator - caseSensitive bool -} - -type ahocorasickSearcher struct { - *aho_corasick.Trie - patterns []string -} - type Result struct { Match bool // whether this event matches the Sigma rule SearchResults map[string]bool // For each Search, whether it matched the event @@ -176,130 +95,6 @@ func eventValue(e Event, key string) interface{} { } } -type ahocorasickSearch struct { - field string - haystack *byte -} - -type RuleResult struct { - Result - sigma.Rule -} - -func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) { - if len(bundle.evaluators) == 0 { - return nil, nil - } - - // copy the current rule comparators - comparators := map[string]modifiers.Comparator{} - for name, comparator := range bundle.evaluators[0].comparators { - comparators[name] = comparator 
- } - - c := &ahocorasickContains{ - //Comparator: comparators["contains"], // fall back to the normal contains comparator for non MatchField calls - matchers: bundle.ahocorasick, - results: map[ahocorasickSearch]map[string]bool{}, - } - // override the contains comparator to use our custom one - comparators["contains"] = c - - ruleresults := []RuleResult{} - for _, rule := range bundle.evaluators { - result := Result{ - Match: false, - SearchResults: map[string]bool{}, - ConditionResults: make([]bool, len(rule.Detection.Conditions)), - } - for identifier, search := range rule.Detection.Searches { - var err error - result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, comparators) - if err != nil { - return nil, fmt.Errorf("error evaluating search %s: %w", identifier, err) - } - } - - for conditionIndex, condition := range rule.Detection.Conditions { - searchMatches := rule.evaluateSearchExpression(condition.Search, result.SearchResults) - - switch { - // Event didn't match filters - case !searchMatches: - result.ConditionResults[conditionIndex] = false - continue - - // Simple query without any aggregation - case searchMatches && condition.Aggregation == nil: - result.ConditionResults[conditionIndex] = true - result.Match = true - continue // need to continue in case other conditions contain aggregations that need to be evaluated - - // Search expression matched but still need to see if the aggregation returns true - case searchMatches && condition.Aggregation != nil: - aggregationMatches, err := rule.evaluateAggregationExpression(ctx, conditionIndex, condition.Aggregation, event) - if err != nil { - return nil, err - } - if aggregationMatches { - result.Match = true - result.ConditionResults[conditionIndex] = true - } - continue - } - } - - ruleresults = append(ruleresults, RuleResult{ - Rule: rule.Rule, - Result: result, - }) - } - return ruleresults, nil -} - -type ahocorasickContains struct { - caseSensitive bool - 
modifiers.Comparator - matchers map[string]ahocorasickSearcher - results map[ahocorasickSearch]map[string]bool -} - -func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) { - if actual == nil && expected == "" { - // compatability with old |contains behaviour - // possibly a bug? - return true, nil - } - haystack := modifiers.CoerceString(actual) - search := ahocorasickSearch{ - field: field, - haystack: unsafe.StringData(haystack), - } - - existingResult, ok := a.results[search] - if !ok { // haven't already computed this - if !a.caseSensitive { - haystack = strings.ToLower(haystack) - } - a.results[search] = map[string]bool{} - matcher := a.matchers[field] - for _, match := range matcher.MatchString(haystack) { - // TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]? - a.results[search][matcher.patterns[match.Pattern()]] = true - } - existingResult = a.results[search] - } - - needle := modifiers.CoerceString(expected) - if !a.caseSensitive { - // when operating in case-insensitive mode, search strings must be canonicalised - // (this is ok because search strings are much smaller than the haystack) - // TODO: should we just modify the rules in this case? 
(saving the lower-casing every time) - needle = strings.ToLower(needle) - } - return existingResult[needle], nil -} - func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) { result := Result{ Match: false, diff --git a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go index 598c15b..ed9b511 100644 --- a/evaluator/evaluate_search.go +++ b/evaluator/evaluate_search.go @@ -301,7 +301,7 @@ func toGenericSlice(v interface{}) []interface{} { return []interface{}{v} } - var out []interface{} + out := make([]interface{}, 0, rv.Len()) for i := 0; i < rv.Len(); i++ { out = append(out, rv.Index(i).Interface()) } diff --git a/evaluator/fuzz_test.go b/evaluator/fuzz_test.go index cce0e8d..11e5809 100644 --- a/evaluator/fuzz_test.go +++ b/evaluator/fuzz_test.go @@ -21,6 +21,16 @@ detection: condition: a and b ` +const testRuleRe = ` +id: TEST_RULE +detection: + a: + Foo|re: bar + b: + Bar|endswith: baz + condition: a and b +` + const testConfig = ` title: Test logsources: @@ -55,6 +65,7 @@ func FuzzRuleMatches(f *testing.F) { func FuzzRuleBundleMatches(f *testing.F) { f.Add(testRule, testRule, testConfig, `{"foo": "bar", "bar": "baz"}`, false) + f.Add(testRule, testRuleRe, testConfig, `{"foo": "bar", "bar": "baz"}`, false) f.Fuzz(func(t *testing.T, rule1, rule2, config, payload string, caseSensitive bool) { var r1, r2 sigma.Rule var c sigma.Config diff --git a/evaluator/restring.go b/evaluator/restring.go new file mode 100644 index 0000000..e7507ed --- /dev/null +++ b/evaluator/restring.go @@ -0,0 +1,563 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file is based on http://code.google.com/p/codesearch/source/browse/index/regexp.go, +// modified to find strings instead of trigrams. 
+ +package evaluator + +import ( + "regexp/syntax" + "sort" + "strings" + "unicode" +) + +// regexStrings returns a set of strings such that any string that matches re must +// contain at least one of the strings in the set. If no such set can be found, +// regexStrings returns an empty set. +func regexStrings(re string) (stringSet, bool, error) { + parsed, err := syntax.Parse(re, syntax.Perl) + if err != nil { + return nil, false, err + } + info := analyze(parsed) + return info.bestSet(), parsed.Flags&syntax.FoldCase > 0, nil +} + +// A regexpInfo summarizes the results of analyzing a regexp. +type regexpInfo struct { + // canEmpty records whether the regexp matches the empty string + canEmpty bool + + // exact is the exact set of strings matching the regexp. + exact stringSet + + // if exact is nil, prefix is the set of possible match prefixes, + // and suffix is the set of possible match suffixes. + prefix stringSet // otherwise: the exact set of matching prefixes ... + suffix stringSet // ... and suffixes + + // internal is a set of strings that match internally (not as prefixes or + // suffixes). + internal stringSet +} + +const ( + // Exact sets are limited to maxExact strings. + // If they get too big, simplify will rewrite the regexpInfo + // to use prefix and suffix instead. It's not worthwhile for + // this to be bigger than maxSet. + maxExact = 100 + + // Prefix and suffix sets are limited to maxSet strings. + // If they get too big, simplify will replace groups of strings + // sharing a common leading prefix (or trailing suffix) with + // that common prefix (or suffix). + maxSet = 200 +) + +// anyMatch returns the regexpInfo describing a regexp that +// matches any string. +func anyMatch() regexpInfo { + return regexpInfo{ + canEmpty: true, + prefix: []string{""}, + suffix: []string{""}, + } +} + +// anyChar returns the regexpInfo describing a regexp that +// matches any single character. 
+func anyChar() regexpInfo { + return regexpInfo{ + prefix: []string{""}, + suffix: []string{""}, + } +} + +// noMatch returns the regexpInfo describing a regexp that +// matches no strings at all. +func noMatch() regexpInfo { + return regexpInfo{} +} + +// emptyString returns the regexpInfo describing a regexp that +// matches only the empty string. +func emptyString() regexpInfo { + return regexpInfo{ + canEmpty: true, + exact: []string{""}, + } +} + +// analyze returns the regexpInfo for the regexp re. +func analyze(re *syntax.Regexp) (ret regexpInfo) { + var info regexpInfo + switch re.Op { + case syntax.OpNoMatch: + return noMatch() + + case syntax.OpEmptyMatch, + syntax.OpBeginLine, syntax.OpEndLine, + syntax.OpBeginText, syntax.OpEndText, + syntax.OpWordBoundary, syntax.OpNoWordBoundary: + return emptyString() + + case syntax.OpLiteral: + if re.Flags&syntax.FoldCase != 0 { + switch len(re.Rune) { + case 0: + return emptyString() + case 1: + // Single-letter case-folded string: + // rewrite into char class and analyze. + re1 := &syntax.Regexp{ + Op: syntax.OpCharClass, + } + re1.Rune = re1.Rune0[:0] + r0 := re.Rune[0] + re1.Rune = append(re1.Rune, r0, r0) + for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { + re1.Rune = append(re1.Rune, r1, r1) + } + info = analyze(re1) + return info + } + // Multi-letter case-folded string: + // treat as concatenation of single-letter case-folded strings. 
+ re1 := &syntax.Regexp{ + Op: syntax.OpLiteral, + Flags: syntax.FoldCase, + } + info = emptyString() + for i := range re.Rune { + re1.Rune = re.Rune[i : i+1] + info = concat(info, analyze(re1)) + } + return info + } + info.exact = stringSet{string(re.Rune)} + + case syntax.OpAnyCharNotNL, syntax.OpAnyChar: + return anyChar() + + case syntax.OpCapture: + return analyze(re.Sub[0]) + + case syntax.OpConcat: + return fold(concat, re.Sub, emptyString()) + + case syntax.OpAlternate: + return fold(alternate, re.Sub, noMatch()) + + case syntax.OpQuest: + return alternate(analyze(re.Sub[0]), emptyString()) + + case syntax.OpStar: + // We don't know anything, so assume the worst. + return anyMatch() + + case syntax.OpRepeat: + if re.Min == 0 { + // Like OpStar + return anyMatch() + } + fallthrough + case syntax.OpPlus: + // x+ + // Since there has to be at least one x, the prefixes and suffixes + // stay the same. If x was exact, it isn't anymore. + info = analyze(re.Sub[0]) + if info.exact.have() { + info.prefix = info.exact + info.suffix = info.exact.copy() + info.exact = nil + } + + case syntax.OpCharClass: + // Special case. + if len(re.Rune) == 0 { + return noMatch() + } + + // Special case. + if len(re.Rune) == 1 { + info.exact = stringSet{string(re.Rune[0])} + break + } + + n := 0 + for i := 0; i < len(re.Rune); i += 2 { + n += int(re.Rune[i+1] - re.Rune[i]) + } + // If the class is too large, it's okay to overestimate. + if n > 100 { + return anyChar() + } + + info.exact = []string{} + for i := 0; i < len(re.Rune); i += 2 { + lo, hi := re.Rune[i], re.Rune[i+1] + for rr := lo; rr <= hi; rr++ { + info.exact.add(string(rr)) + } + } + } + + info.simplify(false) + return info +} + +// fold is the usual higher-order function. 
+func fold(f func(x, y regexpInfo) regexpInfo, sub []*syntax.Regexp, zero regexpInfo) regexpInfo { + if len(sub) == 0 { + return zero + } + if len(sub) == 1 { + return analyze(sub[0]) + } + info := f(analyze(sub[0]), analyze(sub[1])) + for i := 2; i < len(sub); i++ { + info = f(info, analyze(sub[i])) + } + return info +} + +// concat returns the regexp info for xy given x and y. +func concat(x, y regexpInfo) (out regexpInfo) { + var xy regexpInfo + + if x.exact.have() && y.exact.have() { + xy.exact = x.exact.cross(y.exact, false) + } else { + if x.exact.have() { + xy.prefix = x.exact.cross(y.prefix, false) + } else { + xy.prefix = x.prefix + if x.canEmpty { + xy.prefix = xy.prefix.union(y.prefix, false) + } + } + if y.exact.have() { + xy.suffix = x.suffix.cross(y.exact, true) + } else { + xy.suffix = y.suffix + if y.canEmpty { + xy.suffix = xy.suffix.union(x.suffix, true) + } + } + } + + // If all the possible strings in the cross product of x.suffix + // and y.prefix are long enough, then the trigram for one + // of them must be present and would not necessarily be + // accounted for in xy.prefix or xy.suffix yet. Cut things off + // at maxSet just to keep the sets manageable. + if !x.exact.have() && !y.exact.have() && + x.suffix.size() <= maxSet && y.prefix.size() <= maxSet && + x.suffix.minLen()+y.prefix.minLen() >= 3 { + xy.internal = x.suffix.cross(y.prefix, false) + } + + xy.internal = mostDistinctive(xy.internal, x.internal, y.internal) + + xy.simplify(false) + return xy +} + +// alternate returns the regexpInfo for x|y given x and y. 
+func alternate(x, y regexpInfo) (out regexpInfo) { + var xy regexpInfo + if x.exact.have() && y.exact.have() { + xy.exact = x.exact.union(y.exact, false) + } else if x.exact.have() { + xy.prefix = x.exact.union(y.prefix, false) + xy.suffix = x.exact.union(y.suffix, true) + } else if y.exact.have() { + xy.prefix = x.prefix.union(y.exact, false) + xy.suffix = x.suffix.union(y.exact.copy(), true) + } else { + xy.prefix = x.prefix.union(y.prefix, false) + xy.suffix = x.suffix.union(y.suffix, true) + } + xy.canEmpty = x.canEmpty || y.canEmpty + + if !xy.exact.have() { + xb := x.bestSet() + yb := y.bestSet() + if len(xb) > 0 && len(yb) > 0 { + xy.internal = xb.union(yb, false) + } + } + + xy.simplify(false) + return xy +} + +// simplify simplifies the regexpInfo when the exact set gets too large. +func (info *regexpInfo) simplify(force bool) { + // If there are now too many exact strings, + // loop over them, moving + // the relevant pieces into prefix and suffix. + info.exact.clean(false) + if len(info.exact) > maxExact { + for _, s := range info.exact { + info.prefix.add(s) + info.suffix.add(s) + } + info.exact = nil + } + + if !info.exact.have() { + info.simplifySet(&info.prefix) + info.simplifySet(&info.suffix) + info.simplifySet(&info.internal) + } +} + +// simplifySet reduces the size of the given set (either prefix or suffix). +// There is no need to pass around enormous prefix or suffix sets, since +// they will only be used to create trigrams. As they get too big, simplifySet +// moves the information they contain into the match query, which is +// more efficient to pass around. +func (info *regexpInfo) simplifySet(s *stringSet) { + t := *s + t.clean(s == &info.suffix) + + n := 0 + for _, str := range t { + if len(str) > n { + n = len(str) + } + } + + for ; t.size() > maxSet; n-- { + // Replace set by strings of length n-1. 
+ w := 0 + for _, str := range t { + if len(str) >= n { + if s == &info.prefix { + str = str[:n-1] + } else { + str = str[len(str)-n+1:] + } + } + if w == 0 || t[w-1] != str { + t[w] = str + w++ + } + } + t = t[:w] + t.clean(s == &info.suffix) + } + + // Now make sure that the prefix/suffix sets aren't redundant. + // For example, if we know "ab" is a possible prefix, then it + // doesn't help at all to know that "abc" is also a possible + // prefix, so delete "abc". + w := 0 + f := strings.HasPrefix + if s == &info.suffix { + f = strings.HasSuffix + } + for _, str := range t { + if w == 0 || !f(str, t[w-1]) { + t[w] = str + w++ + } + } + t = t[:w] + + *s = t +} + +func (info regexpInfo) String() string { + s := "" + if info.canEmpty { + s += "canempty " + } + if info.exact.have() { + s += "exact:" + strings.Join(info.exact, ",") + } else { + s += "prefix:" + strings.Join(info.prefix, ",") + s += " suffix:" + strings.Join(info.suffix, ",") + } + //s += " match: " + info.match.String() + return s +} + +// mostDistinctive returns the most distinctive stringSet in sets. +// The most distinctive set is the one that has the longest minLen. +func mostDistinctive(sets ...stringSet) stringSet { + best := stringSet(nil) + bestLen := 0 + + for _, s := range sets { + if !s.have() { + continue + } + thisLen := s.minLen() + if thisLen > bestLen { + best, bestLen = s, thisLen + } + } + + return best +} + +// bestSet returns the most distinctive set of strings in info. +func (info regexpInfo) bestSet() stringSet { + if info.exact.have() { + return info.exact + } + + return mostDistinctive(info.prefix, info.suffix, info.internal) +} + +// A stringSet is a set of strings. +// The nil stringSet indicates not having a set. +// The non-nil but empty stringSet is the empty set. +type stringSet []string + +// have reports whether we have a stringSet. +func (s stringSet) have() bool { + return s != nil +} + +// contains reports whether s contains str. 
+func (s stringSet) contains(str string) bool { + for _, ss := range s { + if ss == str { + return true + } + } + return false +} + +type byPrefix []string + +func (x *byPrefix) Len() int { return len(*x) } +func (x *byPrefix) Swap(i, j int) { (*x)[i], (*x)[j] = (*x)[j], (*x)[i] } +func (x *byPrefix) Less(i, j int) bool { return (*x)[i] < (*x)[j] } + +type bySuffix []string + +func (x *bySuffix) Len() int { return len(*x) } +func (x *bySuffix) Swap(i, j int) { (*x)[i], (*x)[j] = (*x)[j], (*x)[i] } +func (x *bySuffix) Less(i, j int) bool { + s := (*x)[i] + t := (*x)[j] + for i := 1; i <= len(s) && i <= len(t); i++ { + si := s[len(s)-i] + ti := t[len(t)-i] + if si < ti { + return true + } + if si > ti { + return false + } + } + return len(s) < len(t) +} + +// add adds str to the set. +func (s *stringSet) add(str string) { + *s = append(*s, str) +} + +// clean removes duplicates from the stringSet. +func (s *stringSet) clean(isSuffix bool) { + t := *s + if isSuffix { + sort.Sort((*bySuffix)(s)) + } else { + sort.Sort((*byPrefix)(s)) + } + w := 0 + for _, str := range t { + if w == 0 || t[w-1] != str { + t[w] = str + w++ + } + } + *s = t[:w] +} + +// size returns the number of strings in s. +func (s stringSet) size() int { + return len(s) +} + +// minLen returns the length of the shortest string in s. +func (s stringSet) minLen() int { + if len(s) == 0 { + return 0 + } + m := len(s[0]) + for _, str := range s { + if m > len(str) { + m = len(str) + } + } + return m +} + +// maxLen returns the length of the longest string in s. +func (s stringSet) maxLen() int { + if len(s) == 0 { + return 0 + } + m := len(s[0]) + for _, str := range s { + if m < len(str) { + m = len(str) + } + } + return m +} + +// union returns the union of s and t, reusing s's storage. +func (s stringSet) union(t stringSet, isSuffix bool) stringSet { + s = append(s, t...) + s.clean(isSuffix) + return s +} + +// cross returns the cross product of s and t. 
+func (s stringSet) cross(t stringSet, isSuffix bool) stringSet { + p := stringSet{} + for _, ss := range s { + for _, tt := range t { + p.add(ss + tt) + } + } + p.clean(isSuffix) + return p +} + +// clear truncates the set to length zero while keeping its backing storage for reuse. +func (s *stringSet) clear() { + *s = (*s)[:0] +} + +// copy returns a non-nil copy of the set that does not share storage with the original. +func (s stringSet) copy() stringSet { + return append(stringSet{}, s...) +} + +// isSubsetOf reports whether every string in s is also in t. +// It assumes both sets are sorted in ascending order under the string < operator. +func (s stringSet) isSubsetOf(t stringSet) bool { + j := 0 + for _, ss := range s { + for j < len(t) && t[j] < ss { + j++ + } + if j >= len(t) || t[j] != ss { + return false + } + } + return true +} diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705 new file mode 100644 index 0000000..e86d035 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0:\n condition: A") +string("detection:\n 0:\n Foo|contains: >\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"0\"}") +bool(true) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef new file mode 100644 index 0000000..a720e5b --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0: \n condition: A") +string("detection:\n 0:\n Foo|contains: >\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"\"}") +bool(true) From 2790a619f2f9cdb832df70bfdf39c227c3cd0a7f Mon Sep 17 00:00:00 2001 From: Bradley Kemp Date: Tue, 3 Sep 2024 16:38:15 +0100 Subject: [PATCH 4/5] cleanup --- evaluator/evaluate_search.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git
a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go index ed9b511..bfdcce1 100644 --- a/evaluator/evaluate_search.go +++ b/evaluator/evaluate_search.go @@ -149,8 +149,6 @@ func (rule *RuleEvaluator) getMatcherValues(ctx context.Context, matcher sigma.F case int, float32, float64, bool: value = fmt.Sprintf("%v", abstractValue) default: - // TODO: temporary hack - return nil, nil return nil, fmt.Errorf("expected scalar field matching value got: %v (%T)", abstractValue, abstractValue) } @@ -193,18 +191,6 @@ func (rule *RuleEvaluator) GetFieldValuesFromEvent(field string, event Event) ([ return nil, err } - //values := toGenericSlice(v) - //for _, value := range values { - // if stringValue, ok := value.(string); ok && intern != nil { - // interned, ok := intern[stringValue] - // if !ok { - // intern[stringValue] = stringValue - // interned = stringValue - // } - // value = interned - // } - // actualValues = append(actualValues, value) - //} actualValues = append(actualValues, toGenericSlice(v)...) } } From 8604da6fe3d7a716358bbffbd3c5d833bed10f90 Mon Sep 17 00:00:00 2001 From: Bradley Kemp Date: Wed, 4 Sep 2024 15:06:12 +0100 Subject: [PATCH 5/5] dedupe matching logic --- evaluator/bundle.go | 55 ++++++------------------------------------- evaluator/evaluate.go | 4 ++++ 2 files changed, 11 insertions(+), 48 deletions(-) diff --git a/evaluator/bundle.go b/evaluator/bundle.go index 0c842c9..dc1d7fb 100644 --- a/evaluator/bundle.go +++ b/evaluator/bundle.go @@ -2,7 +2,6 @@ package evaluator import ( "context" - "fmt" aho_corasick "github.com/BobuSumisu/aho-corasick" "github.com/bradleyjkemp/sigma-go" "github.com/bradleyjkemp/sigma-go/evaluator/modifiers" @@ -25,11 +24,7 @@ func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle { values := map[string][]string{} for _, rule := range rules { - e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators} - for _, option := range options { - option(e) - } - + e := ForRule(rule, options...) 
bundle.evaluators = append(bundle.evaluators, e) bundle.caseSensitive = e.caseSensitive @@ -143,52 +138,16 @@ func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]R } ruleresults := []RuleResult{} + errs := []error{} for _, rule := range bundle.evaluators { - result := Result{ - Match: false, - SearchResults: map[string]bool{}, - ConditionResults: make([]bool, len(rule.Detection.Conditions)), - } - for identifier, search := range rule.Detection.Searches { - var err error - result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, comparators) - if err != nil { - return nil, fmt.Errorf("error evaluating search %s: %w", identifier, err) - } + result, err := rule.matches(ctx, event, comparators) + if err != nil { + errs = append(errs, err) + continue } - - for conditionIndex, condition := range rule.Detection.Conditions { - searchMatches := rule.evaluateSearchExpression(condition.Search, result.SearchResults) - - switch { - // Event didn't match filters - case !searchMatches: - result.ConditionResults[conditionIndex] = false - continue - - // Simple query without any aggregation - case searchMatches && condition.Aggregation == nil: - result.ConditionResults[conditionIndex] = true - result.Match = true - continue // need to continue in case other conditions contain aggregations that need to be evaluated - - // Search expression matched but still need to see if the aggregation returns true - case searchMatches && condition.Aggregation != nil: - aggregationMatches, err := rule.evaluateAggregationExpression(ctx, conditionIndex, condition.Aggregation, event) - if err != nil { - return nil, err - } - if aggregationMatches { - result.Match = true - result.ConditionResults[conditionIndex] = true - } - continue - } - } - ruleresults = append(ruleresults, RuleResult{ - Rule: rule.Rule, Result: result, + Rule: rule.Rule, }) } return ruleresults, nil diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go index 99fafcb..1d07ff3 
100644 --- a/evaluator/evaluate.go +++ b/evaluator/evaluate.go @@ -96,6 +96,10 @@ func eventValue(e Event, key string) interface{} { } func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) { + return rule.matches(ctx, event, rule.comparators) +} + +func (rule RuleEvaluator) matches(ctx context.Context, event Event, comparators map[string]modifiers.Comparator) (Result, error) { result := Result{ Match: false, SearchResults: map[string]bool{},