Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement more efficient batch rule evaluator #45

Merged
merged 5 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 217 additions & 0 deletions evaluator/bundle.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
package evaluator

import (
	"context"
	"errors"
	"regexp"
	"strings"
	"unsafe"

	aho_corasick "github.com/BobuSumisu/aho-corasick"
	"github.com/bradleyjkemp/sigma-go"
	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
)

// ForRules compiles a set of rule evaluators which are evaluated together, allowing the use of
// more efficient string matching algorithms.
//
// Every value of a `contains` field matcher, and every substring that regexStrings reports
// for an `re` field matcher, is collected per field and compiled into a single Aho-Corasick
// automaton for that field. An empty rule set yields the zero RuleEvaluatorBundle.
func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
	if len(rules) == 0 {
		return RuleEvaluatorBundle{}
	}

	bundle := RuleEvaluatorBundle{
		ahocorasick: map[string]ahocorasickSearcher{},
		evaluators:  make([]*RuleEvaluator, 0, len(rules)),
	}

	// needles maps a field name to every substring its automaton has to search for.
	needles := map[string][]string{}

	for _, rule := range rules {
		e := ForRule(rule, options...)
		bundle.evaluators = append(bundle.evaluators, e)
		// all evaluators are built from the same options; the value of the last one wins
		bundle.caseSensitive = e.caseSensitive

		for _, search := range rule.Detection.Searches {
			for _, matcher := range search.EventMatchers {
				for _, fieldMatcher := range matcher {
					contains, regex := false, false
					for _, modifier := range fieldMatcher.Modifiers {
						switch modifier {
						case "contains":
							contains = true
						case "re":
							regex = true
						}
					}

					switch {
					case contains: // add all values to the needle set
						for _, value := range fieldMatcher.Values {
							if value == nil {
								continue
							}
							needle := modifiers.CoerceString(value)
							if !bundle.caseSensitive {
								needle = strings.ToLower(needle)
							}
							needles[fieldMatcher.Field] = append(needles[fieldMatcher.Field], needle)
						}
					case regex: // get "necessary" substrings and add to the needle set
						for _, value := range fieldMatcher.Values {
							// todo: benchmark this, should save the result?
							ss, caseInsensitive, err := regexStrings(modifiers.CoerceString(value))
							if err != nil {
								// Nothing to index for a pattern that cannot be analysed. The same
								// error is returned by ahocorasickRe.MatchesField at match time.
								continue
							}
							for _, s := range ss {
								if caseInsensitive {
									s = strings.ToLower(s)
								}
								needles[fieldMatcher.Field] = append(needles[fieldMatcher.Field], s)
							}
						}
					}
				}
			}
		}
	}

	for field, fieldNeedles := range needles {
		bundle.ahocorasick[field] = ahocorasickSearcher{
			Trie:     aho_corasick.NewTrieBuilder().AddStrings(fieldNeedles).Build(),
			patterns: fieldNeedles,
			results:  map[ahocorasickCacheKey]map[string]bool{}, // used for caching results
		}
	}
	return bundle
}

// RuleEvaluatorBundle is a set of rule evaluators that share per-field Aho-Corasick
// automatons. Build one with ForRules.
type RuleEvaluatorBundle struct {
	// ahocorasick maps a field name to the searcher holding that field's needles.
	ahocorasick map[string]ahocorasickSearcher
	// evaluators holds one evaluator per rule, in the order the rules were given.
	evaluators []*RuleEvaluator
	// caseSensitive is copied from the evaluators built by ForRules.
	caseSensitive bool
}

// ahocorasickCacheKey identifies one cached search: the exact bytes that were searched
// (start pointer plus length) and the case mode they were searched under.
//
// The length is part of the key because two strings can share a start pointer (one being a
// prefix slice of the other). The case mode is part of the key because the same haystack
// produces different match sets depending on whether it was lower-cased first.
type ahocorasickCacheKey struct {
	data          *byte
	length        int
	caseSensitive bool
}

// ahocorasickSearcher pairs a compiled automaton with a cache of the matches it has
// already produced.
type ahocorasickSearcher struct {
	*aho_corasick.Trie
	// patterns are the needles the Trie was built from.
	patterns []string
	// results caches, per searched string and case mode, the set of needles found in it.
	results map[ahocorasickCacheKey]map[string]bool
}

// getResults returns the set of needles found in s. When caseSensitive is false, s is
// lower-cased before searching, so the returned keys are the lower-cased needles.
//
// Results are cached by the identity of s (not its contents), so repeated lookups for the
// same string value cost one map access. A zero-value searcher (no automaton) returns nil,
// which callers can index safely.
//
// NOTE: the cache is written without synchronisation and is never evicted; its pointer
// keys keep every searched string reachable.
func (as ahocorasickSearcher) getResults(s string, caseSensitive bool) map[string]bool {
	if as.Trie == nil {
		return nil
	}

	// using the underlying []byte pointer means we only compute results once per interned string
	key := ahocorasickCacheKey{
		data:          unsafe.StringData(s),
		length:        len(s),
		caseSensitive: caseSensitive,
	}
	if cached, ok := as.results[key]; ok {
		return cached
	}

	// haven't already computed this
	if !caseSensitive {
		s = strings.ToLower(s)
	}
	found := map[string]bool{}
	for _, match := range as.MatchString(s) {
		// TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]?
		found[match.MatchString()] = true
	}
	as.results[key] = found
	return found
}

// RuleResult pairs the evaluation Result of a single rule with the sigma.Rule
// it was produced for. Both are embedded, so their fields are promoted.
type RuleResult struct {
Result
sigma.Rule
}

// Matches evaluates every rule in the bundle against event and returns one RuleResult for
// each rule that evaluated without error, in bundle order.
//
// The "contains" and "re" comparators are replaced with versions backed by the bundle's
// Aho-Corasick searchers; all other comparators are taken from the first evaluator.
//
// A rule that fails to evaluate is skipped rather than aborting the whole batch. The errors
// of all failed rules are combined with errors.Join and returned alongside the results of
// the rules that succeeded, so the returned slice is meaningful even when err is non-nil.
// An empty bundle returns (nil, nil).
func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) {
	if len(bundle.evaluators) == 0 {
		return nil, nil
	}

	// copy the current rule comparators so the overrides below do not modify the
	// map held by the evaluators
	base := bundle.evaluators[0].comparators
	comparators := make(map[string]modifiers.Comparator, len(base)+2)
	for name, comparator := range base {
		comparators[name] = comparator
	}

	// override the contains and re comparators to use our custom ones
	comparators["contains"] = &ahocorasickContains{
		matchers:      bundle.ahocorasick,
		caseSensitive: bundle.caseSensitive,
	}
	comparators["re"] = &ahocorasickRe{
		matchers: bundle.ahocorasick,
	}

	ruleresults := make([]RuleResult, 0, len(bundle.evaluators))
	var errs []error
	for _, rule := range bundle.evaluators {
		result, err := rule.matches(ctx, event, comparators)
		if err != nil {
			errs = append(errs, err)
			continue
		}
		ruleresults = append(ruleresults, RuleResult{
			Result: result,
			Rule:   rule.Rule,
		})
	}
	// errors.Join returns nil when no rule failed
	return ruleresults, errors.Join(errs...)
}

// ahocorasickContains is the comparator that RuleEvaluatorBundle.Matches installs
// under the "contains" key. It answers substring queries from the per-field
// searchers instead of scanning the haystack once per needle.
type ahocorasickContains struct {
// caseSensitive, when false, makes MatchesField lower-case the needle and
// request a lower-cased search of the haystack.
caseSensitive bool
// Comparator is embedded; RuleEvaluatorBundle.Matches leaves it unset when it
// constructs this type.
modifiers.Comparator
// matchers maps a field name to the searcher for that field.
matchers map[string]ahocorasickSearcher
}

// MatchesField reports whether the value actual of the given field contains expected as a
// substring. Both values are coerced to strings; unless the comparator is case sensitive,
// both are lower-cased before comparison.
//
// The answer is normally read from the field's Aho-Corasick results. If no searcher exists
// for the field (for example because none of its values produced a needle), a plain
// substring search is used instead of dereferencing a missing searcher. The returned error
// is always nil.
func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) {
	if expected == "" {
		// compatibility with old |contains behaviour
		// possibly a bug?
		return true, nil
	}

	haystack := modifiers.CoerceString(actual)
	needle := modifiers.CoerceString(expected)
	if !a.caseSensitive {
		// when operating in case-insensitive mode, search strings must be canonicalised
		// (this is ok because search strings are much smaller than the haystack)
		// TODO: should we just modify the rules in this case? (saving the lower-casing every time)
		needle = strings.ToLower(needle)
	}

	searcher, ok := a.matchers[field]
	if !ok {
		// no automaton was built for this field, so there are no precomputed results
		if !a.caseSensitive {
			haystack = strings.ToLower(haystack)
		}
		return strings.Contains(haystack, needle), nil
	}

	return searcher.getResults(haystack, a.caseSensitive)[needle], nil
}

// ahocorasickRe is the comparator that RuleEvaluatorBundle.Matches installs under
// the "re" key. It uses the per-field searchers as a cheap pre-filter before
// running the regular expression itself.
type ahocorasickRe struct {
// Comparator is embedded; RuleEvaluatorBundle.Matches leaves it unset when it
// constructs this type.
modifiers.Comparator
// matchers maps a field name to the searcher for that field.
matchers map[string]ahocorasickSearcher
}

// MatchesField reports whether the value actual of the given field matches the regular
// expression expected (both coerced to strings).
//
// Before running the regex, the substrings reported by regexStrings are looked up in the
// field's Aho-Corasick results; if none of them occurs in the haystack, the regex cannot
// match and is skipped. The pre-filter is only applied when it has something to say: if
// regexStrings reports no substrings, or no searcher exists for the field, the regex is
// run directly.
//
// An error is returned if the pattern does not compile or regexStrings rejects it.
func (a *ahocorasickRe) MatchesField(field string, actual any, expected any) (bool, error) {
	stringRe := modifiers.CoerceString(expected)
	re, err := regexp.Compile(stringRe) // todo: cache this?
	if err != nil {
		return false, err
	}

	// this function returns a set of simple strings
	// which necessarily appear if the regex matches
	// If none are present in `actual`, we don't need to run the regex
	ss, caseInsensitive, err := regexStrings(stringRe)
	if err != nil {
		return false, err
	}

	haystack := modifiers.CoerceString(actual)

	if searcher, ok := a.matchers[field]; ok && len(ss) > 0 {
		results := searcher.getResults(haystack, !caseInsensitive)
		found := false
		for _, s := range ss {
			if caseInsensitive {
				// ForRules lower-cases these needles before indexing them, and a
				// case-insensitive search reports lower-cased matches
				s = strings.ToLower(s)
			}
			if results[s] {
				found = true
				break
			}
		}
		if !found {
			return false, nil
		}
	}

	// our cheap heuristic says the regex *might* match the string,
	// so we have to now run the full regex
	return re.MatchString(haystack), nil
}
13 changes: 10 additions & 3 deletions evaluator/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"context"
"encoding/json"
"fmt"

"github.com/bradleyjkemp/sigma-go"
"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
)

type RuleEvaluator struct {
Expand All @@ -17,6 +17,7 @@ type RuleEvaluator struct {

expandPlaceholder func(ctx context.Context, placeholderName string) ([]string, error)
caseSensitive bool
comparators map[string]modifiers.Comparator

count func(ctx context.Context, gb GroupedByValues) (float64, error)
average func(ctx context.Context, gb GroupedByValues, value float64) (float64, error)
Expand All @@ -30,6 +31,7 @@ type RuleEvaluator struct {
// For example, if a Sigma rule has a condition like this (attempting to detect login brute forcing)
//
// detection:
//
// login_attempt:
// # something here
// condition:
Expand All @@ -40,6 +42,7 @@ type RuleEvaluator struct {
// Each different GroupedByValues points to a different box.
//
// GroupedByValues
//
// ||
// ___↓↓___ ________
// | User A | | User B |
Expand All @@ -65,7 +68,7 @@ func (a GroupedByValues) Key() string {
}

func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator {
e := &RuleEvaluator{Rule: rule}
e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators}
for _, option := range options {
option(e)
}
Expand Down Expand Up @@ -93,14 +96,18 @@ func eventValue(e Event, key string) interface{} {
}

// Matches evaluates the rule against event using the comparators stored on the
// evaluator (rule.comparators); the work is delegated to rule.matches.
func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) {
return rule.matches(ctx, event, rule.comparators)
}

func (rule RuleEvaluator) matches(ctx context.Context, event Event, comparators map[string]modifiers.Comparator) (Result, error) {
result := Result{
Match: false,
SearchResults: map[string]bool{},
ConditionResults: make([]bool, len(rule.Detection.Conditions)),
}
for identifier, search := range rule.Detection.Searches {
var err error
result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event)
result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, rule.comparators)
if err != nil {
return Result{}, fmt.Errorf("error evaluating search %s: %w", identifier, err)
}
Expand Down
16 changes: 6 additions & 10 deletions evaluator/evaluate_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/PaesslerAG/jsonpath"
"github.com/bradleyjkemp/sigma-go"
"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
"path"
"reflect"
"regexp"
"strings"

"github.com/PaesslerAG/jsonpath"
"github.com/bradleyjkemp/sigma-go"
)

func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, searchResults map[string]bool) bool {
Expand Down Expand Up @@ -84,7 +83,7 @@ func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, sear
panic(fmt.Sprintf("unhandled node type %T", search))
}

func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event) (bool, error) {
func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event, comparators map[string]modifiers.Comparator) (bool, error) {
if len(search.Keywords) > 0 {
return false, fmt.Errorf("keywords unsupported")
}
Expand Down Expand Up @@ -112,11 +111,7 @@ eventMatcher:
// field matchers can specify modifiers (FieldName|modifier1|modifier2) which change the matching behaviour
var comparator modifiers.ComparatorFunc
var err error
if rule.caseSensitive {
comparator, err = modifiers.GetComparatorCaseSensitive(fieldModifiers...)
} else {
comparator, err = modifiers.GetComparator(fieldModifiers...)
}
comparator, err = modifiers.GetComparator(fieldMatcher.Field, comparators, fieldModifiers...)
if err != nil {
return false, err
}
Expand Down Expand Up @@ -199,6 +194,7 @@ func (rule *RuleEvaluator) GetFieldValuesFromEvent(field string, event Event) ([
actualValues = append(actualValues, toGenericSlice(v)...)
}
}

return actualValues, nil
}

Expand Down Expand Up @@ -291,7 +287,7 @@ func toGenericSlice(v interface{}) []interface{} {
return []interface{}{v}
}

var out []interface{}
out := make([]interface{}, 0, rv.Len())
for i := 0; i < rv.Len(); i++ {
out = append(out, rv.Index(i).Interface())
}
Expand Down
Loading
Loading