From 0b806878a2a9668b87534a642d37879cce5fba6f Mon Sep 17 00:00:00 2001
From: Bradley Kemp <bradley@bradleyjkemp.dev>
Date: Fri, 30 Aug 2024 15:48:03 +0100
Subject: [PATCH 1/5] mvp

---
 evaluator/evaluate.go            | 177 ++++++++++++++++++++++++++++++-
 evaluator/evaluate_search.go     |  26 +++--
 evaluator/evaluate_test.go       |  60 +++++++++++
 evaluator/indexes.go             |   6 +-
 evaluator/modifiers/modifiers.go |  50 +++++----
 evaluator/options.go             |   2 +
 go.mod                           |   3 +-
 go.sum                           |   2 +
 8 files changed, 288 insertions(+), 38 deletions(-)

diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go
index 745b7e9..06101c7 100644
--- a/evaluator/evaluate.go
+++ b/evaluator/evaluate.go
@@ -4,8 +4,10 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-
 	"github.com/bradleyjkemp/sigma-go"
+	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
+	aho_corasick "github.com/pgavlin/aho-corasick"
+	"unsafe"
 )
 
 type RuleEvaluator struct {
@@ -17,6 +19,7 @@ type RuleEvaluator struct {
 
 	expandPlaceholder func(ctx context.Context, placeholderName string) ([]string, error)
 	caseSensitive     bool
+	comparators       map[string]modifiers.Comparator
 
 	count   func(ctx context.Context, gb GroupedByValues) (float64, error)
 	average func(ctx context.Context, gb GroupedByValues, value float64) (float64, error)
@@ -30,6 +33,7 @@ type RuleEvaluator struct {
 // For example, if a Sigma rule has a condition like this (attempting to detect login brute forcing)
 //
 // detection:
+//
 //	  login_attempt:
 //	    # something here
 //	  condition:
@@ -40,6 +44,7 @@ type RuleEvaluator struct {
 // Each different GroupedByValues points to a different box.
 //
 // GroupedByValues
+//
 //	    ||
 //	 ___↓↓___          ________
 //	| User A |        | User B |
@@ -72,6 +77,64 @@ func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator {
 	return e
 }
 
+// ForRules compiles a set of rule evaluators which are evaluated together allowing for use of
+// more efficient string matching algorithms
+func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
+	bundle := RuleEvaluatorBundle{
+		ahocorasick: map[string]ahocorasickSearcher{},
+	}
+
+	values := map[string][]string{}
+
+	for _, rule := range rules {
+		e := &RuleEvaluator{Rule: rule}
+		for _, option := range options {
+			option(e)
+		}
+
+		bundle.evaluators = append(bundle.evaluators, e)
+
+		for _, search := range rule.Detection.Searches {
+			for _, matcher := range search.EventMatchers {
+				for _, fieldMatcher := range matcher {
+					for _, value := range fieldMatcher.Values {
+						values[fieldMatcher.Field] = append(values[fieldMatcher.Field], value.(string)) // todo use coerceString
+					}
+				}
+			}
+		}
+	}
+
+	caseSensitive := false
+	if len(bundle.evaluators) > 0 {
+		caseSensitive = bundle.evaluators[0].caseSensitive
+	}
+
+	for field, fieldValues := range values {
+		builder := aho_corasick.NewAhoCorasickBuilder(aho_corasick.Opts{
+			AsciiCaseInsensitive: caseSensitive, // TODO: parse this out from the options
+			MatchOnlyWholeWords:  false,
+			MatchKind:            aho_corasick.StandardMatch,
+			DFA:                  false, // TODO: benchmark
+		})
+		bundle.ahocorasick[field] = ahocorasickSearcher{
+			AhoCorasick: builder.Build(fieldValues),
+			patterns:    fieldValues,
+		}
+	}
+	return bundle
+}
+
+type RuleEvaluatorBundle struct {
+	ahocorasick map[string]ahocorasickSearcher
+	evaluators  []*RuleEvaluator
+}
+
+type ahocorasickSearcher struct {
+	aho_corasick.AhoCorasick
+	patterns []string
+}
+
 type Result struct {
 	Match            bool            // whether this event matches the Sigma rule
 	SearchResults    map[string]bool // For each Search, whether it matched the event
@@ -92,6 +155,116 @@ func eventValue(e Event, key string) interface{} {
 	}
 }
 
+type ahocorasickSearch struct {
+	field    string
+	haystack *byte
+}
+
+type RuleResult struct {
+	Result
+	sigma.Rule
+}
+
+func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) {
+	if len(bundle.evaluators) == 0 {
+		fmt.Println("no evaluators in bundle!")
+		return nil, nil
+	}
+
+	// copy the current rule comparators
+	comparators := map[string]modifiers.Comparator{}
+	for name, comparator := range bundle.evaluators[0].comparators {
+		comparators[name] = comparator
+	}
+
+	c := &ahocorasickContains{
+		//Comparator: comparators["contains"], // fall back to the normal contains comparator for non MatchField calls
+		matchers: bundle.ahocorasick,
+		results:  map[ahocorasickSearch]map[string]bool{},
+	}
+	// override the contains comparator to use our custom one
+	comparators["contains"] = c
+
+	ruleresults := []RuleResult{}
+	for _, rule := range bundle.evaluators {
+		result := Result{
+			Match:            false,
+			SearchResults:    map[string]bool{},
+			ConditionResults: make([]bool, len(rule.Detection.Conditions)),
+		}
+		for identifier, search := range rule.Detection.Searches {
+			var err error
+			result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, comparators)
+			if err != nil {
+				return nil, fmt.Errorf("error evaluating search %s: %w", identifier, err)
+			}
+		}
+
+		for conditionIndex, condition := range rule.Detection.Conditions {
+			searchMatches := rule.evaluateSearchExpression(condition.Search, result.SearchResults)
+
+			switch {
+			// Event didn't match filters
+			case !searchMatches:
+				result.ConditionResults[conditionIndex] = false
+				continue
+
+			// Simple query without any aggregation
+			case searchMatches && condition.Aggregation == nil:
+				result.ConditionResults[conditionIndex] = true
+				result.Match = true
+				continue // need to continue in case other conditions contain aggregations that need to be evaluated
+
+			// Search expression matched but still need to see if the aggregation returns true
+			case searchMatches && condition.Aggregation != nil:
+				aggregationMatches, err := rule.evaluateAggregationExpression(ctx, conditionIndex, condition.Aggregation, event)
+				if err != nil {
+					return nil, err
+				}
+				if aggregationMatches {
+					result.Match = true
+					result.ConditionResults[conditionIndex] = true
+				}
+				continue
+			}
+		}
+
+		ruleresults = append(ruleresults, RuleResult{
+			Rule:   rule.Rule,
+			Result: result,
+		})
+	}
+	return ruleresults, nil
+}
+
+type ahocorasickContains struct {
+	runCount int
+	modifiers.Comparator
+	matchers map[string]ahocorasickSearcher
+	results  map[ahocorasickSearch]map[string]bool
+}
+
+func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) {
+	haystack := modifiers.CoerceString(actual)
+	search := ahocorasickSearch{
+		field:    field,
+		haystack: unsafe.StringData(haystack),
+	}
+	//search := haystack
+	existingResult, ok := a.results[search]
+	if !ok {
+		a.runCount++
+		a.results[search] = map[string]bool{}
+		matcher := a.matchers[field]
+		for _, match := range matcher.FindAll(haystack) {
+			a.results[search][matcher.patterns[match.Pattern()]] = true
+		}
+		existingResult = a.results[search]
+	}
+
+	return existingResult[modifiers.CoerceString(expected)], nil
+}
+
 func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) {
 	result := Result{
 		Match:            false,
@@ -100,7 +273,7 @@ func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, err
 	}
 	for identifier, search := range rule.Detection.Searches {
 		var err error
-		result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event)
+		result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, rule.comparators)
 		if err != nil {
 			return Result{}, fmt.Errorf("error evaluating search %s: %w", identifier, err)
 		}
diff --git a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go
index 844e506..aeb2a0b 100644
--- a/evaluator/evaluate_search.go
+++ b/evaluator/evaluate_search.go
@@ -4,14 +4,13 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"github.com/PaesslerAG/jsonpath"
+	"github.com/bradleyjkemp/sigma-go"
 	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
 	"path"
 	"reflect"
 	"regexp"
 	"strings"
-
-	"github.com/PaesslerAG/jsonpath"
-	"github.com/bradleyjkemp/sigma-go"
 )
 
 func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, searchResults map[string]bool) bool {
@@ -84,7 +83,7 @@ func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, sear
 	panic(fmt.Sprintf("unhandled node type %T", search))
 }
 
-func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event) (bool, error) {
+func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event, comparators map[string]modifiers.Comparator) (bool, error) {
 	if len(search.Keywords) > 0 {
 		return false, fmt.Errorf("keywords unsupported")
 	}
@@ -112,11 +111,7 @@ eventMatcher:
 			// field matchers can specify modifiers (FieldName|modifier1|modifier2) which change the matching behaviour
 			var comparator modifiers.ComparatorFunc
 			var err error
-			if rule.caseSensitive {
-				comparator, err = modifiers.GetComparatorCaseSensitive(fieldModifiers...)
-			} else {
-				comparator, err = modifiers.GetComparator(fieldModifiers...)
-			}
+			comparator, err = modifiers.GetComparator(fieldMatcher.Field, comparators, fieldModifiers...)
 			if err != nil {
 				return false, err
 			}
@@ -196,9 +191,22 @@ func (rule *RuleEvaluator) GetFieldValuesFromEvent(field string, event Event) ([
 				return nil, err
 			}
 
+			//values := toGenericSlice(v)
+			//for _, value := range values {
+			//	if stringValue, ok := value.(string); ok && intern != nil {
+			//		interned, ok := intern[stringValue]
+			//		if !ok {
+			//			intern[stringValue] = stringValue
+			//			interned = stringValue
+			//		}
+			//		value = interned
+			//	}
+			//	actualValues = append(actualValues, value)
+			//}
 			actualValues = append(actualValues, toGenericSlice(v)...)
 		}
 	}
+
 	return actualValues, nil
 }
 
diff --git a/evaluator/evaluate_test.go b/evaluator/evaluate_test.go
index 797aea1..0b2593c 100644
--- a/evaluator/evaluate_test.go
+++ b/evaluator/evaluate_test.go
@@ -2,6 +2,7 @@ package evaluator
 
 import (
 	"context"
+	"fmt"
 	"testing"
 
 	"github.com/bradleyjkemp/sigma-go"
@@ -92,6 +93,65 @@ func TestRuleEvaluator_Matches(t *testing.T) {
 	}
 }
 
+func TestRuleEvaluatorBundle_Matches(t *testing.T) {
+	r1 := sigma.Rule{
+		Detection: sigma.Detection{
+			Searches: map[string]sigma.Search{
+				"foo": {
+					EventMatchers: []sigma.EventMatcher{
+						{
+							{
+								Field:     "field",
+								Modifiers: []string{"contains"},
+								Values: []interface{}{
+									"foo",
+								},
+							},
+						},
+					},
+				},
+			},
+			Conditions: []sigma.Condition{{
+				Search: sigma.AllOfThem{},
+			},
+			},
+		},
+	}
+	r2 := sigma.Rule{
+		Detection: sigma.Detection{
+			Searches: map[string]sigma.Search{
+				"foo": {
+					EventMatchers: []sigma.EventMatcher{
+						{
+							{
+								Field:     "field",
+								Modifiers: []string{"contains"},
+								Values: []interface{}{
+									"bar",
+								},
+							},
+						},
+					},
+				},
+			},
+			Conditions: []sigma.Condition{{
+				Search: sigma.AllOfThem{},
+			},
+			},
+		},
+	}
+
+	bundle := ForRules([]sigma.Rule{r1, r2})
+
+	results, err := bundle.Matches(context.Background(), map[string]interface{}{
+		"field": "foobar",
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	fmt.Println(results)
+}
+
 func TestRuleEvaluator_Matches_WithPlaceholder(t *testing.T) {
 	rule := ForRule(sigma.Rule{
 		Detection: sigma.Detection{
diff --git a/evaluator/indexes.go b/evaluator/indexes.go
index 5b3c698..1f45ad5 100644
--- a/evaluator/indexes.go
+++ b/evaluator/indexes.go
@@ -64,8 +64,8 @@ func (rule RuleEvaluator) Indexes() []string {
 }
 
 // RelevantToEvent calculates whether a rule is applicable to an event based on:
-// 	* Whether the rule has been configured with a config file that matches the eventIndex
-//	* Whether the event matches the conditions from the config file
+//   - Whether the rule has been configured with a config file that matches the eventIndex
+//   - Whether the event matches the conditions from the config file
 func (rule RuleEvaluator) RelevantToEvent(ctx context.Context, eventIndex string, event Event) (bool, error) {
 	matchedIndex := false
 	for _, index := range rule.indexes {
@@ -82,7 +82,7 @@ func (rule RuleEvaluator) RelevantToEvent(ctx context.Context, eventIndex string
 	// need to check for any value constraints that have been specified
 	// TODO: this doesn't yet support the logsourcemerging option to choose between ANDing/ORing these conditions
 	for _, condition := range rule.indexConditions {
-		searchMatches, err := rule.evaluateSearch(ctx, condition, event)
+		searchMatches, err := rule.evaluateSearch(ctx, condition, event, rule.comparators)
 		if err != nil {
 			return false, fmt.Errorf("failed to evaluate index condition: %w", err)
 		}
diff --git a/evaluator/modifiers/modifiers.go b/evaluator/modifiers/modifiers.go
index 58be472..b63f29f 100644
--- a/evaluator/modifiers/modifiers.go
+++ b/evaluator/modifiers/modifiers.go
@@ -11,15 +11,10 @@ import (
 	"gopkg.in/yaml.v3"
 )
 
-func GetComparator(modifiers ...string) (ComparatorFunc, error) {
-	return getComparator(Comparators, modifiers...)
-}
-
-func GetComparatorCaseSensitive(modifiers ...string) (ComparatorFunc, error) {
-	return getComparator(ComparatorsCaseSensitive, modifiers...)
-}
-
-func getComparator(comparators map[string]Comparator, modifiers ...string) (ComparatorFunc, error) {
+func GetComparator(field string, comparators map[string]Comparator, modifiers ...string) (ComparatorFunc, error) {
+	if comparators == nil {
+		comparators = Comparators
+	}
 	if len(modifiers) == 0 {
 		return baseComparator{}.Matches, nil
 	}
@@ -69,7 +64,11 @@ func getComparator(comparators map[string]Comparator, modifiers ...string) (Comp
 			}
 		}
 
-		return comparator.Matches(actual, expected)
+		if fieldComparator, ok := comparator.(FieldComparator); ok {
+			return fieldComparator.MatchesField(field, actual, expected)
+		} else {
+			return comparator.Matches(actual, expected)
+		}
 	}, nil
 }
 
@@ -79,6 +78,11 @@ type Comparator interface {
 	Matches(actual any, expected any) (bool, error)
 }
 
+// FieldComparator is an optional extension to Comparator which also passes the field name
+type FieldComparator interface {
+	MatchesField(field string, actual any, expected any) (bool, error)
+}
+
 type ComparatorFunc func(actual, expected any) (bool, error)
 
 // ValueModifier modifies the expected value before it is passed to the comparator.
@@ -127,7 +131,7 @@ func (baseComparator) Matches(actual, expected any) (bool, error) {
 		return true, nil
 	default:
 		// The Sigma spec defines that by default comparisons are case-insensitive
-		return strings.EqualFold(coerceString(actual), coerceString(expected)), nil
+		return strings.EqualFold(CoerceString(actual), CoerceString(expected)), nil
 	}
 }
 
@@ -135,67 +139,67 @@ type contains struct{}
 
 func (contains) Matches(actual, expected any) (bool, error) {
 	// The Sigma spec defines that by default comparisons are case-insensitive
-	return strings.Contains(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil
+	return strings.Contains(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil
 }
 
 type endswith struct{}
 
 func (endswith) Matches(actual, expected any) (bool, error) {
 	// The Sigma spec defines that by default comparisons are case-insensitive
-	return strings.HasSuffix(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil
+	return strings.HasSuffix(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil
 }
 
 type startswith struct{}
 
 func (startswith) Matches(actual, expected any) (bool, error) {
 	// The Sigma spec defines that by default comparisons are case-insensitive
-	return strings.HasPrefix(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil
+	return strings.HasPrefix(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil
 }
 
 type containsCS struct{}
 
 func (containsCS) Matches(actual, expected any) (bool, error) {
-	return strings.Contains(coerceString(actual), coerceString(expected)), nil
+	return strings.Contains(CoerceString(actual), CoerceString(expected)), nil
 }
 
 type endswithCS struct{}
 
 func (endswithCS) Matches(actual, expected any) (bool, error) {
-	return strings.HasSuffix(coerceString(actual), coerceString(expected)), nil
+	return strings.HasSuffix(CoerceString(actual), CoerceString(expected)), nil
 }
 
 type startswithCS struct{}
 
 func (startswithCS) Matches(actual, expected any) (bool, error) {
-	return strings.HasPrefix(coerceString(actual), coerceString(expected)), nil
+	return strings.HasPrefix(CoerceString(actual), CoerceString(expected)), nil
 }
 
 type b64 struct{}
 
 func (b64) Modify(value any) (any, error) {
-	return base64.StdEncoding.EncodeToString([]byte(coerceString(value))), nil
+	return base64.StdEncoding.EncodeToString([]byte(CoerceString(value))), nil
 }
 
 type re struct{}
 
 func (re) Matches(actual any, expected any) (bool, error) {
-	re, err := regexp.Compile(coerceString(expected))
+	re, err := regexp.Compile(CoerceString(expected))
 	if err != nil {
 		return false, err
 	}
 
-	return re.MatchString(coerceString(actual)), nil
+	return re.MatchString(CoerceString(actual)), nil
 }
 
 type cidr struct{}
 
 func (cidr) Matches(actual any, expected any) (bool, error) {
-	_, cidr, err := net.ParseCIDR(coerceString(expected))
+	_, cidr, err := net.ParseCIDR(CoerceString(expected))
 	if err != nil {
 		return false, err
 	}
 
-	ip := net.ParseIP(coerceString(actual))
+	ip := net.ParseIP(CoerceString(actual))
 	return cidr.Contains(ip), nil
 }
 
@@ -227,7 +231,7 @@ func (lte) Matches(actual any, expected any) (bool, error) {
 	return lte, err
 }
 
-func coerceString(v interface{}) string {
+func CoerceString(v interface{}) string {
 	switch vv := v.(type) {
 	case string:
 		return vv
diff --git a/evaluator/options.go b/evaluator/options.go
index 4684e54..208a2d9 100644
--- a/evaluator/options.go
+++ b/evaluator/options.go
@@ -2,6 +2,7 @@ package evaluator
 
 import (
 	"context"
+	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
 
 	"github.com/bradleyjkemp/sigma-go"
 )
@@ -45,4 +46,5 @@ func WithConfig(config ...sigma.Config) Option {
 // This can increase performance (especially for larger events) by skipping expensive calls to strings.ToLower
 func CaseSensitive(e *RuleEvaluator) {
 	e.caseSensitive = true
+	e.comparators = modifiers.ComparatorsCaseSensitive
 }
diff --git a/go.mod b/go.mod
index af8cb26..e98c581 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module github.com/bradleyjkemp/sigma-go
 
-go 1.18
+go 1.21
 
 require (
 	github.com/PaesslerAG/jsonpath v0.1.1
@@ -13,5 +13,6 @@ require (
 require (
 	github.com/PaesslerAG/gval v1.0.0 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/pgavlin/aho-corasick v0.5.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 3c7dd53..2654f87 100644
--- a/go.sum
+++ b/go.sum
@@ -13,6 +13,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/pgavlin/aho-corasick v0.5.0 h1:gcEz9/z7CDs/KqZrdSJm6FQw4/dj2/mOho6+p77yZsw=
+github.com/pgavlin/aho-corasick v0.5.0/go.mod h1:UyKgVsAp5Un59BCpzrpFkPyETFMn1tGjdbRYvoq0l2g=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

From 3afe61028196c619215f2e653430be46daf4f0c8 Mon Sep 17 00:00:00 2001
From: Bradley Kemp <bradley@bradleyjkemp.dev>
Date: Mon, 2 Sep 2024 14:19:33 +0100
Subject: [PATCH 2/5] Fuzz, move to new, correct library

---
 evaluator/evaluate.go                         | 79 +++++++++++++------
 evaluator/evaluate_search.go                  |  2 +
 evaluator/evaluate_test.go                    |  4 +-
 evaluator/fuzz_test.go                        | 79 +++++++++++++++++++
 evaluator/index_test.go                       |  3 -
 .../FuzzRuleBundleMatches/1907e011ae8a6fdd    |  6 ++
 .../FuzzRuleBundleMatches/1b692dbec8c613de    |  6 ++
 .../FuzzRuleBundleMatches/3a94d65bc4acc663    |  6 ++
 .../FuzzRuleBundleMatches/59d99db21bdb3323    |  6 ++
 .../FuzzRuleBundleMatches/6450716b6258ade2    |  6 ++
 .../FuzzRuleBundleMatches/754aab3cbb754e99    |  6 ++
 .../FuzzRuleBundleMatches/75e97febfc5feb9e    |  6 ++
 .../FuzzRuleBundleMatches/85bf2132f746b224    |  6 ++
 .../FuzzRuleBundleMatches/89bbe22b303a3d8a    |  6 ++
 .../FuzzRuleBundleMatches/90c72819f91d52a6    |  6 ++
 .../FuzzRuleBundleMatches/9fab5927760a02ef    |  6 ++
 .../FuzzRuleBundleMatches/ef6ebca9ff3c502d    |  6 ++
 go.mod                                        |  2 +-
 go.sum                                        |  4 +-
 19 files changed, 214 insertions(+), 31 deletions(-)
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d

diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go
index 06101c7..537b2e2 100644
--- a/evaluator/evaluate.go
+++ b/evaluator/evaluate.go
@@ -4,9 +4,10 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	aho_corasick "github.com/BobuSumisu/aho-corasick"
 	"github.com/bradleyjkemp/sigma-go"
 	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
-	aho_corasick "github.com/pgavlin/aho-corasick"
+	"strings"
 	"unsafe"
 )
 
@@ -70,7 +71,7 @@ func (a GroupedByValues) Key() string {
 }
 
 func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator {
-	e := &RuleEvaluator{Rule: rule}
+	e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators}
 	for _, option := range options {
 		option(e)
 	}
@@ -80,6 +81,10 @@ func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator {
 // ForRules compiles a set of rule evaluators which are evaluated together allowing for use of
 // more efficient string matching algorithms
 func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
+	if len(rules) == 0 {
+		return RuleEvaluatorBundle{}
+	}
+
 	bundle := RuleEvaluatorBundle{
 		ahocorasick: map[string]ahocorasickSearcher{},
 	}
@@ -87,7 +92,7 @@ func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
 	values := map[string][]string{}
 
 	for _, rule := range rules {
-		e := &RuleEvaluator{Rule: rule}
+		e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators}
 		for _, option := range options {
 			option(e)
 		}
@@ -97,8 +102,20 @@ func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
 		for _, search := range rule.Detection.Searches {
 			for _, matcher := range search.EventMatchers {
 				for _, fieldMatcher := range matcher {
+					contains := false
+					for _, modifier := range fieldMatcher.Modifiers {
+						if modifier == "contains" {
+							contains = true
+						}
+					}
+					if !contains {
+						continue
+					}
 					for _, value := range fieldMatcher.Values {
-						values[fieldMatcher.Field] = append(values[fieldMatcher.Field], value.(string)) // todo use coerceString
+						if value == nil {
+							continue
+						}
+						values[fieldMatcher.Field] = append(values[fieldMatcher.Field], modifiers.CoerceString(value))
 					}
 				}
 			}
@@ -111,27 +128,31 @@ func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
 	}
 
 	for field, fieldValues := range values {
-		builder := aho_corasick.NewAhoCorasickBuilder(aho_corasick.Opts{
-			AsciiCaseInsensitive: caseSensitive, // TODO: parse this out from the options
-			MatchOnlyWholeWords:  false,
-			MatchKind:            aho_corasick.StandardMatch,
-			DFA:                  false, // TODO: benchmark
-		})
+		if !caseSensitive {
+			// when operating in case-insensitive mode, ahocorasick only returns matches for the *first* match
+			// so we have to canonicalise our needles to lowercase.
+			// otherwise if we have both "A" and "a", we're not sure which will be returned as the match
+			// see: go test -run="FuzzRuleBundleMatches/1b692dbec8c613de"
+			for i, value := range fieldValues {
+				fieldValues[i] = strings.ToLower(value)
+			}
+		}
 		bundle.ahocorasick[field] = ahocorasickSearcher{
-			AhoCorasick: builder.Build(fieldValues),
-			patterns:    fieldValues,
+			Trie:     aho_corasick.NewTrieBuilder().AddStrings(fieldValues).Build(),
+			patterns: fieldValues,
 		}
 	}
 	return bundle
 }
 
 type RuleEvaluatorBundle struct {
-	ahocorasick map[string]ahocorasickSearcher
-	evaluators  []*RuleEvaluator
+	ahocorasick   map[string]ahocorasickSearcher
+	evaluators    []*RuleEvaluator
+	caseSensitive bool
 }
 
 type ahocorasickSearcher struct {
-	aho_corasick.AhoCorasick
+	*aho_corasick.Trie
 	patterns []string
 }
 
@@ -167,7 +188,6 @@ type RuleResult struct {
 
 func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) {
 	if len(bundle.evaluators) == 0 {
-		fmt.Println("no evaluators in bundle!")
 		return nil, nil
 	}
 
@@ -238,31 +258,46 @@ func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]R
 }
 
 type ahocorasickContains struct {
-	runCount int
+	caseSensitive bool
 	modifiers.Comparator
 	matchers map[string]ahocorasickSearcher
 	results  map[ahocorasickSearch]map[string]bool
 }
 
 func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) {
+	if actual == nil && expected == "" {
+		// compatability with old |contains behaviour
+		// possibly a bug?
+		return true, nil
+	}
 	haystack := modifiers.CoerceString(actual)
 	search := ahocorasickSearch{
 		field:    field,
 		haystack: unsafe.StringData(haystack),
 	}
-	//search := haystack
+
 	existingResult, ok := a.results[search]
-	if !ok {
-		a.runCount++
+	if !ok { // haven't already computed this
+		if !a.caseSensitive {
+			haystack = strings.ToLower(haystack)
+		}
 		a.results[search] = map[string]bool{}
 		matcher := a.matchers[field]
-		for _, match := range matcher.FindAll(haystack) {
+		for _, match := range matcher.MatchString(haystack) {
+			// TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]?
 			a.results[search][matcher.patterns[match.Pattern()]] = true
 		}
 		existingResult = a.results[search]
 	}
 
-	return existingResult[modifiers.CoerceString(expected)], nil
+	needle := modifiers.CoerceString(expected)
+	if !a.caseSensitive {
+		// when operating in case-insensitive mode, search strings must be canonicalised
+		// (this is ok because search strings are much smaller than the haystack)
+		// TODO: should we just modify the rules in this case? (saving the lower-casing every time)
+		needle = strings.ToLower(needle)
+	}
+	return existingResult[needle], nil
 }
 
 func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) {
diff --git a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go
index aeb2a0b..598c15b 100644
--- a/evaluator/evaluate_search.go
+++ b/evaluator/evaluate_search.go
@@ -149,6 +149,8 @@ func (rule *RuleEvaluator) getMatcherValues(ctx context.Context, matcher sigma.F
 		case int, float32, float64, bool:
 			value = fmt.Sprintf("%v", abstractValue)
 		default:
+			// TODO: temporary hack
+			return nil, nil
 			return nil, fmt.Errorf("expected scalar field matching value got: %v (%T)", abstractValue, abstractValue)
 		}
 
diff --git a/evaluator/evaluate_test.go b/evaluator/evaluate_test.go
index 0b2593c..cc7cbc5 100644
--- a/evaluator/evaluate_test.go
+++ b/evaluator/evaluate_test.go
@@ -2,7 +2,6 @@ package evaluator
 
 import (
 	"context"
-	"fmt"
 	"testing"
 
 	"github.com/bradleyjkemp/sigma-go"
@@ -143,13 +142,12 @@ func TestRuleEvaluatorBundle_Matches(t *testing.T) {
 
 	bundle := ForRules([]sigma.Rule{r1, r2})
 
-	results, err := bundle.Matches(context.Background(), map[string]interface{}{
+	_, err := bundle.Matches(context.Background(), map[string]interface{}{
 		"field": "foobar",
 	})
 	if err != nil {
 		t.Fatal(err)
 	}
-	fmt.Println(results)
 }
 
 func TestRuleEvaluator_Matches_WithPlaceholder(t *testing.T) {
diff --git a/evaluator/fuzz_test.go b/evaluator/fuzz_test.go
index 5a99f54..cce0e8d 100644
--- a/evaluator/fuzz_test.go
+++ b/evaluator/fuzz_test.go
@@ -3,6 +3,9 @@ package evaluator
 import (
 	"context"
 	"encoding/json"
+	"fmt"
+	"reflect"
+	"sync"
 	"testing"
 
 	"github.com/bradleyjkemp/sigma-go"
@@ -49,3 +52,79 @@ func FuzzRuleMatches(f *testing.F) {
 	})
 
 }
+
+// FuzzRuleBundleMatches checks that evaluating two rules through ForRules yields, for each rule,
+// the same Result as evaluating that rule on its own via ForRule with the same options.
+func FuzzRuleBundleMatches(f *testing.F) {
+	f.Add(testRule, testRule, testConfig, `{"foo": "bar", "bar": "baz"}`, false)
+	f.Fuzz(func(t *testing.T, rule1, rule2, config, payload string, caseSensitive bool) {
+		var r1, r2 sigma.Rule
+		var c sigma.Config
+		var err error
+		wg := sync.WaitGroup{}
+		wg.Add(1)
+
+		go func() {
+			defer func() {
+				// recover before wg.Done: signalling first lets the read of err after wg.Wait race with this write
+				if r := recover(); r != nil {
+					err = fmt.Errorf("panic in parsing")
+				}
+				wg.Done()
+			}()
+			r1, err = sigma.ParseRule([]byte(rule1))
+			if err != nil || len(r1.Detection.Searches) == 0 || len(r1.Detection.Conditions) == 0 {
+				return
+			}
+			r2, err = sigma.ParseRule([]byte(rule2))
+			if err != nil || len(r2.Detection.Searches) == 0 || len(r2.Detection.Conditions) == 0 {
+				return
+			}
+			c, err = sigma.ParseConfig([]byte(config))
+		}()
+		wg.Wait()
+		if err != nil {
+			return
+		}
+
+		var e Event
+		if err := json.Unmarshal([]byte(payload), &e); err != nil {
+			return
+		}
+		if e == nil || reflect.TypeOf(e).Kind() != reflect.Map {
+			return
+		}
+
+		options := []Option{WithConfig(c)}
+		if caseSensitive {
+			options = append(options, CaseSensitive)
+		}
+
+		eval1 := ForRule(r1, options...)
+		eval2 := ForRule(r2, options...)
+		match1, err1 := eval1.Matches(context.Background(), e)
+		if err1 != nil {
+			return
+		}
+		match2, err2 := eval2.Matches(context.Background(), e)
+		if err2 != nil {
+			return
+		}
+
+		bundle := ForRules([]sigma.Rule{r1, r2}, options...)
+		matches, errs := bundle.Matches(context.Background(), e)
+		if errs != nil {
+			t.Fatal(errs)
+		}
+		if len(matches) != 2 {
+			t.Fatalf("didn't get 2 matches, got %d", len(matches))
+		}
+
+		if !reflect.DeepEqual(matches[0].Result, match1) {
+			t.Fatalf("difference in match1\nbundle:     %v\nstandalone: %v", matches[0].Result, match1)
+		}
+		if !reflect.DeepEqual(matches[1].Result, match2) {
+			t.Fatalf("difference in match2\nbundle:     %v\nstandalone: %v", matches[1].Result, match2)
+		}
+	})
+}
diff --git a/evaluator/index_test.go b/evaluator/index_test.go
index 2c1d259..adc66d5 100644
--- a/evaluator/index_test.go
+++ b/evaluator/index_test.go
@@ -2,7 +2,6 @@ package evaluator
 
 import (
 	"context"
-	"fmt"
 	"testing"
 
 	"github.com/bradleyjkemp/sigma-go"
@@ -44,8 +43,6 @@ func TestRuleEvaluator_RelevantToEvent_LogsourceRewriting(t *testing.T) {
 		DefaultIndex: "",
 	}))
 
-	fmt.Println(rule.Indexes())
-
 	relevant := []string{
 		"just-category",
 		"category-rewritten-index",
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd
new file mode 100644
index 0000000..1427fc2
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n  0:\n   Foo: BA\n  condition: A")
+string("detection:\n  0:\n   Foo|contains: A\n  condition: A")
+string("fieldmappings:\n    Foo: foo")
+string("{\"foo\":\"BA\"}")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de
new file mode 100644
index 0000000..3ae40e6
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n  0:\n   Foo|contains: A\n  condition: A")
+string("detection:\n  0:\n   Foo|contains: a\n  condition: A")
+string("fieldmappings:\n    Foo: foo")
+string("{\"foo\":\"A\"}")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663
new file mode 100644
index 0000000..62b3328
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n 0:\n  0:")
+string("detection:\n 0:\n  0:")
+string("")
+string("{}")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323
new file mode 100644
index 0000000..4991c8e
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("<<:\n? 0:")
+string("0")
+string("0")
+string("0")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2
new file mode 100644
index 0000000..8e08926
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("\nid: TEST_RULE\ndetection:\n  a:\n    Foo|contains: bar\n  b:\n    Bar|endswith: baz\n  condition: a and b\n")
+string("\nid: TEST_RULE\ndetection:\n   :\n    Foo|contains: bar\n  b:\n    Bar|endswith: baz\n  condition: a and b\n")
+string("\ntitle: Test\nlogsMMources:\n    tes   Bar:    product: tes \n\nfieldmappings:\n   tFoo: $.foo\n    Bar: $.foobar.baz\n")
+string("{\"foo\": \"bar\", \"bar\": \"baz\"}")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99
new file mode 100644
index 0000000..bbf62ac
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection: \n 00:\n  00:")
+string("detection: \n 0: \n  0:")
+string("")
+string("0")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e
new file mode 100644
index 0000000..cd1b85e
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n 0:\n  0:")
+string("detection:\n 0:\n  0:")
+string("")
+string("A")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224
new file mode 100644
index 0000000..737b75d
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n  0:\n   0: \n  condition: A")
+string("detection:\n 0:\n  0|contains: >")
+string("0")
+string("{}")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a
new file mode 100644
index 0000000..b369b59
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection: \n 0: \n  0:")
+string("detection: \n 00:\n  00|:")
+string("")
+string("{}")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6
new file mode 100644
index 0000000..adfed68
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("\nid: TEST_RULE\ndetection:\n  a:\n    Foo|contains: bar\n  b:\n    Bar|endswith: baz\n  condition: a and b\n")
+string("\nid: TEST_RULE\ndetection:\n  a:\n    Foo|contains: bar\n  b:\n    Bar|endswith: baz\n  condition: a and b\n")
+string("\ntitle: Test\nlogsources:\n    test:\n        product: test\n\nfieldmappings:\n    Foo: $.foo\n    Bar: $.foobar.baz\n")
+string("{\"foo\": \"bAr\", \"000\": \"000\"}")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef
new file mode 100644
index 0000000..2c34913
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n  0:\n   Foo|contains: BA\n  condition: A")
+string("detection:\n  0:\n   Foo|contains: B\n  condition: A")
+string("fieldmappings:\n    Foo: foo")
+string("{\"foo\":\"BA\"}")
+bool(false)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d
new file mode 100644
index 0000000..311973c
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n 0:\n  00:\n  1:")
+string("detection:\n 0:\n  0:")
+string("")
+string("{}")
+bool(false)
diff --git a/go.mod b/go.mod
index e98c581..21e923d 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/bradleyjkemp/sigma-go
 go 1.21
 
 require (
+	github.com/BobuSumisu/aho-corasick v1.0.3
 	github.com/PaesslerAG/jsonpath v0.1.1
 	github.com/alecthomas/participle v0.7.1
 	github.com/bradleyjkemp/cupaloy/v2 v2.6.0
@@ -13,6 +14,5 @@ require (
 require (
 	github.com/PaesslerAG/gval v1.0.0 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
-	github.com/pgavlin/aho-corasick v0.5.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 2654f87..7b18aa4 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g=
+github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE=
 github.com/PaesslerAG/gval v1.0.0 h1:GEKnRwkWDdf9dOmKcNrar9EA1bz1z9DqPIO1+iLzhd8=
 github.com/PaesslerAG/gval v1.0.0/go.mod h1:y/nm5yEyTeX6av0OfKJNp9rBNj2XrGhAf5+v24IBN1I=
 github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
@@ -13,8 +15,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
-github.com/pgavlin/aho-corasick v0.5.0 h1:gcEz9/z7CDs/KqZrdSJm6FQw4/dj2/mOho6+p77yZsw=
-github.com/pgavlin/aho-corasick v0.5.0/go.mod h1:UyKgVsAp5Un59BCpzrpFkPyETFMn1tGjdbRYvoq0l2g=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

From 0b38dbae8fa851b517b3c124192c9a97c05da05a Mon Sep 17 00:00:00 2001
From: Bradley Kemp <bradley@bradleyjkemp.dev>
Date: Mon, 2 Sep 2024 15:53:17 +0100
Subject: [PATCH 3/5] Use same technique for regexes

---
 evaluator/bundle.go                           | 258 ++++++++
 evaluator/evaluate.go                         | 205 -------
 evaluator/evaluate_search.go                  |   2 +-
 evaluator/fuzz_test.go                        |  11 +
 evaluator/restring.go                         | 563 ++++++++++++++++++
 .../FuzzRuleBundleMatches/5767f35675911705    |   6 +
 .../FuzzRuleBundleMatches/8ef99a169708daef    |   6 +
 7 files changed, 845 insertions(+), 206 deletions(-)
 create mode 100644 evaluator/bundle.go
 create mode 100644 evaluator/restring.go
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705
 create mode 100644 evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef

diff --git a/evaluator/bundle.go b/evaluator/bundle.go
new file mode 100644
index 0000000..0c842c9
--- /dev/null
+++ b/evaluator/bundle.go
@@ -0,0 +1,258 @@
+package evaluator
+
+import (
+	"context"
+	"fmt"
+	aho_corasick "github.com/BobuSumisu/aho-corasick"
+	"github.com/bradleyjkemp/sigma-go"
+	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
+	"regexp"
+	"strings"
+	"unsafe"
+)
+
+// ForRules builds one RuleEvaluator per rule (applying options to each) and indexes the
+// rules' |contains values and |re substrings in one Aho-Corasick trie per field.
+func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
+	if len(rules) == 0 {
+		return RuleEvaluatorBundle{}
+	}
+
+	needles := map[string][]string{} // field name -> every substring to look for in that field
+	bundle := RuleEvaluatorBundle{
+		ahocorasick: map[string]ahocorasickSearcher{},
+	}
+
+	for _, rule := range rules {
+		evaluator := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators}
+		for _, apply := range options {
+			apply(evaluator)
+		}
+
+		bundle.evaluators = append(bundle.evaluators, evaluator)
+		bundle.caseSensitive = evaluator.caseSensitive
+
+		for _, search := range rule.Detection.Searches {
+			for _, eventMatcher := range search.EventMatchers {
+				for _, fm := range eventMatcher {
+					var isContains, isRegex bool
+					for _, modifier := range fm.Modifiers {
+						switch modifier {
+						case "contains":
+							isContains = true
+						case "re":
+							isRegex = true
+						}
+					}
+					// |contains takes priority when a matcher carries both modifiers
+					if isContains {
+						for _, value := range fm.Values {
+							if value == nil {
+								continue
+							}
+							needle := modifiers.CoerceString(value)
+							if !bundle.caseSensitive {
+								needle = strings.ToLower(needle)
+							}
+							needles[fm.Field] = append(needles[fm.Field], needle)
+						}
+					} else if isRegex {
+						// the regex's "necessary" substrings become needles
+						for _, value := range fm.Values {
+							// a parse error yields no substrings, so the error is deliberately dropped here
+							required, foldCase, _ := regexStrings(modifiers.CoerceString(value)) // todo: benchmark this, should save the result?
+							for _, s := range required {
+								if foldCase {
+									s = strings.ToLower(s)
+								}
+								needles[fm.Field] = append(needles[fm.Field], s)
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	for field, patterns := range needles {
+		trie := aho_corasick.NewTrieBuilder().AddStrings(patterns).Build()
+		bundle.ahocorasick[field] = ahocorasickSearcher{
+			Trie:     trie,
+			patterns: patterns,
+			results:  map[*byte]map[string]bool{}, // per-haystack result cache, filled by getResults
+		}
+	}
+	return bundle
+}
+
+type RuleEvaluatorBundle struct {
+	ahocorasick   map[string]ahocorasickSearcher // field name -> searcher over every needle ForRules registered for that field
+	evaluators    []*RuleEvaluator               // one per rule, in the order the rules were passed to ForRules
+	caseSensitive bool                           // copied from the last evaluator ForRules built
+}
+
+type ahocorasickSearcher struct {
+	*aho_corasick.Trie // trie that ForRules builds from patterns
+	patterns []string // the strings the trie was built from
+	results  map[*byte]map[string]bool // haystack data pointer -> set of patterns found in that haystack
+}
+
+// getResults returns the set of patterns found in s (lower-cased first unless caseSensitive).
+func (as ahocorasickSearcher) getResults(s string, caseSensitive bool) map[string]bool {
+	key := unsafe.StringData(s) // data pointer as cache key: each backing array is scanned only once
+	if cached, seen := as.results[key]; seen {
+		return cached
+	}
+	// NOTE(review): key ignores caseSensitive, so a folded scan can be served to a case-sensitive caller — confirm
+	haystack := s
+	if !caseSensitive {
+		haystack = strings.ToLower(s)
+	}
+	found := map[string]bool{}
+	as.results[key] = found
+	for _, match := range as.MatchString(haystack) {
+		// TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]?
+		found[match.MatchString()] = true
+	}
+	return found
+}
+
+type RuleResult struct {
+	Result     // outcome of evaluating Rule against the event
+	sigma.Rule // the rule this Result belongs to
+}
+
+// Matches evaluates event against every rule in the bundle and returns one RuleResult
+// per rule, in bundle order. Substring scan results are cached for this call only.
+func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) {
+	if len(bundle.evaluators) == 0 {
+		return nil, nil
+	}
+
+	// start from the rules' own comparators, then swap in the trie-backed ones
+	comparators := make(map[string]modifiers.Comparator, len(bundle.evaluators[0].comparators)+2)
+	for name, comparator := range bundle.evaluators[0].comparators {
+		comparators[name] = comparator
+	}
+
+	// Each comparator gets its own searchers with an empty cache: the cache is keyed
+	// by haystack pointer only, so sharing it between |contains and |re (which may fold
+	// case differently) serves wrong hits, and sharing it across calls retains every
+	// haystack forever and is written unsynchronised when Matches runs concurrently.
+	freshSearchers := func() map[string]ahocorasickSearcher {
+		searchers := make(map[string]ahocorasickSearcher, len(bundle.ahocorasick))
+		for field, searcher := range bundle.ahocorasick {
+			searcher.results = map[*byte]map[string]bool{}
+			searchers[field] = searcher
+		}
+		return searchers
+	}
+	comparators["contains"] = &ahocorasickContains{
+		matchers:      freshSearchers(),
+		caseSensitive: bundle.caseSensitive,
+	}
+	comparators["re"] = &ahocorasickRe{matchers: freshSearchers()}
+
+	ruleresults := make([]RuleResult, 0, len(bundle.evaluators))
+	for _, rule := range bundle.evaluators {
+		result := Result{
+			SearchResults:    make(map[string]bool, len(rule.Detection.Searches)),
+			ConditionResults: make([]bool, len(rule.Detection.Conditions)),
+		}
+		for identifier, search := range rule.Detection.Searches {
+			matched, err := rule.evaluateSearch(ctx, search, event, comparators)
+			if err != nil {
+				return nil, fmt.Errorf("error evaluating search %s: %w", identifier, err)
+			}
+			result.SearchResults[identifier] = matched
+		}
+
+		for conditionIndex, condition := range rule.Detection.Conditions {
+			// ConditionResults entries start out false, so only matches need recording
+			if !rule.evaluateSearchExpression(condition.Search, result.SearchResults) {
+				continue
+			}
+			if condition.Aggregation != nil {
+				// the search matched, but the aggregation decides whether the condition does
+				aggregationMatches, err := rule.evaluateAggregationExpression(ctx, conditionIndex, condition.Aggregation, event)
+				if err != nil {
+					return nil, err
+				}
+				if !aggregationMatches {
+					continue // keep going: later conditions may hold aggregations that must still run
+				}
+			}
+			result.Match = true
+			result.ConditionResults[conditionIndex] = true
+		}
+
+		ruleresults = append(ruleresults, RuleResult{
+			Rule:   rule.Rule,
+			Result: result,
+		})
+	}
+	return ruleresults, nil
+}
+
+type ahocorasickContains struct {
+	caseSensitive bool // when false, MatchesField lower-cases the needle before looking it up
+	modifiers.Comparator // embedded interface; Matches leaves it nil, so only MatchesField is usable
+	matchers map[string]ahocorasickSearcher // field name -> searcher holding that field's needles
+}
+
+// MatchesField reports whether expected is among the needles found in actual for field.
+func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) {
+	// compatibility with the old |contains behaviour (possibly a bug?):
+	// an empty expected string matches unconditionally
+	if expected == "" {
+		return true, nil
+	}
+
+	hits := a.matchers[field].getResults(modifiers.CoerceString(actual), a.caseSensitive)
+
+	needle := modifiers.CoerceString(expected)
+	if !a.caseSensitive {
+		// in case-insensitive mode the lookup key must be canonicalised (cheap: needles are short)
+		// TODO: should we just modify the rules in this case? (saving the lower-casing every time)
+		needle = strings.ToLower(needle)
+	}
+	return hits[needle], nil
+}
+
+type ahocorasickRe struct {
+	modifiers.Comparator // embedded interface; Matches leaves it nil, so only MatchesField is usable
+	matchers map[string]ahocorasickSearcher // field name -> searcher used to prefilter regexes on that field
+}
+
+// MatchesField reports whether the regex in expected matches actual, prefiltering on required substrings.
+func (a *ahocorasickRe) MatchesField(field string, actual any, expected any) (bool, error) {
+	stringRe := modifiers.CoerceString(expected)
+	// ss: simple strings of which at least one appears in every match; parsing also rejects bad patterns
+	ss, caseInsensitive, err := regexStrings(stringRe)
+	if err != nil {
+		return false, err
+	}
+
+	haystack := modifiers.CoerceString(actual)
+	// an empty set means no required substring could be derived, so only the regex itself can decide
+	if len(ss) > 0 {
+		results := a.matchers[field].getResults(haystack, !caseInsensitive)
+		found := false
+		for _, s := range ss {
+			if results[s] {
+				found = true
+				break
+			}
+		}
+		if !found {
+			return false, nil
+		}
+	}
+
+	// the cheap heuristic says the regex *might* match, so now compile and run it (todo: cache this?)
+	re, err := regexp.Compile(stringRe)
+	if err != nil {
+		return false, err
+	}
+	return re.MatchString(haystack), nil
+}
diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go
index 537b2e2..99fafcb 100644
--- a/evaluator/evaluate.go
+++ b/evaluator/evaluate.go
@@ -4,11 +4,8 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	aho_corasick "github.com/BobuSumisu/aho-corasick"
 	"github.com/bradleyjkemp/sigma-go"
 	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
-	"strings"
-	"unsafe"
 )
 
 type RuleEvaluator struct {
@@ -78,84 +75,6 @@ func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator {
 	return e
 }
 
-// ForRules compiles a set of rule evaluators which are evaluated together allowing for use of
-// more efficient string matching algorithms
-func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
-	if len(rules) == 0 {
-		return RuleEvaluatorBundle{}
-	}
-
-	bundle := RuleEvaluatorBundle{
-		ahocorasick: map[string]ahocorasickSearcher{},
-	}
-
-	values := map[string][]string{}
-
-	for _, rule := range rules {
-		e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators}
-		for _, option := range options {
-			option(e)
-		}
-
-		bundle.evaluators = append(bundle.evaluators, e)
-
-		for _, search := range rule.Detection.Searches {
-			for _, matcher := range search.EventMatchers {
-				for _, fieldMatcher := range matcher {
-					contains := false
-					for _, modifier := range fieldMatcher.Modifiers {
-						if modifier == "contains" {
-							contains = true
-						}
-					}
-					if !contains {
-						continue
-					}
-					for _, value := range fieldMatcher.Values {
-						if value == nil {
-							continue
-						}
-						values[fieldMatcher.Field] = append(values[fieldMatcher.Field], modifiers.CoerceString(value))
-					}
-				}
-			}
-		}
-	}
-
-	caseSensitive := false
-	if len(bundle.evaluators) > 0 {
-		caseSensitive = bundle.evaluators[0].caseSensitive
-	}
-
-	for field, fieldValues := range values {
-		if !caseSensitive {
-			// when operating in case-insensitive mode, ahocorasick only returns matches for the *first* match
-			// so we have to canonicalise our needles to lowercase.
-			// otherwise if we have both "A" and "a", we're not sure which will be returned as the match
-			// see: go test -run="FuzzRuleBundleMatches/1b692dbec8c613de"
-			for i, value := range fieldValues {
-				fieldValues[i] = strings.ToLower(value)
-			}
-		}
-		bundle.ahocorasick[field] = ahocorasickSearcher{
-			Trie:     aho_corasick.NewTrieBuilder().AddStrings(fieldValues).Build(),
-			patterns: fieldValues,
-		}
-	}
-	return bundle
-}
-
-type RuleEvaluatorBundle struct {
-	ahocorasick   map[string]ahocorasickSearcher
-	evaluators    []*RuleEvaluator
-	caseSensitive bool
-}
-
-type ahocorasickSearcher struct {
-	*aho_corasick.Trie
-	patterns []string
-}
-
 type Result struct {
 	Match            bool            // whether this event matches the Sigma rule
 	SearchResults    map[string]bool // For each Search, whether it matched the event
@@ -176,130 +95,6 @@ func eventValue(e Event, key string) interface{} {
 	}
 }
 
-type ahocorasickSearch struct {
-	field    string
-	haystack *byte
-}
-
-type RuleResult struct {
-	Result
-	sigma.Rule
-}
-
-func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) {
-	if len(bundle.evaluators) == 0 {
-		return nil, nil
-	}
-
-	// copy the current rule comparators
-	comparators := map[string]modifiers.Comparator{}
-	for name, comparator := range bundle.evaluators[0].comparators {
-		comparators[name] = comparator
-	}
-
-	c := &ahocorasickContains{
-		//Comparator: comparators["contains"], // fall back to the normal contains comparator for non MatchField calls
-		matchers: bundle.ahocorasick,
-		results:  map[ahocorasickSearch]map[string]bool{},
-	}
-	// override the contains comparator to use our custom one
-	comparators["contains"] = c
-
-	ruleresults := []RuleResult{}
-	for _, rule := range bundle.evaluators {
-		result := Result{
-			Match:            false,
-			SearchResults:    map[string]bool{},
-			ConditionResults: make([]bool, len(rule.Detection.Conditions)),
-		}
-		for identifier, search := range rule.Detection.Searches {
-			var err error
-			result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, comparators)
-			if err != nil {
-				return nil, fmt.Errorf("error evaluating search %s: %w", identifier, err)
-			}
-		}
-
-		for conditionIndex, condition := range rule.Detection.Conditions {
-			searchMatches := rule.evaluateSearchExpression(condition.Search, result.SearchResults)
-
-			switch {
-			// Event didn't match filters
-			case !searchMatches:
-				result.ConditionResults[conditionIndex] = false
-				continue
-
-			// Simple query without any aggregation
-			case searchMatches && condition.Aggregation == nil:
-				result.ConditionResults[conditionIndex] = true
-				result.Match = true
-				continue // need to continue in case other conditions contain aggregations that need to be evaluated
-
-			// Search expression matched but still need to see if the aggregation returns true
-			case searchMatches && condition.Aggregation != nil:
-				aggregationMatches, err := rule.evaluateAggregationExpression(ctx, conditionIndex, condition.Aggregation, event)
-				if err != nil {
-					return nil, err
-				}
-				if aggregationMatches {
-					result.Match = true
-					result.ConditionResults[conditionIndex] = true
-				}
-				continue
-			}
-		}
-
-		ruleresults = append(ruleresults, RuleResult{
-			Rule:   rule.Rule,
-			Result: result,
-		})
-	}
-	return ruleresults, nil
-}
-
-type ahocorasickContains struct {
-	caseSensitive bool
-	modifiers.Comparator
-	matchers map[string]ahocorasickSearcher
-	results  map[ahocorasickSearch]map[string]bool
-}
-
-func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) {
-	if actual == nil && expected == "" {
-		// compatability with old |contains behaviour
-		// possibly a bug?
-		return true, nil
-	}
-	haystack := modifiers.CoerceString(actual)
-	search := ahocorasickSearch{
-		field:    field,
-		haystack: unsafe.StringData(haystack),
-	}
-
-	existingResult, ok := a.results[search]
-	if !ok { // haven't already computed this
-		if !a.caseSensitive {
-			haystack = strings.ToLower(haystack)
-		}
-		a.results[search] = map[string]bool{}
-		matcher := a.matchers[field]
-		for _, match := range matcher.MatchString(haystack) {
-			// TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]?
-			a.results[search][matcher.patterns[match.Pattern()]] = true
-		}
-		existingResult = a.results[search]
-	}
-
-	needle := modifiers.CoerceString(expected)
-	if !a.caseSensitive {
-		// when operating in case-insensitive mode, search strings must be canonicalised
-		// (this is ok because search strings are much smaller than the haystack)
-		// TODO: should we just modify the rules in this case? (saving the lower-casing every time)
-		needle = strings.ToLower(needle)
-	}
-	return existingResult[needle], nil
-}
-
 func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) {
 	result := Result{
 		Match:            false,
diff --git a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go
index 598c15b..ed9b511 100644
--- a/evaluator/evaluate_search.go
+++ b/evaluator/evaluate_search.go
@@ -301,7 +301,7 @@ func toGenericSlice(v interface{}) []interface{} {
 		return []interface{}{v}
 	}
 
-	var out []interface{}
+	out := make([]interface{}, 0, rv.Len())
 	for i := 0; i < rv.Len(); i++ {
 		out = append(out, rv.Index(i).Interface())
 	}
diff --git a/evaluator/fuzz_test.go b/evaluator/fuzz_test.go
index cce0e8d..11e5809 100644
--- a/evaluator/fuzz_test.go
+++ b/evaluator/fuzz_test.go
@@ -21,6 +21,16 @@ detection:
   condition: a and b
 `
 
+const testRuleRe = `
+id: TEST_RULE
+detection:
+  a:
+    Foo|re: bar
+  b:
+    Bar|endswith: baz
+  condition: a and b
+`
+
 const testConfig = `
 title: Test
 logsources:
@@ -55,6 +65,7 @@ func FuzzRuleMatches(f *testing.F) {
 
 func FuzzRuleBundleMatches(f *testing.F) {
 	f.Add(testRule, testRule, testConfig, `{"foo": "bar", "bar": "baz"}`, false)
+	f.Add(testRule, testRuleRe, testConfig, `{"foo": "bar", "bar": "baz"}`, false)
 	f.Fuzz(func(t *testing.T, rule1, rule2, config, payload string, caseSensitive bool) {
 		var r1, r2 sigma.Rule
 		var c sigma.Config
diff --git a/evaluator/restring.go b/evaluator/restring.go
new file mode 100644
index 0000000..e7507ed
--- /dev/null
+++ b/evaluator/restring.go
@@ -0,0 +1,563 @@
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file is based on http://code.google.com/p/codesearch/source/browse/index/regexp.go,
+// modified to find strings instead of trigrams.
+
+package evaluator
+
+import (
+	"regexp/syntax"
+	"sort"
+	"strings"
+	"unicode"
+)
+
+// regexStrings returns a set of strings such that any string matching re must
+// contain at least one of them (empty when no such set can be found), and
+// whether the parsed top-level expression carries the FoldCase flag.
+func regexStrings(re string) (stringSet, bool, error) {
+	tree, parseErr := syntax.Parse(re, syntax.Perl)
+	if parseErr != nil {
+		return nil, false, parseErr
+	}
+	summary := analyze(tree)
+	return summary.bestSet(), tree.Flags&syntax.FoldCase > 0, nil
+}
+
+// A regexpInfo summarizes the results of analyzing a regexp (see analyze).
+type regexpInfo struct {
+	// canEmpty records whether the regexp matches the empty string.
+	canEmpty bool
+
+	// exact is the exact set of strings matching the regexp.
+	exact stringSet
+
+	// if exact is nil, prefix is the set of possible match prefixes,
+	// and suffix is the set of possible match suffixes.
+	prefix stringSet // otherwise: the exact set of matching prefixes ...
+	suffix stringSet // ... and suffixes
+
+	// internal is a set of strings that match internally (not as prefixes or
+	// suffixes); concat and alternate are the functions that populate it.
+	internal stringSet
+}
+
+const (
+	// Exact sets are limited to maxExact strings.
+	// If one grows past this, simplify moves its strings into
+	// prefix and suffix and clears exact.  It's not worthwhile for
+	// this to be bigger than maxSet.
+	maxExact = 100
+
+	// Prefix and suffix sets are limited to maxSet strings: when they get too
+	// big, simplify replaces groups of strings sharing a common leading prefix
+	// (or trailing suffix) with that common prefix (or suffix). concat also
+	// only builds an internal set from operands no larger than this.
+	maxSet = 200
+)
+
+// anyMatch builds the regexpInfo for a regexp that can match
+// any string at all, the empty one included.
+func anyMatch() regexpInfo {
+	var info regexpInfo
+	info.canEmpty = true
+	info.prefix = stringSet{""}
+	info.suffix = stringSet{""}
+	return info
+}
+
+// anyChar builds the regexpInfo for a regexp that matches
+// exactly one arbitrary character.
+func anyChar() regexpInfo {
+	var info regexpInfo
+	info.prefix = stringSet{""}
+	info.suffix = stringSet{""}
+	return info
+}
+
+// noMatch builds the (zero-valued) regexpInfo for a regexp that
+// matches no strings at all.
+func noMatch() (none regexpInfo) {
+	return none
+}
+
+// emptyString builds the regexpInfo for a regexp whose only
+// match is the empty string.
+func emptyString() regexpInfo {
+	var info regexpInfo
+	info.canEmpty = true
+	info.exact = stringSet{""}
+	return info
+}
+
+// analyze recursively walks the parsed regexp re and returns the regexpInfo summarizing it.
+func analyze(re *syntax.Regexp) (ret regexpInfo) {
+	var info regexpInfo
+	switch re.Op {
+	case syntax.OpNoMatch:
+		return noMatch()
+
+	case syntax.OpEmptyMatch,
+		syntax.OpBeginLine, syntax.OpEndLine,
+		syntax.OpBeginText, syntax.OpEndText,
+		syntax.OpWordBoundary, syntax.OpNoWordBoundary:
+		return emptyString() // anchors and boundaries are summarized the same way as an empty match
+
+	case syntax.OpLiteral:
+		if re.Flags&syntax.FoldCase != 0 {
+			switch len(re.Rune) {
+			case 0:
+				return emptyString()
+			case 1:
+				// Single-letter case-folded string:
+				// rewrite into char class and analyze.
+				re1 := &syntax.Regexp{
+					Op: syntax.OpCharClass,
+				}
+				re1.Rune = re1.Rune0[:0]
+				r0 := re.Rune[0]
+				re1.Rune = append(re1.Rune, r0, r0) // char-class runes are lo,hi pairs, so r0,r0 is just r0
+				for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
+					re1.Rune = append(re1.Rune, r1, r1)
+				}
+				info = analyze(re1)
+				return info
+			}
+			// Multi-letter case-folded string:
+			// treat as concatenation of single-letter case-folded strings.
+			re1 := &syntax.Regexp{
+				Op:    syntax.OpLiteral,
+				Flags: syntax.FoldCase,
+			}
+			info = emptyString()
+			for i := range re.Rune {
+				re1.Rune = re.Rune[i : i+1]
+				info = concat(info, analyze(re1))
+			}
+			return info
+		}
+		info.exact = stringSet{string(re.Rune)}
+
+	case syntax.OpAnyCharNotNL, syntax.OpAnyChar:
+		return anyChar()
+
+	case syntax.OpCapture:
+		return analyze(re.Sub[0])
+
+	case syntax.OpConcat:
+		return fold(concat, re.Sub, emptyString())
+
+	case syntax.OpAlternate:
+		return fold(alternate, re.Sub, noMatch())
+
+	case syntax.OpQuest:
+		return alternate(analyze(re.Sub[0]), emptyString())
+
+	case syntax.OpStar:
+		// We don't know anything, so assume the worst.
+		return anyMatch()
+
+	case syntax.OpRepeat:
+		if re.Min == 0 {
+			// Like OpStar
+			return anyMatch()
+		}
+		fallthrough // Min != 0: handled exactly like x+ below
+	case syntax.OpPlus:
+		// x+
+		// Since there has to be at least one x, the prefixes and suffixes
+		// stay the same.  If x was exact, it isn't anymore.
+		info = analyze(re.Sub[0])
+		if info.exact.have() {
+			info.prefix = info.exact
+			info.suffix = info.exact.copy()
+			info.exact = nil
+		}
+
+	case syntax.OpCharClass:
+		// Special case.
+		if len(re.Rune) == 0 {
+			return noMatch()
+		}
+
+		// Special case.
+		if len(re.Rune) == 1 {
+			info.exact = stringSet{string(re.Rune[0])}
+			break
+		}
+
+		n := 0
+		for i := 0; i < len(re.Rune); i += 2 {
+			n += int(re.Rune[i+1] - re.Rune[i]) // adds hi-lo for each lo,hi pair
+		}
+		// If the class is too large, it's okay to overestimate.
+		if n > 100 {
+			return anyChar()
+		}
+
+		info.exact = []string{}
+		for i := 0; i < len(re.Rune); i += 2 {
+			lo, hi := re.Rune[i], re.Rune[i+1]
+			for rr := lo; rr <= hi; rr++ {
+				info.exact.add(string(rr))
+			}
+		}
+	}
+
+	info.simplify(false) // only reached by the cases above that set info without returning
+	return info
+}
+
+// fold combines analyze(sub[i]) left to right with f; zero is the result for an empty sub.
+func fold(f func(x, y regexpInfo) regexpInfo, sub []*syntax.Regexp, zero regexpInfo) regexpInfo {
+	switch len(sub) {
+	case 0:
+		return zero
+	case 1:
+		return analyze(sub[0])
+	}
+	acc := f(analyze(sub[0]), analyze(sub[1]))
+	for _, re := range sub[2:] {
+		acc = f(acc, analyze(re))
+	}
+	return acc
+}
+
+// concat returns the regexp info for xy given x and y (the named result out is unused).
+func concat(x, y regexpInfo) (out regexpInfo) {
+	var xy regexpInfo
+
+	if x.exact.have() && y.exact.have() {
+		xy.exact = x.exact.cross(y.exact, false)
+	} else {
+		if x.exact.have() {
+			xy.prefix = x.exact.cross(y.prefix, false)
+		} else {
+			xy.prefix = x.prefix
+			if x.canEmpty {
+				xy.prefix = xy.prefix.union(y.prefix, false)
+			}
+		}
+		if y.exact.have() {
+			xy.suffix = x.suffix.cross(y.exact, true)
+		} else {
+			xy.suffix = y.suffix
+			if y.canEmpty {
+				xy.suffix = xy.suffix.union(x.suffix, true)
+			}
+		}
+	}
+
+	// When neither side is exact, the cross product of x.suffix and y.prefix
+	// is not necessarily accounted for in xy.prefix or xy.suffix yet, so it is
+	// recorded as an internal set. This only happens when both sets are within
+	// maxSet (to keep them manageable) and the minLen sum is at least 3 (the
+	// "trigram" threshold inherited from the codesearch original).
+	if !x.exact.have() && !y.exact.have() &&
+		x.suffix.size() <= maxSet && y.prefix.size() <= maxSet &&
+		x.suffix.minLen()+y.prefix.minLen() >= 3 {
+		xy.internal = x.suffix.cross(y.prefix, false)
+	}
+
+	xy.internal = mostDistinctive(xy.internal, x.internal, y.internal)
+
+	xy.simplify(false)
+	return xy
+}
+
+// alternate returns the regexpInfo for x|y given x and y.
+func alternate(x, y regexpInfo) (out regexpInfo) {
+	var xy regexpInfo
+	if x.exact.have() && y.exact.have() {
+		xy.exact = x.exact.union(y.exact, false)
+	} else if x.exact.have() {
+		xy.prefix = x.exact.union(y.prefix, false)
+		xy.suffix = x.exact.union(y.suffix, true)
+	} else if y.exact.have() {
+		xy.prefix = x.prefix.union(y.exact, false)
+		xy.suffix = x.suffix.union(y.exact.copy(), true)
+	} else {
+		xy.prefix = x.prefix.union(y.prefix, false)
+		xy.suffix = x.suffix.union(y.suffix, true)
+	}
+	xy.canEmpty = x.canEmpty || y.canEmpty
+
+	if !xy.exact.have() {
+		xb := x.bestSet()
+		yb := y.bestSet()
+		if len(xb) > 0 && len(yb) > 0 {
+			xy.internal = xb.union(yb, false)
+		}
+	}
+
+	xy.simplify(false)
+	return xy
+}
+
+// simplify moves an exact set of more than maxExact strings into prefix and suffix and, when no exact set remains, trims prefix, suffix and internal. force is unused.
+func (info *regexpInfo) simplify(force bool) {
+	// If there are now too many exact strings,
+	// loop over them, moving
+	// the relevant pieces into prefix and suffix.
+	info.exact.clean(false)
+	if len(info.exact) > maxExact {
+		for _, s := range info.exact {
+			info.prefix.add(s)
+			info.suffix.add(s)
+		}
+		info.exact = nil
+	}
+
+	if !info.exact.have() {
+		info.simplifySet(&info.prefix)
+		info.simplifySet(&info.suffix)
+		info.simplifySet(&info.internal)
+	}
+}
+
+// simplifySet reduces the size of the given set (prefix, suffix or internal).
+// There is no need to pass around enormous sets, so while a set holds more
+// than maxSet strings its longest strings are shortened by one byte (the
+// prefix set keeps leading bytes, the other sets keep trailing bytes).
+// Finally, strings made redundant by a shorter entry are dropped.
+func (info *regexpInfo) simplifySet(s *stringSet) {
+	t := *s
+	t.clean(s == &info.suffix)
+
+	n := 0
+	for _, str := range t {
+		if len(str) > n {
+			n = len(str)
+		}
+	}
+
+	for ; t.size() > maxSet; n-- {
+		// Replace set by strings of length n-1.
+		w := 0
+		for _, str := range t {
+			if len(str) >= n {
+				if s == &info.prefix {
+					str = str[:n-1]
+				} else {
+					str = str[len(str)-n+1:]
+				}
+			}
+			if w == 0 || t[w-1] != str {
+				t[w] = str
+				w++
+			}
+		}
+		t = t[:w]
+		t.clean(s == &info.suffix)
+	}
+
+	// Now make sure that the prefix/suffix sets aren't redundant.
+	// For example, if we know "ab" is a possible prefix, then it
+	// doesn't help at all to know that "abc" is also a possible
+	// prefix, so delete "abc".
+	w := 0
+	f := strings.HasPrefix
+	if s == &info.suffix {
+		f = strings.HasSuffix
+	}
+	for _, str := range t {
+		if w == 0 || !f(str, t[w-1]) {
+			t[w] = str
+			w++
+		}
+	}
+	t = t[:w]
+
+	*s = t
+}
+
+func (info regexpInfo) String() string {
+	s := ""
+	if info.canEmpty {
+		s += "canempty "
+	}
+	if info.exact.have() {
+		s += "exact:" + strings.Join(info.exact, ",")
+	} else {
+		s += "prefix:" + strings.Join(info.prefix, ",")
+		s += " suffix:" + strings.Join(info.suffix, ",")
+	}
+	//s += " match: " + info.match.String()
+	return s
+}
+
+// mostDistinctive returns the most distinctive stringSet in sets.
+// The most distinctive set is the one that has the longest minLen; it returns nil when no set has a minLen above zero.
+func mostDistinctive(sets ...stringSet) stringSet {
+	best := stringSet(nil)
+	bestLen := 0
+
+	for _, s := range sets {
+		if !s.have() {
+			continue
+		}
+		thisLen := s.minLen()
+		if thisLen > bestLen {
+			best, bestLen = s, thisLen
+		}
+	}
+
+	return best
+}
+
+// bestSet returns info.exact if present, otherwise the most distinctive of info.prefix, info.suffix and info.internal.
+func (info regexpInfo) bestSet() stringSet {
+	if info.exact.have() {
+		return info.exact
+	}
+
+	return mostDistinctive(info.prefix, info.suffix, info.internal)
+}
+
+// A stringSet is a set of strings.
+// The nil stringSet indicates not having a set.
+// The non-nil but empty stringSet is the empty set.
+type stringSet []string
+
+// have reports whether we have a stringSet.
+func (s stringSet) have() bool {
+	return s != nil
+}
+
+// contains reports whether s contains str.
+func (s stringSet) contains(str string) bool {
+	for _, ss := range s {
+		if ss == str {
+			return true
+		}
+	}
+	return false
+}
+
+type byPrefix []string
+
+func (x *byPrefix) Len() int           { return len(*x) }
+func (x *byPrefix) Swap(i, j int)      { (*x)[i], (*x)[j] = (*x)[j], (*x)[i] }
+func (x *byPrefix) Less(i, j int) bool { return (*x)[i] < (*x)[j] }
+
+type bySuffix []string
+
+func (x *bySuffix) Len() int      { return len(*x) }
+func (x *bySuffix) Swap(i, j int) { (*x)[i], (*x)[j] = (*x)[j], (*x)[i] }
+func (x *bySuffix) Less(i, j int) bool {
+	s := (*x)[i]
+	t := (*x)[j]
+	for i := 1; i <= len(s) && i <= len(t); i++ {
+		si := s[len(s)-i]
+		ti := t[len(t)-i]
+		if si < ti {
+			return true
+		}
+		if si > ti {
+			return false
+		}
+	}
+	return len(s) < len(t)
+}
+
+// add adds str to the set.
+func (s *stringSet) add(str string) {
+	*s = append(*s, str)
+}
+
+// clean sorts the stringSet in place (comparing from the last byte backwards when isSuffix is true) and removes duplicates.
+func (s *stringSet) clean(isSuffix bool) {
+	t := *s
+	if isSuffix {
+		sort.Sort((*bySuffix)(s))
+	} else {
+		sort.Sort((*byPrefix)(s))
+	}
+	w := 0
+	for _, str := range t {
+		if w == 0 || t[w-1] != str {
+			t[w] = str
+			w++
+		}
+	}
+	*s = t[:w]
+}
+
+// size returns the number of strings in s.
+func (s stringSet) size() int {
+	return len(s)
+}
+
+// minLen returns the length of the shortest string in s.
+func (s stringSet) minLen() int {
+	if len(s) == 0 {
+		return 0
+	}
+	m := len(s[0])
+	for _, str := range s {
+		if m > len(str) {
+			m = len(str)
+		}
+	}
+	return m
+}
+
+// maxLen returns the length of the longest string in s.
+func (s stringSet) maxLen() int {
+	if len(s) == 0 {
+		return 0
+	}
+	m := len(s[0])
+	for _, str := range s {
+		if m < len(str) {
+			m = len(str)
+		}
+	}
+	return m
+}
+
+// union returns the sorted, de-duplicated union of s and t. It may reuse and reorder s's backing array, so callers should not rely on s afterwards.
+func (s stringSet) union(t stringSet, isSuffix bool) stringSet {
+	s = append(s, t...)
+	s.clean(isSuffix)
+	return s
+}
+
+// cross returns the cross product of s and t: every string of s followed by every string of t, sorted and de-duplicated.
+func (s stringSet) cross(t stringSet, isSuffix bool) stringSet {
+	p := stringSet{}
+	for _, ss := range s {
+		for _, tt := range t {
+			p.add(ss + tt)
+		}
+	}
+	p.clean(isSuffix)
+	return p
+}
+
+// clear empties the set but preserves the storage.
+func (s *stringSet) clear() {
+	*s = (*s)[:0]
+}
+
+// copy returns a copy of the set that does not share storage with the original.
+func (s stringSet) copy() stringSet {
+	return append(stringSet{}, s...)
+}
+
+// isSubsetOf returns true if all strings in s are also in t.
+// It assumes both sets are sorted in ascending byte order, as produced by clean(false).
+func (s stringSet) isSubsetOf(t stringSet) bool {
+	j := 0
+	for _, ss := range s {
+		for j < len(t) && t[j] < ss {
+			j++
+		}
+		if j >= len(t) || t[j] != ss {
+			return false
+		}
+	}
+	return true
+}
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705
new file mode 100644
index 0000000..e86d035
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n  0:\n   0:\n  condition: A")
+string("detection:\n  0:\n   Foo|contains: >\n  condition: A")
+string("fieldmappings:\n Foo: foo")
+string("{\"foo\":\"0\"}")
+bool(true)
diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef
new file mode 100644
index 0000000..a720e5b
--- /dev/null
+++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef
@@ -0,0 +1,6 @@
+go test fuzz v1
+string("detection:\n  0:\n   0: \n  condition: A")
+string("detection:\n  0:\n   Foo|contains: >\n  condition: A")
+string("fieldmappings:\n Foo: foo")
+string("{\"foo\":\"\"}")
+bool(true)

From 2790a619f2f9cdb832df70bfdf39c227c3cd0a7f Mon Sep 17 00:00:00 2001
From: Bradley Kemp <bradley@bradleyjkemp.dev>
Date: Tue, 3 Sep 2024 16:38:15 +0100
Subject: [PATCH 4/5] cleanup

---
 evaluator/evaluate_search.go | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go
index ed9b511..bfdcce1 100644
--- a/evaluator/evaluate_search.go
+++ b/evaluator/evaluate_search.go
@@ -149,8 +149,6 @@ func (rule *RuleEvaluator) getMatcherValues(ctx context.Context, matcher sigma.F
 		case int, float32, float64, bool:
 			value = fmt.Sprintf("%v", abstractValue)
 		default:
-			// TODO: temporary hack
-			return nil, nil
 			return nil, fmt.Errorf("expected scalar field matching value got: %v (%T)", abstractValue, abstractValue)
 		}
 
@@ -193,18 +191,6 @@ func (rule *RuleEvaluator) GetFieldValuesFromEvent(field string, event Event) ([
 				return nil, err
 			}
 
-			//values := toGenericSlice(v)
-			//for _, value := range values {
-			//	if stringValue, ok := value.(string); ok && intern != nil {
-			//		interned, ok := intern[stringValue]
-			//		if !ok {
-			//			intern[stringValue] = stringValue
-			//			interned = stringValue
-			//		}
-			//		value = interned
-			//	}
-			//	actualValues = append(actualValues, value)
-			//}
 			actualValues = append(actualValues, toGenericSlice(v)...)
 		}
 	}

From 8604da6fe3d7a716358bbffbd3c5d833bed10f90 Mon Sep 17 00:00:00 2001
From: Bradley Kemp <bradley@bradleyjkemp.dev>
Date: Wed, 4 Sep 2024 15:06:12 +0100
Subject: [PATCH 5/5] dedupe matching logic

---
 evaluator/bundle.go   | 55 ++++++-------------------------------------
 evaluator/evaluate.go |  4 ++++
 2 files changed, 11 insertions(+), 48 deletions(-)

diff --git a/evaluator/bundle.go b/evaluator/bundle.go
index 0c842c9..dc1d7fb 100644
--- a/evaluator/bundle.go
+++ b/evaluator/bundle.go
@@ -2,7 +2,6 @@ package evaluator
 
 import (
 	"context"
-	"fmt"
 	aho_corasick "github.com/BobuSumisu/aho-corasick"
 	"github.com/bradleyjkemp/sigma-go"
 	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
@@ -25,11 +24,7 @@ func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
 	values := map[string][]string{}
 
 	for _, rule := range rules {
-		e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators}
-		for _, option := range options {
-			option(e)
-		}
-
+		e := ForRule(rule, options...)
 		bundle.evaluators = append(bundle.evaluators, e)
 		bundle.caseSensitive = e.caseSensitive
 
@@ -143,52 +138,16 @@ func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]R
 	}
 
 	ruleresults := []RuleResult{}
+	errs := []error{}
 	for _, rule := range bundle.evaluators {
-		result := Result{
-			Match:            false,
-			SearchResults:    map[string]bool{},
-			ConditionResults: make([]bool, len(rule.Detection.Conditions)),
-		}
-		for identifier, search := range rule.Detection.Searches {
-			var err error
-			result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, comparators)
-			if err != nil {
-				return nil, fmt.Errorf("error evaluating search %s: %w", identifier, err)
-			}
+		result, err := rule.matches(ctx, event, comparators)
+		if err != nil {
+			errs = append(errs, err)
+			continue
 		}
-
-		for conditionIndex, condition := range rule.Detection.Conditions {
-			searchMatches := rule.evaluateSearchExpression(condition.Search, result.SearchResults)
-
-			switch {
-			// Event didn't match filters
-			case !searchMatches:
-				result.ConditionResults[conditionIndex] = false
-				continue
-
-			// Simple query without any aggregation
-			case searchMatches && condition.Aggregation == nil:
-				result.ConditionResults[conditionIndex] = true
-				result.Match = true
-				continue // need to continue in case other conditions contain aggregations that need to be evaluated
-
-			// Search expression matched but still need to see if the aggregation returns true
-			case searchMatches && condition.Aggregation != nil:
-				aggregationMatches, err := rule.evaluateAggregationExpression(ctx, conditionIndex, condition.Aggregation, event)
-				if err != nil {
-					return nil, err
-				}
-				if aggregationMatches {
-					result.Match = true
-					result.ConditionResults[conditionIndex] = true
-				}
-				continue
-			}
-		}
-
 		ruleresults = append(ruleresults, RuleResult{
-			Rule:   rule.Rule,
 			Result: result,
+			Rule:   rule.Rule,
 		})
 	}
 	return ruleresults, nil
diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go
index 99fafcb..1d07ff3 100644
--- a/evaluator/evaluate.go
+++ b/evaluator/evaluate.go
@@ -96,6 +96,10 @@ func eventValue(e Event, key string) interface{} {
 }
 
 func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) {
+	return rule.matches(ctx, event, rule.comparators)
+}
+
+func (rule RuleEvaluator) matches(ctx context.Context, event Event, comparators map[string]modifiers.Comparator) (Result, error) {
 	result := Result{
 		Match:            false,
 		SearchResults:    map[string]bool{},