diff --git a/go.mod b/go.mod
index 7ccafda3621..b90096a5991 100644
--- a/go.mod
+++ b/go.mod
@@ -257,3 +257,5 @@ require (
 replace golang.org/x/time => github.com/crowdsecurity/time v0.13.0-crowdsec.20250912
 
 replace github.com/corazawaf/coraza/v3 => github.com/crowdsecurity/coraza/v3 v3.3.3-crowdsec.20251113
+
+replace github.com/crowdsecurity/grokky => /home/bui/github/crowdsec/grokky // FIXME(review): developer-local absolute path, unresolvable on other machines; point at a published grokky version before merging
diff --git a/go.sum b/go.sum
index 0e616426f12..fa2d428724d 100644
--- a/go.sum
+++ b/go.sum
@@ -132,12 +132,8 @@ github.com/crowdsecurity/coraza/v3 v3.3.3-crowdsec.20251113 h1:eqLdvF2jV1W8ibYXk
 github.com/crowdsecurity/coraza/v3 v3.3.3-crowdsec.20251113/go.mod h1:fkDhLPDbsU74HtW6ij9UN2hAwq3AP6pVQiWPKquXo8U=
 github.com/crowdsecurity/dlog v0.0.2 h1:nj/7jLKO0o8tYn79O+g51ASeGLr5oOVahSoJ6Umq51g=
 github.com/crowdsecurity/dlog v0.0.2/go.mod h1:zpv7r+7KXwgVUZnUNjyP22zc/D7LKjyoY02weH2RBbk=
-github.com/crowdsecurity/go-cs-lib v0.0.24 h1:ZIYXHRHCFyByZmMg7S4XE8c/ZMtsTCPVUJbnDcxpTtk=
-github.com/crowdsecurity/go-cs-lib v0.0.24/go.mod h1:X0GMJY2CxdA1S09SpuqIKaWQsvRGxXmecUp9cP599dE=
 github.com/crowdsecurity/go-cs-lib v0.0.25 h1:Ov6VPW9yV+OPsbAIQk1iTkEWhwkpaG0v3lrBzeqjzj4=
 github.com/crowdsecurity/go-cs-lib v0.0.25/go.mod h1:X0GMJY2CxdA1S09SpuqIKaWQsvRGxXmecUp9cP599dE=
-github.com/crowdsecurity/grokky v0.2.2 h1:yALsI9zqpDArYzmSSxfBq2dhYuGUTKMJq8KOEIAsuo4=
-github.com/crowdsecurity/grokky v0.2.2/go.mod h1:33usDIYzGDsgX1kHAThCbseso6JuWNJXOzRQDGXHtWM=
 github.com/crowdsecurity/machineid v1.0.3 h1:mgd//PhMJqyA1EdRTgwRvafwbzNjoktdJyEgZGVCD2Q=
 github.com/crowdsecurity/machineid v1.0.3/go.mod h1:XWUSlnS0R0+u/JK5ulidwlbceNT3ZOCKteoVQEn6Luo=
 github.com/crowdsecurity/time v0.13.0-crowdsec.20250912 h1:O+lHeYhtRPubKvqDxhuZSjxefd8RbV1Ik5J7hDthoIA=
diff --git a/pkg/parser/bench_grok_test.go b/pkg/parser/bench_grok_test.go
new file mode 100644
index 00000000000..4baceea06a9
--- /dev/null
+++ b/pkg/parser/bench_grok_test.go
@@ -0,0 +1,125 @@
+package parser
+
+import (
+	"testing"
+
+	log "github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/require"
+
+	"github.com/crowdsecurity/crowdsec/pkg/pipeline"
+)
+
+// sshGrokPatterns are realistic SSH log patterns inspired by the sshd-logs.yaml parser, one per
+// child node. Each has unique literal strings that enable fast-reject via the grokky literal pre-check.
+var sshGrokPatterns = []string{
+	// Node 1: Failed auth
+	`Failed %{WORD:method} for %{USERNAME:user} from %{IP:src_ip} port %{NUMBER:port} %{WORD:proto}`,
+	// Node 2: Disconnected preauth
+	`Disconnected from authenticating user %{USERNAME:user} %{IP:src_ip} port %{NUMBER:port} \[preauth\]`,
+	// Node 3: Connection closed preauth
+	`Connection closed by authenticating user %{USERNAME:user} %{IP:src_ip} port %{NUMBER:port} \[preauth\]`,
+	// Node 4: Invalid user
+	`Invalid user %{USERNAME:user} from %{IP:src_ip} port %{NUMBER:port}`,
+	// Node 5: Key negotiation failure (final dot escaped so it only matches a literal '.')
+	`Unable to negotiate with %{IP:src_ip} port %{NUMBER:port}: no matching key exchange method found\.`,
+	// Node 6: PAM auth failure
+	`pam_unix\(sshd:auth\): authentication failure; logname= uid=%{NUMBER:uid} euid=%{NUMBER:euid} tty=ssh ruser= rhost=%{IP:src_ip}`,
+	// Node 7: Auth timeout
+	`Timeout before authentication for %{IP:src_ip} port %{NUMBER:port}`,
+	// Node 8: Refused connection
+	`refused connect from %{DATA:host}\(%{IP:src_ip}\)`,
+}
+
+// buildBenchNodes creates a parent node with N child grok leaf nodes.
+// The parent is a skeleton node (no grok of its own) that delegates to leaves.
+func buildBenchNodes(b *testing.B, pctx *UnixParserCtx, ectx EnricherCtx) Node {
+	b.Helper()
+
+	// Skeleton parent: stage metadata only, no grok of its own.
+	parent := Node{
+		NodeConfig: NodeConfig{
+			Stage:     "s01-parse",
+			Name:      "bench-ssh",
+			OnSuccess: "next_stage",
+		},
+	}
+
+	// One child config per pattern, each applied to the raw log line.
+	for _, pattern := range sshGrokPatterns {
+		parent.SubNodes = append(parent.SubNodes, NodeConfig{
+			Grok: GrokPattern{
+				RegexpValue: pattern,
+				TargetField: "Line.Raw",
+			},
+		})
+	}
+
+	// Runtime children must be derived from the config before compiling.
+	parent.initRuntimeChildrenFromConfig()
+
+	err := parent.compile(pctx, ectx)
+	require.NoError(b, err, "failed to compile bench nodes")
+
+	return parent
+}
+
+// makeEvent returns a LOG event in stage "s01-parse" whose raw line is raw.
+// Every map field is freshly allocated, so events built here share no state.
+func makeEvent(raw string) pipeline.Event {
+	return pipeline.Event{
+		Stage: "s01-parse",
+		Line: pipeline.Line{
+			Raw:    raw,
+			Labels: map[string]string{"type": "syslog"},
+		},
+		Parsed:      make(map[string]string),
+		Enriched:    make(map[string]string),
+		Meta:        make(map[string]string),
+		Unmarshaled: make(map[string]any),
+		Type:        pipeline.LOG,
+	}
+}
+
+// BenchmarkGrokPipeline measures Parse over the bench-ssh node tree, with one
+// sub-benchmark per input: no match, first pattern, fifth pattern, last pattern.
+// Building the event is part of the measured work in every iteration.
+func BenchmarkGrokPipeline(b *testing.B) {
+	// Quiet the global logger for the run and put its level back afterwards,
+	// so other tests in the package keep the verbosity they started with.
+	prevLevel := log.GetLevel()
+	log.SetLevel(log.ErrorLevel)
+	b.Cleanup(func() { log.SetLevel(prevLevel) })
+
+	pctx, ectx := prepTests(b)
+	parent := buildBenchNodes(b, pctx, ectx)
+
+	// Add the stage to pctx so Parse() processes it
+	pctx.Stages = []string{"s01-parse"}
+	nodes := []Node{parent}
+
+	benchCases := []struct {
+		name  string
+		input string
+	}{
+		{
+			name:  "no_match",
+			input: "Accepted publickey for admin from 10.0.0.1 port 22 ssh2",
+		},
+		{
+			name:  "first_node",
+			input: "Failed password for root from 192.168.1.1 port 22 ssh2",
+		},
+		{
+			name:  "fifth_node",
+			input: "Unable to negotiate with 123.57.135.134 port 45626: no matching key exchange method found.",
+		},
+		{
+			name:  "eighth_node",
+			input: "refused connect from attacker(192.168.1.1)",
+		},
+	}
+
+	for _, bc := range benchCases {
+		b.Run(bc.name, func(b *testing.B) {
+			b.ReportAllocs()
+			b.ResetTimer()
+
+			for i := 0; i < b.N; i++ {
+				evt := makeEvent(bc.input)
+				if _, err := Parse(*pctx, evt, nodes, nil); err != nil {
+					b.Fatal(err)
+				}
+			}
+		})
+	}
+}
diff --git a/pkg/parser/node.go
b/pkg/parser/node.go index a0dcecc4905..edfacf7014a 100644 --- a/pkg/parser/node.go +++ b/pkg/parser/node.go @@ -215,9 +215,7 @@ func (n *Node) processGrok(p *pipeline.Event, cachedExprEnv map[string]any) (boo groklabel = n.Grok.RegexpName } - grok := n.RuntimeGrok.RunTimeRegexp.Parse(gstr) - - if len(grok) == 0 { + if !n.RuntimeGrok.RunTimeRegexp.ParseInto(gstr, p.Parsed) { // grok failed, node failed clog.Debugf("+ Grok %q didn't return data on %q", groklabel, gstr) return false, false, nil @@ -226,12 +224,7 @@ func (n *Node) processGrok(p *pipeline.Event, cachedExprEnv map[string]any) (boo // tag explicitly that the *current* node had a successful grok pattern. it's important to know success state nodeHasOKGrok = true - clog.Debugf("+ Grok %q returned %d entries to merge in Parsed", groklabel, len(grok)) - // We managed to grok stuff, merged into parse - for k, v := range grok { - clog.Debugf("\t.Parsed[%q] = %q", k, v) - p.Parsed[k] = v - } + clog.Debugf("+ Grok %q returned data to merge in Parsed", groklabel) // if the grok succeed, process associated statics err := n.RuntimeGrok.ProcessStatics(p, n.EnrichFunctions, clog, n.Debug) if err != nil {