From 2511d7b6d9a292ac9bb036c2611e03d6edb50265 Mon Sep 17 00:00:00 2001 From: alvarofraguas Date: Thu, 14 May 2026 19:35:33 +0200 Subject: [PATCH] =?UTF-8?q?osctrl-tls:=20hardening=20=E2=80=94=20constant-?= =?UTF-8?q?time=20secrets,=20per-IP=20enroll=20rate-limit,=20audit-log=20o?= =?UTF-8?q?n=20failed=20enrolls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Carves the osctrl-tls-facing changes out of the original "round 1" security PR (jmpsec/osctrl#813) into their own PR so the TLS surface can be isolated for review and testing. Depends on the shared infrastructure (pkg/auditlog FailedEnroll, pkg/ratelimit, and utils.SetTrustedProxies / GetIP changes) that lands in #813. == Threats addressed == osctrl-tls is the only osctrl service that is internet-facing in a typical deployment — every osquery node POSTs to /enroll, /config, /log, etc. before any authentication exists. That makes its surface the most exposed and the most worth hardening. This PR adds three independent defenses: == 1. Constant-time secret comparison == cmd/tls/handlers/utils.go: checkValidSecret and checkValidRemovePath switched from `strings.TrimSpace(secret) == env.Secret` (byte-by-byte short-circuiting compare) to `subtle.ConstantTimeCompare`. The old form leaked secret bytes via response-time timing — an attacker on the open internet could iterate one byte at a time. Constant-time compare eliminates that oracle. == 2. Per-IP rate-limit on /enroll == cmd/tls/main.go: 20 requests/minute per remote IP, idle eviction after 10 minutes. Rejected requests: - return 429 - record an `AuditLog.FailedEnroll(ip, env, "rate limit exceeded", 0)` so SoC tooling can alert on enroll abuse / spray. Backed by pkg/ratelimit (from #813). The default 20/min easily covers any realistic enroll burst (a wave of provisioning at site bring-up) while shutting down brute-force / DoS attempts. == 3. Audit-log on failed enrollment == cmd/tls/handlers/handlers.go: HandlersTLS grows an AuditLog field + WithAuditLog option. Defensive default is a disabled manager so handler code can call h.AuditLog.FailedEnroll(...) without nil checks. cmd/tls/handlers/post.go: EnrollHandler records FailedEnroll on the invalid-secret path. Combined with the rate-limit failures above, the audit log now carries the full picture of unwanted enroll traffic. cmd/tls/main.go: initializes auditlog.CreateAuditLogManager and threads it into handlers via WithAuditLog. Service name "osctrl-tls" flows through to audit rows. == 4. Trusted-proxies plumbing == cmd/tls/main.go: reads flagParams.Service.TrustedProxies (CIDR list) and calls utils.SetTrustedProxies before any request can reach GetIP. Empty default means GetIP ignores X-Forwarded-For / X-Real-IP and uses RemoteAddr — preventing internet-side header-spoofed bypass of the rate-limiter or audit-log poisoning. == Config == deploy/config/tls.yml: - auditLog: true (on by default; was false) - trustedProxies: "" (empty by default; operators behind a trusted reverse proxy can list the proxy CIDRs) Verified: go build ./... clean, go vet ./... clean, go test ./cmd/tls/... green. Live smoke on the dev stack: 4 osquery nodes enrolled, the audit_logs table records `enroll_failure` rows with correct IPs for forced bad-secret attempts. --- cmd/tls/handlers/handlers.go | 15 +++++++++++++++ cmd/tls/handlers/post.go | 1 + cmd/tls/handlers/utils.go | 19 +++++++++++++++---- cmd/tls/main.go | 34 +++++++++++++++++++++++++++++++++- deploy/config/tls.yml | 9 ++++++++- 5 files changed, 72 insertions(+), 6 deletions(-) diff --git a/cmd/tls/handlers/handlers.go b/cmd/tls/handlers/handlers.go index 37859eae..ac307ba3 100644 --- a/cmd/tls/handlers/handlers.go +++ b/cmd/tls/handlers/handlers.go @@ -5,6 +5,7 @@ import ( "strconv" "time" + "github.com/jmpsec/osctrl/pkg/auditlog" "github.com/jmpsec/osctrl/pkg/carves" "github.com/jmpsec/osctrl/pkg/config" "github.com/jmpsec/osctrl/pkg/environments" @@ -63,6 +64,7 @@ type HandlersTLS struct { ConfigEndpoints *config.YAMLConfigurationEndpoints DebugHTTP *zerolog.Logger DebugHTTPConfig *config.YAMLConfigurationDebug + AuditLog *auditlog.AuditLogManager } // TLSResponse to be returned to requests @@ -180,6 +182,14 @@ func WithDebugHTTP(cfg *config.YAMLConfigurationDebug) Option { } } +// WithAuditLog passes the audit-log manager so the TLS service can record +// failed enrollment attempts for SoC alerting. +func WithAuditLog(al *auditlog.AuditLogManager) Option { + return func(h *HandlersTLS) { + h.AuditLog = al + } +} + // CreateHandlersTLS to initialize the TLS handlers struct func CreateHandlersTLS(opts ...Option) *HandlersTLS { h := &HandlersTLS{} @@ -189,6 +199,11 @@ func CreateHandlersTLS(opts ...Option) *HandlersTLS { if h.Envs != nil { h.EnvCache = environments.NewEnvCache(*h.Envs) } + if h.AuditLog == nil { + // Defensive — handlers call h.AuditLog.FailedEnroll(...). Disabled + // manager is a no-op so we don't have to nil-check at every site. + h.AuditLog = &auditlog.AuditLogManager{Enabled: false} + } return h } diff --git a/cmd/tls/handlers/post.go b/cmd/tls/handlers/post.go index 6b0d75d4..dbcd299b 100644 --- a/cmd/tls/handlers/post.go +++ b/cmd/tls/handlers/post.go @@ -97,6 +97,7 @@ func (h *HandlersTLS) EnrollHandler(w http.ResponseWriter, r *http.Request) { } } else { log.Err(err).Msg("invalid enrolling secret") + h.AuditLog.FailedEnroll(utils.GetIP(r), env.Name, "invalid enroll secret", env.ID) utils.HTTPResponse(w, "", http.StatusForbidden, []byte("")) return } diff --git a/cmd/tls/handlers/utils.go b/cmd/tls/handlers/utils.go index 955c43d7..d75eea08 100644 --- a/cmd/tls/handlers/utils.go +++ b/cmd/tls/handlers/utils.go @@ -3,6 +3,7 @@ package handlers import ( "bytes" "crypto/sha1" + "crypto/subtle" "encoding/json" "fmt" "strconv" @@ -32,9 +33,14 @@ func generateCarveSessionID() string { return id.String() } -// Helper to check if the provided secret is valid for this environment +// Helper to check if the provided secret is valid for this environment. +// Constant-time compare to avoid leaking the secret via byte-by-byte timing +// from the anonymous internet-facing enroll endpoint. func (h *HandlersTLS) checkValidSecret(secret string, env environments.TLSEnvironment) bool { - return (strings.TrimSpace(secret) == env.Secret) + return subtle.ConstantTimeCompare( + []byte(strings.TrimSpace(secret)), + []byte(env.Secret), + ) == 1 } // Helper to check if the provided SecretPath is valid for enrolling in a environment @@ -57,9 +63,14 @@ func (h *HandlersTLS) checkExpiredRemoveSecretPath(env environments.TLSEnvironme return h.checkExpiredPath(env.RemoveExpire) } -// Helper to check if the provided generic SecretPath is valid +// Helper to check if the provided generic SecretPath is valid. +// Constant-time compare for the same reason as checkValidSecret — +// these helpers run on the public enroll/remove URL paths. func (h *HandlersTLS) checkValidRemovePath(secretpath, envSecret string) bool { - return (strings.TrimSpace(secretpath) == envSecret) + return subtle.ConstantTimeCompare( + []byte(strings.TrimSpace(secretpath)), + []byte(envSecret), + ) == 1 } // Helper to check if a provided generic SecretPath is expired diff --git a/cmd/tls/main.go b/cmd/tls/main.go index 7a8fb7b9..5f21290f 100644 --- a/cmd/tls/main.go +++ b/cmd/tls/main.go @@ -12,6 +12,7 @@ import ( "time" "github.com/jmpsec/osctrl/cmd/tls/handlers" + "github.com/jmpsec/osctrl/pkg/auditlog" "github.com/jmpsec/osctrl/pkg/backend" "github.com/jmpsec/osctrl/pkg/cache" "github.com/jmpsec/osctrl/pkg/carves" @@ -20,8 +21,10 @@ import ( "github.com/jmpsec/osctrl/pkg/logging" "github.com/jmpsec/osctrl/pkg/nodes" "github.com/jmpsec/osctrl/pkg/queries" + "github.com/jmpsec/osctrl/pkg/ratelimit" "github.com/jmpsec/osctrl/pkg/settings" "github.com/jmpsec/osctrl/pkg/tags" + "github.com/jmpsec/osctrl/pkg/utils" "github.com/jmpsec/osctrl/pkg/version" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -154,6 +157,15 @@ func checkLatestRelease() { // Go go! func osctrlService() { + // Configure forwarding-header trust before any handler can call + // utils.GetIP. Empty (default) means GetIP ignores X-Forwarded-For / + // X-Real-IP and always uses RemoteAddr, so an internet attacker + // can't spoof IPs to defeat the enroll rate-limit or poison the + // audit log. + if tp := strings.TrimSpace(flagParams.Service.TrustedProxies); tp != "" { + utils.SetTrustedProxies(strings.Split(tp, ",")) + log.Info().Msgf("Trusting forwarding headers from: %s", tp) + } // ////////////////////////////// Backend log.Info().Msg("Initializing backend...") // Attempt to connect to backend waiting until is ready @@ -270,6 +282,16 @@ func osctrlService() { } }() } + // Initialize audit log so failed enroll attempts are recorded. + // + auditLog, err := auditlog.CreateAuditLogManager(db.Conn, serviceName, flagParams.Service.AuditLog) + if err != nil { + log.Fatal().Msgf("error initializing audit log manager: %v", err) + } + // Per-IP rate limit on /enroll. Bursts of 20 per minute, idle eviction + // after 10 minutes. + enrollLimiter := ratelimit.New(20, time.Minute, 10*time.Minute) + // Initialize TLS handlers before router log.Info().Msg("Initializing handlers") handlersTLS = handlers.CreateHandlersTLS( @@ -286,6 +308,7 @@ func osctrlService() { handlers.WithOsqueryValues(flagParams.Osquery), handlers.WithConfigEndpoints(flagParams.ConfigEndpoints), handlers.WithDebugHTTP(flagParams.Debug), + handlers.WithAuditLog(auditLog), ) // ///////////////////////// ALL CONTENT IS UNAUTHENTICATED FOR TLS log.Info().Msg("Initializing router") @@ -299,7 +322,16 @@ func osctrlService() { muxTLS.HandleFunc("GET "+errorPath, handlersTLS.ErrorHandler) // TLS: Specific routes for osquery nodes // FIXME this forces all paths to be the same - muxTLS.Handle("POST /{env}/"+environments.DefaultEnrollPath, handlersTLS.PrometheusMiddleware(http.HandlerFunc(handlersTLS.EnrollHandler))) + // Rate-limit + Prometheus around the enroll handler. The limiter sees + // the request first so spray traffic doesn't churn the rest of the + // chain. Audit-log on rejection so SoC tooling can alert. The audit + // helper takes the env name from the path lazily — we don't know the + // env at this layer. + enrollRateLimit := enrollLimiter.HTTPMiddleware(ratelimit.KeyByIP, func(r *http.Request, key string) { + envName := r.PathValue("env") + auditLog.FailedEnroll(key, envName, "rate limit exceeded", 0) + }) + muxTLS.Handle("POST /{env}/"+environments.DefaultEnrollPath, enrollRateLimit(handlersTLS.PrometheusMiddleware(http.HandlerFunc(handlersTLS.EnrollHandler)))) if flagParams.Osquery.Config { muxTLS.Handle("POST /{env}/"+environments.DefaultConfigPath, handlersTLS.PrometheusMiddleware(http.HandlerFunc(handlersTLS.ConfigHandler))) } diff --git a/deploy/config/tls.yml b/deploy/config/tls.yml index 995d99eb..35f7df5d 100644 --- a/deploy/config/tls.yml +++ b/deploy/config/tls.yml @@ -10,7 +10,14 @@ service: host: osctrl.net # Valid values: "none", "json", "db", "saml", "oidc", "oauth" auth: none - auditLog: false + auditLog: true + # Comma-separated CIDR list whose X-Real-IP / X-Forwarded-For headers + # utils.GetIP will trust. osctrl-tls is typically internet-facing for + # osquery node enrollment; keep empty unless you operate it behind a + # trusted reverse proxy that forwards client IPs. Empty (default) + # prevents header-spoofed enroll-rate-limit bypass and audit-log + # poisoning. + trustedProxies: "" # Database configuration db: