diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml new file mode 100644 index 000000000..8a29befa9 --- /dev/null +++ b/.github/workflows/unit-tests.yaml @@ -0,0 +1,21 @@ +name: Unit Tests + +on: + pull_request: + branches: + - add-alert-management-api-base + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Run tests + run: go test -count=1 $(go list ./... | grep -v /test/e2e) diff --git a/Dockerfile b/Dockerfile index c0e7f1bc7..f7de736f5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,6 +25,7 @@ RUN make install-backend COPY cmd/ cmd/ COPY pkg/ pkg/ +COPY internal/ internal/ ENV GOEXPERIMENT=strictfipsruntime ENV CGO_ENABLED=1 diff --git a/Dockerfile.dev b/Dockerfile.dev index 557e5edca..fa279fa38 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -28,6 +28,7 @@ RUN go mod download COPY cmd/ cmd/ COPY pkg/ pkg/ +COPY internal/ internal/ RUN go build -mod=mod -o plugin-backend cmd/plugin-backend.go diff --git a/Dockerfile.dev-mcp b/Dockerfile.dev-mcp index b2df023e2..49e66c6f3 100644 --- a/Dockerfile.dev-mcp +++ b/Dockerfile.dev-mcp @@ -31,6 +31,7 @@ RUN go mod download COPY cmd/ cmd/ COPY pkg/ pkg/ +COPY internal/ internal/ RUN go build -mod=mod -o plugin-backend cmd/plugin-backend.go diff --git a/Dockerfile.devspace b/Dockerfile.devspace index 7af8b0d34..6ed4aa543 100644 --- a/Dockerfile.devspace +++ b/Dockerfile.devspace @@ -20,6 +20,7 @@ RUN make install-backend COPY config/ config/ COPY cmd/ cmd/ COPY pkg/ pkg/ +COPY internal/ internal/ RUN make build-backend diff --git a/Dockerfile.konflux b/Dockerfile.konflux index ba20c4237..31e5923b4 100644 --- a/Dockerfile.konflux +++ b/Dockerfile.konflux @@ -28,6 +28,7 @@ RUN make install-backend COPY cmd/ cmd/ COPY pkg/ pkg/ +COPY internal/ internal/ ENV GOEXPERIMENT=strictfipsruntime ENV CGO_ENABLED=1 diff --git a/Dockerfile.mcp b/Dockerfile.mcp index 33960459e..84add4f12 100644 --- a/Dockerfile.mcp +++ b/Dockerfile.mcp @@ -28,6 +28,7 @@ RUN make install-backend COPY cmd/ cmd/ COPY pkg/ pkg/ +COPY internal/ internal/ ENV GOOS=${TARGETOS:-linux} ENV GOARCH=${TARGETARCH} diff --git a/Makefile b/Makefile index 7c8d38cdc..9ab0977d3 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,10 @@ lint-frontend: install-backend: go mod download +.PHONY: generate-backend +generate-backend: + go run github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen --config api/oapi-codegen.yaml api/openapi.yaml + .PHONY: build-backend build-backend: go build $(BUILD_OPTS) -mod=readonly -o plugin-backend cmd/plugin-backend.go @@ -56,7 +60,11 @@ start-backend: .PHONY: test-backend test-backend: - go test ./pkg/... -v + go test ./pkg/... ./internal/... -v + +.PHONY: test-e2e +test-e2e: + PLUGIN_URL=http://localhost:9001 go test -v -timeout=150m -count=1 ./test/e2e .PHONY: test-frontend test-frontend: diff --git a/api/oapi-codegen.yaml b/api/oapi-codegen.yaml new file mode 100644 index 000000000..1f30d8d1d --- /dev/null +++ b/api/oapi-codegen.yaml @@ -0,0 +1,19 @@ +# oapi-codegen configuration for the monitoring-plugin management API. 
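+# Regenerate with `make generate-backend` (added in this change), which runs oapi-codegen via
+# `go run github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen` and rewrites
+# internal/managementrouter/api_generated.go, or invoke the command directly: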
+# Run: oapi-codegen --config api/oapi-codegen.yaml api/openapi.yaml + +package: managementrouter +output: internal/managementrouter/api_generated.go + +generate: + # Generate the gorilla/mux router bindings (RegisterHandlers / RegisterHandlersWithBaseURL) + gorilla-server: true + # Generate request/response types from the spec schemas + models: true + # Do not generate an embedded spec — it adds binary bloat with no benefit here + embedded-spec: false + +output-options: + # Silence the "do not edit" header so editors don't flag the file in git diff + skip-fmt: false + # Keep generated file name stable for git + user-templates: {} diff --git a/api/openapi.yaml b/api/openapi.yaml new file mode 100644 index 000000000..758cac0a3 --- /dev/null +++ b/api/openapi.yaml @@ -0,0 +1,151 @@ +openapi: "3.0.3" +info: + title: Monitoring Plugin Management API + description: > + API for managing alert rules in OpenShift Monitoring Plugin. + All endpoints require a valid OpenShift user bearer token in the + Authorization header (forwarded by the console bridge). + version: "1.0.0" + +servers: + - url: /api/v1/alerting + +paths: + /rules: + post: + operationId: CreateAlertRule + summary: Create an alert rule + description: > + Creates a new alert rule. If prometheusRule is omitted the rule is + created as a platform alert rule; if prometheusRule is provided the + rule is created as a user-defined alert rule in the specified + PrometheusRule resource. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateAlertRuleRequest" + responses: + "201": + description: Alert rule created successfully + content: + application/json: + schema: + $ref: "#/components/schemas/CreateAlertRuleResponse" + "400": + description: Invalid request + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "401": + description: Missing or invalid authorization token + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "404": + description: Resource not found + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "405": + description: Operation not allowed (e.g. rule is externally managed) + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "409": + description: Conflict (e.g. duplicate rule ID) + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "500": + description: Unexpected server error + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + +components: + schemas: + AlertRuleSpec: + type: object + description: > + Specification of a Prometheus alerting or recording rule. + Maps to prometheus-operator Rule fields. + properties: + alert: + type: string + description: Name of the alert. Must be set for alerting rules. + record: + type: string + description: Name of the time series for recording rules. + expr: + type: string + description: PromQL expression to evaluate. + for: + type: string + description: Duration the condition must be true before firing (e.g. "5m"). + labels: + type: object + additionalProperties: + type: string + description: Labels to attach to alerts produced by the rule. + annotations: + type: object + additionalProperties: + type: string + description: Annotations to attach to alerts produced by the rule. + keepFiringFor: + type: string + description: > + Duration to keep alert firing after the condition is no longer true. 
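+    # Illustrative request body for POST /api/v1/alerting/rules (a sketch only; the alert name and
+    # expression are made up, field shapes follow CreateAlertRuleRequest below; omit "prometheusRule"
+    # to create a platform rule instead of a user-defined one):
+    #   {
+    #     "alertingRule": {"alert": "HighCPU", "expr": "instance:node_cpu:rate5m > 0.9", "for": "5m"},
+    #     "prometheusRule": {"prometheusRuleName": "my-rules", "prometheusRuleNamespace": "my-app"}
+    #   }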
+ + PrometheusRuleTarget: + type: object + description: > + Identifies the PrometheusRule resource and rule group where the alert + rule will be stored. Required for user-defined alert rules. + required: + - prometheusRuleName + - prometheusRuleNamespace + properties: + prometheusRuleName: + type: string + description: Name of the PrometheusRule resource. + prometheusRuleNamespace: + type: string + description: Namespace of the PrometheusRule resource. + groupName: + type: string + description: Name of the rule group within the PrometheusRule. Optional. + + CreateAlertRuleRequest: + type: object + properties: + alertingRule: + $ref: "#/components/schemas/AlertRuleSpec" + prometheusRule: + $ref: "#/components/schemas/PrometheusRuleTarget" + + CreateAlertRuleResponse: + type: object + required: + - id + properties: + id: + type: string + description: Computed stable ID for the created alert rule. + + ErrorResponse: + type: object + required: + - error + properties: + error: + type: string + description: Human-readable error message. diff --git a/docs/alert-management.md b/docs/alert-management.md new file mode 100644 index 000000000..1ca39abf9 --- /dev/null +++ b/docs/alert-management.md @@ -0,0 +1,41 @@ +## Alert Management Notes + +This document covers alert management behavior and prerequisites for the monitoring plugin. + +### User workload monitoring prerequisites + +To include **user workload** alerts and rules in `/api/v1/alerting/alerts` and `/api/v1/alerting/rules`, the user workload monitoring stack must be enabled. Follow the OpenShift documentation for enabling and configuring UWM: + +https://docs.redhat.com/en/documentation/monitoring_stack_for_red_hat_openshift/4.20/html/configuring_user_workload_monitoring/configuring-alerts-and-notifications-uwm + +#### How the plugin reads user workload alerts/rules + +The plugin prefers **Thanos tenancy** for user workload alerts/rules (RBAC-scoped, requires a namespace parameter). When the client does not provide a `namespace` filter, the plugin discovers candidate namespaces and queries Thanos tenancy per-namespace, using the end-user bearer token. + +Routes in `openshift-user-workload-monitoring` are treated as **fallbacks** (and are also used for some health checks and pending state retrieval). + +If you want to create the user workload Prometheus route (optional), you can expose the service: + +```shell +oc -n openshift-user-workload-monitoring expose svc/prometheus-user-workload-web --name=prometheus-user-workload-web --port=web +``` + +If the route is missing/unreachable but tenancy is healthy, the plugin should still return user workload data and suppress route warnings. + +#### Alert states + +- `/api/v1/alerting/alerts?state=pending`: pending alerts come from Prometheus. +- `/api/v1/alerting/alerts?state=firing`: firing alerts come from Alertmanager when available. +- `/api/v1/alerting/alerts?state=silenced`: silenced alerts come from Alertmanager (requires an Alertmanager endpoint). + +### Alertmanager routing choices + +OpenShift supports routing user workload alerts to: + +- The **platform Alertmanager** (default instance) +- A **separate Alertmanager** for user workloads +- **External Alertmanager** instances + +This is a cluster configuration choice and does not change the plugin API shape. The plugin reads alerts from Alertmanager (for firing/silenced) and Prometheus (for pending), then merges platform and user workload results when available. + +The plugin intentionally reads from only the in-cluster Alertmanager endpoints. 
Supporting multiple external Alertmanagers would introduce ambiguous alert state and silencing outcomes because each instance can apply different routing, inhibition, and silence configurations. diff --git a/go.mod b/go.mod index d831b6f40..1e2bae37b 100644 --- a/go.mod +++ b/go.mod @@ -6,10 +6,13 @@ require ( github.com/evanphx/json-patch v4.12.0+incompatible github.com/gorilla/handlers v1.5.2 github.com/gorilla/mux v1.8.1 + github.com/openshift/api v0.0.0-20251122153900-88cca31a44c9 github.com/openshift/client-go v0.0.0-20251123231646-4685125c2287 github.com/openshift/library-go v0.0.0-20240905123346-5bdbfe35a6f5 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0 github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0 + github.com/prometheus/common v0.67.4 + github.com/prometheus/prometheus v0.308.0 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.11.1 gopkg.in/yaml.v2 v2.4.0 @@ -20,7 +23,10 @@ require ( ) require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/dennwc/varint v1.0.0 // indirect github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect @@ -43,16 +49,20 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20250923004556-9e5a51aed1e8 // indirect github.com/google/uuid v1.6.0 // indirect + github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/openshift/api v0.0.0-20251122153900-88cca31a44c9 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/procfs v0.16.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect github.com/x448/float16 v0.8.4 // indirect + go.uber.org/atomic v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/net v0.46.0 // indirect diff --git a/go.sum b/go.sum index 565b23852..e70962788 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,57 @@ +cloud.google.com/go/auth v0.17.0 h1:74yCm7hCj2rUyyAocqnFzsAYXgJhrG26XCFimrc/Kz4= +cloud.google.com/go/auth v0.17.0/go.mod h1:6wv/t5/6rOPAX4fJiRjKkJCvswLwdet7G8+UGXt7nCQ= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.1 h1:5YTBM8QDVIBN3sxBil89WfdAAqDZbyJTgh688DSxX5w= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.1/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.12.0 
h1:wL5IEG5zb7BVv1Kv0Xm92orq+5hB5Nipn3B5tn4Rqfk= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.12.0/go.mod h1:J7MUC/wtRpfGVbQ5sIItY5/FuVWmvzlY21WAOfQnq/I= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI= +github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 h1:XkkQbfMyuH2jTSjQjSoihryI8GINRcs4xp8lNawg0FI= +github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= +github.com/aws/aws-sdk-go-v2 v1.39.6 h1:2JrPCVgWJm7bm83BDwY5z8ietmeJUbh3O2ACnn+Xsqk= +github.com/aws/aws-sdk-go-v2 v1.39.6/go.mod h1:c9pm7VwuW0UPxAEYGyTmyurVcNrbF6Rt/wixFqDhcjE= +github.com/aws/aws-sdk-go-v2/config v1.31.17 h1:QFl8lL6RgakNK86vusim14P2k8BFSxjvUkcWLDjgz9Y= +github.com/aws/aws-sdk-go-v2/config v1.31.17/go.mod h1:V8P7ILjp/Uef/aX8TjGk6OHZN6IKPM5YW6S78QnRD5c= +github.com/aws/aws-sdk-go-v2/credentials v1.18.21 h1:56HGpsgnmD+2/KpG0ikvvR8+3v3COCwaF4r+oWwOeNA= +github.com/aws/aws-sdk-go-v2/credentials v1.18.21/go.mod h1:3YELwedmQbw7cXNaII2Wywd+YY58AmLPwX4LzARgmmA= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 h1:T1brd5dR3/fzNFAQch/iBKeX07/ffu/cLu+q+RuzEWk= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13/go.mod h1:Peg/GBAQ6JDt+RoBf4meB1wylmAipb7Kg2ZFakZTlwk= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 h1:a+8/MLcWlIxo1lF9xaGt3J/u3yOZx+CdSveSNwjhD40= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13/go.mod h1:oGnKwIYZ4XttyU2JWxFrwvhF6YKiK/9/wmE3v3Iu9K8= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 h1:HBSI2kDkMdWz4ZM7FjwE7e/pWDEZ+nR95x8Ztet1ooY= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13/go.mod h1:YE94ZoDArI7awZqJzBAZ3PDD2zSfuP7w6P2knOzIn8M= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 h1:x2Ibm/Af8Fi+BH+Hsn9TXGdT+hKbDd5XOTZxTMxDk7o= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3/go.mod h1:IW1jwyrQgMdhisceG8fQLmQIydcT/jWY21rFhzgaKwo= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 h1:kDqdFvMY4AtKoACfzIGD8A0+hbT41KTKF//gq7jITfM= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13/go.mod h1:lmKuogqSU3HzQCwZ9ZtcqOc5XGMqtDK7OIc2+DxiUEg= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.1 h1:0JPwLz1J+5lEOfy/g0SURC9cxhbQ1lIMHMa+AHZSzz0= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.1/go.mod h1:fKvyjJcz63iL/ftA6RaM8sRCtN4r4zl4tjL3qw5ec7k= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.5 h1:OWs0/j2UYR5LOGi88sD5/lhN6TDLG6SfA7CqsQO9zF0= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.5/go.mod h1:klO+ejMvYsB4QATfEOIXk8WAEwN4N0aBfJpvC+5SZBo= +github.com/aws/aws-sdk-go-v2/service/sts v1.39.1 h1:mLlUgHn02ue8whiR4BmxxGJLR2gwU6s6ZzJ5wDamBUs= +github.com/aws/aws-sdk-go-v2/service/sts v1.39.1/go.mod h1:E19xDjpzPZC7LS2knI9E6BaRFDK43Eul7vd6rSq2HWk= +github.com/aws/smithy-go v1.23.2 h1:Crv0eatJUQhaManss33hS5r40CG3ZFH+21XSkqMrIUM= +github.com/aws/smithy-go v1.23.2/go.mod 
h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= +github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= +github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= +github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= @@ -14,6 +64,8 @@ github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sa github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.22.1 h1:sHYI1He3b9NqJ4wXLoJDKmUmHkWy/L7rtEo92JUxBNk= github.com/go-openapi/jsonpointer v0.22.1/go.mod h1:pQT9OsLkfz1yWoMgYFy4x3U5GY5nUlsOn1qSBH5MkCM= github.com/go-openapi/jsonreference v0.21.2 h1:Wxjda4M/BBQllegefXrY/9aq1fxBA8sI5M/lFU6tSWU= @@ -48,6 +100,10 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= +github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -55,20 +111,34 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20250923004556-9e5a51aed1e8 
h1:ZI8gCoCjGzPsum4L21jHdQs8shFBIQih1TM9Rd/c+EQ= github.com/google/pprof v0.0.0-20250923004556-9e5a51aed1e8/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.6 h1:GW/XbdyBFQ8Qe+YAmFU9uHLo7OnF5tL52HFAgMmyrf4= +github.com/googleapis/enterprise-certificate-proxy v0.3.6/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA= +github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81vgd/bo= +github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc= github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= +github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 h1:cLN4IBkmkYZNnk7EAJ0BHIethd+J6LqxFNw5mSiI2bM= +github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= +github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -77,6 +147,11 @@ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFd github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f 
h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= +github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s= +github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= @@ -87,6 +162,8 @@ github.com/openshift/client-go v0.0.0-20251123231646-4685125c2287 h1:Spullg4rMMW github.com/openshift/client-go v0.0.0-20251123231646-4685125c2287/go.mod h1:liCuDDdOsPSZIDP0QuTveFhF7ldXuvnPhBd/OTsJdJc= github.com/openshift/library-go v0.0.0-20240905123346-5bdbfe35a6f5 h1:CyPTfZvr+HvwXbix9kieI55HeFn4a5DBaxJ3DNFinhg= github.com/openshift/library-go v0.0.0-20240905123346-5bdbfe35a6f5/go.mod h1:/wmao3qtqOQ484HDka9cWP7SIvOQOdzpmhyXkF2YdzE= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -96,6 +173,22 @@ github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0 h github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0/go.mod h1:WHiLZmOWVop/MoYvRD58LfnPeyE+dcITby/jQjg83Hw= github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0 h1:rrZriucuC8ZUOPr8Asvavb9pbzqXSsAeY79aH8xnXlc= github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0/go.mod h1:OMvC2XJGxPeEAKf5qB1u7DudV46HA8ePxYslRjxQcbk= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_golang/exp v0.0.0-20250914183048-a974e0d45e0a h1:RF1vfKM34/3DbGNis22BGd6sDDY3XBi0eM7pYqmOEO0= +github.com/prometheus/client_golang/exp v0.0.0-20250914183048-a974e0d45e0a/go.mod h1:FGJuwvfcPY0V5enm+w8zF1RNS062yugQtPPQp1c4Io4= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.67.4 h1:yR3NqWO1/UyO1w2PhUvXlGQs/PtFmoveVO0KZ4+Lvsc= +github.com/prometheus/common v0.67.4/go.mod h1:gP0fq6YjjNCLssJCQp0yk4M8W6ikLURwkdd/YKtTbyI= +github.com/prometheus/otlptranslator v1.0.0 h1:s0LJW/iN9dkIH+EnhiD3BlkkP5QVIUVEoIwkU+A6qos= +github.com/prometheus/otlptranslator v1.0.0/go.mod h1:vRYWnXvI6aWGpsdY/mOT/cbeVRBlPWtBNDb7kGR3uKM= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/prometheus/prometheus v0.308.0 h1:kVh/5m1n6m4cSK9HYTDEbMxzuzCWyEdPdKSxFRxXj04= +github.com/prometheus/prometheus v0.308.0/go.mod h1:xXYKzScyqyFHihpS0UsXpC2F3RA/CygOs7wb4mpdusE= +github.com/prometheus/sigv4 v0.3.0 h1:QIG7nTbu0JTnNidGI1Uwl5AGVIChWUACxn2B/BQ1kms= 
+github.com/prometheus/sigv4 v0.3.0/go.mod h1:fKtFYDus2M43CWKMNtGvFNHGXnAJJEGZbiYCmVp/F8I= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= @@ -113,6 +206,18 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= @@ -122,6 +227,10 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/exp v0.0.0-20250808145144-a408d31f581a h1:Y+7uR/b1Mw2iSXZ3G//1haIiSElDQZ8KWh0h+sZPG90= +golang.org/x/exp v0.0.0-20250808145144-a408d31f581a/go.mod h1:rT6SFzZ7oxADUDx58pcaKFTcZ+inxAa9fTrYx/uVYwg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -135,6 +244,8 @@ golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwE golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -159,6 +270,13 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/api v0.252.0 h1:xfKJeAJaMwb8OC9fesr369rjciQ704AjU/psjkKURSI= +google.golang.org/api v0.252.0/go.mod h1:dnHOv81x5RAmumZ7BWLShB/u7JZNeyalImxHmtTHxqw= +google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 h1:L6iMMGrtzgHsWofoFcihmDEMYeDR9KN/ThbPWGrh++g= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251002232023-7c0ddcbb5797 h1:CirRxTOwnRWVLKzDNrs0CXAaVozJoR4G9xvdRecrdpk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251002232023-7c0ddcbb5797/go.mod h1:HSkG/KdJWusxU1F6CNrwNDjBMgisKxGnc5dAZfT0mjQ= +google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A= +google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c= google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/managementrouter/api_generated.go b/internal/managementrouter/api_generated.go new file mode 100644 index 000000000..555d00eaf --- /dev/null +++ b/internal/managementrouter/api_generated.go @@ -0,0 +1,219 @@ +// Package managementrouter provides primitives to interact with the openapi HTTP API. +// +// Code generated by github.com/oapi-codegen/oapi-codegen/v2 version v2.6.0 DO NOT EDIT. +package managementrouter + +import ( + "fmt" + "net/http" + + "github.com/gorilla/mux" +) + +// AlertRuleSpec Specification of a Prometheus alerting or recording rule. Maps to prometheus-operator Rule fields. +type AlertRuleSpec struct { + // Alert Name of the alert. Must be set for alerting rules. + Alert *string `json:"alert,omitempty"` + + // Annotations Annotations to attach to alerts produced by the rule. + Annotations *map[string]string `json:"annotations,omitempty"` + + // Expr PromQL expression to evaluate. + Expr *string `json:"expr,omitempty"` + + // For Duration the condition must be true before firing (e.g. "5m"). + For *string `json:"for,omitempty"` + + // KeepFiringFor Duration to keep alert firing after the condition is no longer true. + KeepFiringFor *string `json:"keepFiringFor,omitempty"` + + // Labels Labels to attach to alerts produced by the rule. + Labels *map[string]string `json:"labels,omitempty"` + + // Record Name of the time series for recording rules. + Record *string `json:"record,omitempty"` +} + +// CreateAlertRuleRequest defines model for CreateAlertRuleRequest. 
+type CreateAlertRuleRequest struct { + // AlertingRule Specification of a Prometheus alerting or recording rule. Maps to prometheus-operator Rule fields. + AlertingRule *AlertRuleSpec `json:"alertingRule,omitempty"` + + // PrometheusRule Identifies the PrometheusRule resource and rule group where the alert rule will be stored. Required for user-defined alert rules. + PrometheusRule *PrometheusRuleTarget `json:"prometheusRule,omitempty"` +} + +// CreateAlertRuleResponse defines model for CreateAlertRuleResponse. +type CreateAlertRuleResponse struct { + // Id Computed stable ID for the created alert rule. + Id string `json:"id"` +} + +// ErrorResponse defines model for ErrorResponse. +type ErrorResponse struct { + // Error Human-readable error message. + Error string `json:"error"` +} + +// PrometheusRuleTarget Identifies the PrometheusRule resource and rule group where the alert rule will be stored. Required for user-defined alert rules. +type PrometheusRuleTarget struct { + // GroupName Name of the rule group within the PrometheusRule. Optional. + GroupName *string `json:"groupName,omitempty"` + + // PrometheusRuleName Name of the PrometheusRule resource. + PrometheusRuleName string `json:"prometheusRuleName"` + + // PrometheusRuleNamespace Namespace of the PrometheusRule resource. + PrometheusRuleNamespace string `json:"prometheusRuleNamespace"` +} + +// CreateAlertRuleJSONRequestBody defines body for CreateAlertRule for application/json ContentType. +type CreateAlertRuleJSONRequestBody = CreateAlertRuleRequest + +// ServerInterface represents all server handlers. +type ServerInterface interface { + // Create an alert rule + // (POST /rules) + CreateAlertRule(w http.ResponseWriter, r *http.Request) +} + +// ServerInterfaceWrapper converts contexts to parameters. 
+type ServerInterfaceWrapper struct { + Handler ServerInterface + HandlerMiddlewares []MiddlewareFunc + ErrorHandlerFunc func(w http.ResponseWriter, r *http.Request, err error) +} + +type MiddlewareFunc func(http.Handler) http.Handler + +// CreateAlertRule operation middleware +func (siw *ServerInterfaceWrapper) CreateAlertRule(w http.ResponseWriter, r *http.Request) { + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.CreateAlertRule(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +type UnescapedCookieParamError struct { + ParamName string + Err error +} + +func (e *UnescapedCookieParamError) Error() string { + return fmt.Sprintf("error unescaping cookie parameter '%s'", e.ParamName) +} + +func (e *UnescapedCookieParamError) Unwrap() error { + return e.Err +} + +type UnmarshalingParamError struct { + ParamName string + Err error +} + +func (e *UnmarshalingParamError) Error() string { + return fmt.Sprintf("Error unmarshaling parameter %s as JSON: %s", e.ParamName, e.Err.Error()) +} + +func (e *UnmarshalingParamError) Unwrap() error { + return e.Err +} + +type RequiredParamError struct { + ParamName string +} + +func (e *RequiredParamError) Error() string { + return fmt.Sprintf("Query argument %s is required, but not found", e.ParamName) +} + +type RequiredHeaderError struct { + ParamName string + Err error +} + +func (e *RequiredHeaderError) Error() string { + return fmt.Sprintf("Header parameter %s is required, but not found", e.ParamName) +} + +func (e *RequiredHeaderError) Unwrap() error { + return e.Err +} + +type InvalidParamFormatError struct { + ParamName string + Err error +} + +func (e *InvalidParamFormatError) Error() string { + return fmt.Sprintf("Invalid format for parameter %s: %s", e.ParamName, e.Err.Error()) +} + +func (e *InvalidParamFormatError) Unwrap() error { + return e.Err +} + +type TooManyValuesForParamError struct { + ParamName string + Count int +} + +func (e *TooManyValuesForParamError) Error() string { + return fmt.Sprintf("Expected one value for %s, got %d", e.ParamName, e.Count) +} + +// Handler creates http.Handler with routing matching OpenAPI spec. +func Handler(si ServerInterface) http.Handler { + return HandlerWithOptions(si, GorillaServerOptions{}) +} + +type GorillaServerOptions struct { + BaseURL string + BaseRouter *mux.Router + Middlewares []MiddlewareFunc + ErrorHandlerFunc func(w http.ResponseWriter, r *http.Request, err error) +} + +// HandlerFromMux creates http.Handler with routing matching OpenAPI spec based on the provided mux. 
+func HandlerFromMux(si ServerInterface, r *mux.Router) http.Handler { + return HandlerWithOptions(si, GorillaServerOptions{ + BaseRouter: r, + }) +} + +func HandlerFromMuxWithBaseURL(si ServerInterface, r *mux.Router, baseURL string) http.Handler { + return HandlerWithOptions(si, GorillaServerOptions{ + BaseURL: baseURL, + BaseRouter: r, + }) +} + +// HandlerWithOptions creates http.Handler with additional options +func HandlerWithOptions(si ServerInterface, options GorillaServerOptions) http.Handler { + r := options.BaseRouter + + if r == nil { + r = mux.NewRouter() + } + if options.ErrorHandlerFunc == nil { + options.ErrorHandlerFunc = func(w http.ResponseWriter, r *http.Request, err error) { + http.Error(w, err.Error(), http.StatusBadRequest) + } + } + wrapper := ServerInterfaceWrapper{ + Handler: si, + HandlerMiddlewares: options.Middlewares, + ErrorHandlerFunc: options.ErrorHandlerFunc, + } + + r.HandleFunc(options.BaseURL+"/rules", wrapper.CreateAlertRule).Methods("POST") + + return r +} diff --git a/internal/managementrouter/create_alert_rule.go b/internal/managementrouter/create_alert_rule.go new file mode 100644 index 000000000..dead8d70f --- /dev/null +++ b/internal/managementrouter/create_alert_rule.go @@ -0,0 +1,97 @@ +package managementrouter + +import ( + "encoding/json" + "net/http" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/management" +) + +// CreateAlertRule implements ServerInterface. +func (hr *httpRouter) CreateAlertRule(w http.ResponseWriter, req *http.Request) { + req.Body = http.MaxBytesReader(w, req.Body, maxRequestBodyBytes) + + var payload CreateAlertRuleRequest + if err := json.NewDecoder(req.Body).Decode(&payload); err != nil { + writeError(w, http.StatusBadRequest, "invalid request body") + return + } + + if payload.AlertingRule == nil { + writeError(w, http.StatusBadRequest, "alertingRule is required") + return + } + + alertRule := alertRuleSpecToMonitoringV1(*payload.AlertingRule) + + var ( + id string + err error + ) + + if payload.PrometheusRule != nil { + prOpts := prometheusRuleTargetToOptions(*payload.PrometheusRule) + id, err = hr.managementClient.CreateUserDefinedAlertRule(req.Context(), alertRule, prOpts) + } else { + id, err = hr.managementClient.CreatePlatformAlertRule(req.Context(), alertRule) + } + + if err != nil { + handleError(w, err) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + if err := json.NewEncoder(w).Encode(CreateAlertRuleResponse{Id: id}); err != nil { + log.WithError(err).Warn("failed to encode create alert rule response") + } +} + +// alertRuleSpecToMonitoringV1 maps the API-defined AlertRuleSpec to the +// prometheus-operator Rule type used by the management layer. 
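+// Optional fields that are nil in the request are simply not set, so the resulting Rule keeps its
+// zero values (empty strings, nil maps, nil duration pointers) for anything the caller omitted.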
+func alertRuleSpecToMonitoringV1(spec AlertRuleSpec) monitoringv1.Rule { + rule := monitoringv1.Rule{} + + if spec.Alert != nil { + rule.Alert = *spec.Alert + } + if spec.Record != nil { + rule.Record = *spec.Record + } + if spec.Expr != nil { + rule.Expr = intstr.FromString(*spec.Expr) + } + if spec.For != nil { + d := monitoringv1.Duration(*spec.For) + rule.For = &d + } + if spec.KeepFiringFor != nil { + d := monitoringv1.NonEmptyDuration(*spec.KeepFiringFor) + rule.KeepFiringFor = &d + } + if spec.Labels != nil { + rule.Labels = *spec.Labels + } + if spec.Annotations != nil { + rule.Annotations = *spec.Annotations + } + + return rule +} + +// prometheusRuleTargetToOptions maps the API-defined PrometheusRuleTarget to +// the management layer's PrometheusRuleOptions. +func prometheusRuleTargetToOptions(target PrometheusRuleTarget) management.PrometheusRuleOptions { + opts := management.PrometheusRuleOptions{ + Name: target.PrometheusRuleName, + Namespace: target.PrometheusRuleNamespace, + } + if target.GroupName != nil { + opts.GroupName = *target.GroupName + } + return opts +} diff --git a/internal/managementrouter/create_alert_rule_mapper_test.go b/internal/managementrouter/create_alert_rule_mapper_test.go new file mode 100644 index 000000000..09400ba06 --- /dev/null +++ b/internal/managementrouter/create_alert_rule_mapper_test.go @@ -0,0 +1,143 @@ +package managementrouter + +import ( + "testing" + + "k8s.io/apimachinery/pkg/util/intstr" +) + +func TestAlertRuleSpecToMonitoringV1_AlertFields(t *testing.T) { + alert := "MyAlert" + expr := "up == 0" + forDur := "5m" + keepFiringFor := "10m" + labels := map[string]string{"severity": "warning"} + annotations := map[string]string{"summary": "down"} + + spec := AlertRuleSpec{ + Alert: &alert, + Expr: &expr, + For: &forDur, + KeepFiringFor: &keepFiringFor, + Labels: &labels, + Annotations: &annotations, + } + + rule := alertRuleSpecToMonitoringV1(spec) + + if rule.Alert != alert { + t.Errorf("Alert: want %q, got %q", alert, rule.Alert) + } + if rule.Expr != intstr.FromString(expr) { + t.Errorf("Expr: want %v, got %v", intstr.FromString(expr), rule.Expr) + } + if rule.For == nil || string(*rule.For) != forDur { + t.Errorf("For: want %q, got %v", forDur, rule.For) + } + if rule.KeepFiringFor == nil || string(*rule.KeepFiringFor) != keepFiringFor { + t.Errorf("KeepFiringFor: want %q, got %v", keepFiringFor, rule.KeepFiringFor) + } + if rule.Labels["severity"] != "warning" { + t.Errorf("Labels: want severity=warning, got %v", rule.Labels) + } + if rule.Annotations["summary"] != "down" { + t.Errorf("Annotations: want summary=down, got %v", rule.Annotations) + } +} + +func TestAlertRuleSpecToMonitoringV1_RecordRule(t *testing.T) { + record := "job:up:sum" + expr := "sum(up) by (job)" + + spec := AlertRuleSpec{ + Record: &record, + Expr: &expr, + } + + rule := alertRuleSpecToMonitoringV1(spec) + + if rule.Record != record { + t.Errorf("Record: want %q, got %q", record, rule.Record) + } + if rule.Alert != "" { + t.Errorf("Alert should be empty for record rule, got %q", rule.Alert) + } + if rule.For != nil { + t.Errorf("For should be nil when not set, got %v", rule.For) + } +} + +func TestAlertRuleSpecToMonitoringV1_NilOptionalFields(t *testing.T) { + // Only required-ish field: nothing is actually required at the spec level. + // Verify zero values when optional pointers are nil. 
+ rule := alertRuleSpecToMonitoringV1(AlertRuleSpec{}) + + if rule.Alert != "" { + t.Errorf("expected empty Alert, got %q", rule.Alert) + } + if rule.Record != "" { + t.Errorf("expected empty Record, got %q", rule.Record) + } + if rule.Expr != (intstr.IntOrString{}) { + t.Errorf("expected zero Expr, got %v", rule.Expr) + } + if rule.For != nil { + t.Errorf("expected nil For, got %v", rule.For) + } + if rule.KeepFiringFor != nil { + t.Errorf("expected nil KeepFiringFor, got %v", rule.KeepFiringFor) + } + if rule.Labels != nil { + t.Errorf("expected nil Labels, got %v", rule.Labels) + } + if rule.Annotations != nil { + t.Errorf("expected nil Annotations, got %v", rule.Annotations) + } +} + +func TestAlertRuleSpecToMonitoringV1_ForTypeMapped(t *testing.T) { + forDur := "2m30s" + spec := AlertRuleSpec{For: &forDur} + rule := alertRuleSpecToMonitoringV1(spec) + + if rule.For == nil { + t.Fatal("expected non-nil For") + } + if string(*rule.For) != forDur { + t.Errorf("For: want %q, got %q", forDur, string(*rule.For)) + } +} + +func TestPrometheusRuleTargetToOptions_WithGroupName(t *testing.T) { + group := "custom-group" + target := PrometheusRuleTarget{ + PrometheusRuleName: "my-rule", + PrometheusRuleNamespace: "my-ns", + GroupName: &group, + } + + opts := prometheusRuleTargetToOptions(target) + + if opts.Name != "my-rule" { + t.Errorf("Name: want 'my-rule', got %q", opts.Name) + } + if opts.Namespace != "my-ns" { + t.Errorf("Namespace: want 'my-ns', got %q", opts.Namespace) + } + if opts.GroupName != "custom-group" { + t.Errorf("GroupName: want 'custom-group', got %q", opts.GroupName) + } +} + +func TestPrometheusRuleTargetToOptions_WithoutGroupName(t *testing.T) { + target := PrometheusRuleTarget{ + PrometheusRuleName: "my-rule", + PrometheusRuleNamespace: "my-ns", + } + + opts := prometheusRuleTargetToOptions(target) + + if opts.GroupName != "" { + t.Errorf("GroupName should be empty when nil, got %q", opts.GroupName) + } +} diff --git a/internal/managementrouter/create_alert_rule_test.go b/internal/managementrouter/create_alert_rule_test.go new file mode 100644 index 000000000..b5ac7105c --- /dev/null +++ b/internal/managementrouter/create_alert_rule_test.go @@ -0,0 +1,537 @@ +package managementrouter_test + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +// bearerRequest builds a POST request with an Authorization header so the +// authMiddleware in the router is satisfied. 
+func bearerRequest(t *testing.T, url string, body []byte) *http.Request { + t.Helper() + req := httptest.NewRequest(http.MethodPost, url, bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer test-token") + return req +} + +func newTestRouter(mockK8s *testutils.MockClient) http.Handler { + mgmt := management.New(context.Background(), mockK8s) + return managementrouter.New(mgmt) +} + +func TestCreateAlertRule_CreateNewUserDefinedRule(t *testing.T) { + mockK8sRules := &testutils.MockPrometheusRuleInterface{} + mockARules := &testutils.MockAlertingRuleInterface{} + mockK8s := &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockK8sRules }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { return mockARules }, + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + body := map[string]interface{}{ + "alertingRule": map[string]interface{}{ + "alert": "cpuHigh", + "expr": "vector(1)", + "for": "5m", + "labels": map[string]string{"severity": "warning"}, + "annotations": map[string]string{"summary": "cpu high"}, + }, + "prometheusRule": map[string]interface{}{ + "prometheusRuleName": "user-pr", + "prometheusRuleNamespace": "default", + }, + } + buf, _ := json.Marshal(body) + + req := bearerRequest(t, "/api/v1/alerting/rules", buf) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusCreated { + t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String()) + } + var resp struct { + Id string `json:"id"` + } + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("decode response: %v", err) + } + if resp.Id == "" { + t.Fatal("expected non-empty id") + } + + pr, found, err := mockK8sRules.Get(context.Background(), "default", "user-pr") + if err != nil { + t.Fatalf("Get PrometheusRule: %v", err) + } + if !found { + t.Fatal("expected PrometheusRule to be found") + } + var allAlerts []string + for _, g := range pr.Spec.Groups { + for _, r := range g.Rules { + allAlerts = append(allAlerts, r.Alert) + } + } + if !contains(allAlerts, "cpuHigh") { + t.Errorf("expected cpuHigh in alerts, got %v", allAlerts) + } +} + +func TestCreateAlertRule_CustomGroupName(t *testing.T) { + mockK8sRules := &testutils.MockPrometheusRuleInterface{} + mockARules := &testutils.MockAlertingRuleInterface{} + mockK8s := &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockK8sRules }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { return mockARules }, + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + body := map[string]interface{}{ + "alertingRule": map[string]interface{}{ + "alert": "cpuCustomGroup", + "expr": "vector(1)", + }, + "prometheusRule": map[string]interface{}{ + "prometheusRuleName": "user-pr", + "prometheusRuleNamespace": "default", + "groupName": "custom-group", + }, + } + buf, _ := json.Marshal(body) + + req := bearerRequest(t, "/api/v1/alerting/rules", buf) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusCreated { + t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String()) + } + + pr, found, err := mockK8sRules.Get(context.Background(), 
"default", "user-pr") + if err != nil || !found { + t.Fatalf("PrometheusRule not found: %v", err) + } + + var grp *monitoringv1.RuleGroup + for i := range pr.Spec.Groups { + if pr.Spec.Groups[i].Name == "custom-group" { + grp = &pr.Spec.Groups[i] + break + } + } + if grp == nil { + t.Fatal("custom-group not found") + } + var alerts []string + for _, r := range grp.Rules { + alerts = append(alerts, r.Alert) + } + if !contains(alerts, "cpuCustomGroup") { + t.Errorf("expected cpuCustomGroup, got %v", alerts) + } +} + +func TestCreateAlertRule_InvalidJSON(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/alerting/rules", bytes.NewBufferString("{")) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer test-token") + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } + if body := w.Body.String(); !jsonContains(body, "invalid request body") { + t.Errorf("expected 'invalid request body' in %q", body) + } +} + +func TestCreateAlertRule_MissingAlertingRule(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + body := map[string]interface{}{ + "prometheusRule": map[string]interface{}{ + "prometheusRuleName": "user-pr", + "prometheusRuleNamespace": "default", + }, + } + buf, _ := json.Marshal(body) + + req := bearerRequest(t, "/api/v1/alerting/rules", buf) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } + if body := w.Body.String(); !jsonContains(body, "alertingRule is required") { + t.Errorf("expected 'alertingRule is required' in %q", body) + } +} + +func TestCreateAlertRule_MissingPRNameNamespace(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + body := map[string]interface{}{ + "alertingRule": map[string]interface{}{ + "alert": "x", + "expr": "vector(1)", + }, + "prometheusRule": map[string]interface{}{}, + } + buf, _ := json.Marshal(body) + + req := bearerRequest(t, "/api/v1/alerting/rules", buf) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } + if body := w.Body.String(); !jsonContains(body, "PrometheusRule Name and Namespace must be specified") { + t.Errorf("unexpected body: %q", body) + } +} + +func TestCreateAlertRule_PlatformManagedPR(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "openshift-monitoring" + }, + } + }, + } + router := newTestRouter(mockK8s) + + body := map[string]interface{}{ + "alertingRule": map[string]interface{}{ + "alert": "x", + "expr": "vector(1)", + }, + 
"prometheusRule": map[string]interface{}{ + "prometheusRuleName": "platform-pr", + "prometheusRuleNamespace": "openshift-monitoring", + }, + } + buf, _ := json.Marshal(body) + + req := bearerRequest(t, "/api/v1/alerting/rules", buf) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", w.Code) + } + if body := w.Body.String(); !jsonContains(body, "cannot add user-defined alert rule to a platform-managed PrometheusRule") { + t.Errorf("unexpected body: %q", body) + } +} + +func TestCreateAlertRule_MissingAuthToken(t *testing.T) { + mockK8s := &testutils.MockClient{} + mgmt := management.New(context.Background(), mockK8s) + router := managementrouter.New(mgmt) + + body := map[string]interface{}{ + "alertingRule": map[string]interface{}{ + "alert": "x", + "expr": "vector(1)", + }, + } + buf, _ := json.Marshal(body) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/alerting/rules", bytes.NewReader(buf)) + req.Header.Set("Content-Type", "application/json") + // No Authorization header + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", w.Code) + } +} + +func TestCreateAlertRule_BodyTooLarge(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + // Build a payload that exceeds the 1 MB limit. + oversized := make([]byte, 2<<20) // 2 MB + for i := range oversized { + oversized[i] = 'x' + } + + req := httptest.NewRequest(http.MethodPost, "/api/v1/alerting/rules", bytes.NewReader(oversized)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer test-token") + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400 for oversized body, got %d", w.Code) + } +} + +func TestCreateAlertRule_PlatformRuleCreated(t *testing.T) { + mockARules := &testutils.MockAlertingRuleInterface{} + mockK8s := &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{} + }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { return mockARules }, + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + // No prometheusRule field → platform path. 
+ body := map[string]interface{}{ + "alertingRule": map[string]interface{}{ + "alert": "PlatformAlert", + "expr": "up == 0", + "labels": map[string]string{"severity": "critical"}, + }, + } + buf, _ := json.Marshal(body) + + req := bearerRequest(t, "/api/v1/alerting/rules", buf) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusCreated { + t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String()) + } + var resp struct { + Id string `json:"id"` + } + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("decode response: %v", err) + } + if resp.Id == "" { + t.Fatal("expected non-empty id for platform rule") + } + + ar, found, err := mockARules.Get(context.Background(), "platform-alert-rules") + if err != nil { + t.Fatalf("Get AlertingRule: %v", err) + } + if !found { + t.Fatal("expected AlertingRule platform-alert-rules to exist") + } + var allAlerts []string + for _, g := range ar.Spec.Groups { + for _, r := range g.Rules { + allAlerts = append(allAlerts, r.Alert) + } + } + if !contains(allAlerts, "PlatformAlert") { + t.Errorf("expected PlatformAlert in AlertingRule, got %v", allAlerts) + } +} + +func TestCreateAlertRule_GenericErrorNotLeaked(t *testing.T) { + mockK8s := &testutils.MockClient{ + AlertingRulesFunc: func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + // Inject an unexpected error at the Get step so the management + // layer bubbles it up as a generic 500 (not a typed error). + GetFunc: func(_ context.Context, _ string) (*osmv1.AlertingRule, bool, error) { + return nil, false, errors.New("internal db connection failed: secret details") + }, + } + }, + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{} + }, + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + body := map[string]interface{}{ + "alertingRule": map[string]interface{}{ + "alert": "SomeAlert", + "expr": "up == 0", + }, + } + buf, _ := json.Marshal(body) + + req := bearerRequest(t, "/api/v1/alerting/rules", buf) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusInternalServerError { + t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String()) + } + body500 := w.Body.String() + // Internal error message must NOT appear in the response. 
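+ // parseError maps unknown error types to a single generic message, so any fragment of the original error appearing here means the handler bypassed it.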
+ if containsStr(body500, "internal db connection failed") || containsStr(body500, "secret details") { + t.Errorf("internal error detail leaked to client: %s", body500) + } + if !jsonContains(body500, "unexpected error") { + t.Errorf("expected generic error message, got: %s", body500) + } +} + +func TestCreateAlertRule_AllFieldsMapped(t *testing.T) { + mockK8sRules := &testutils.MockPrometheusRuleInterface{} + mockK8s := &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockK8sRules }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { return &testutils.MockAlertingRuleInterface{} }, + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + } + router := newTestRouter(mockK8s) + + body := map[string]interface{}{ + "alertingRule": map[string]interface{}{ + "alert": "FullAlert", + "expr": "up == 0", + "for": "5m", + "keepFiringFor": "10m", + "labels": map[string]string{"severity": "warning", "team": "sre"}, + "annotations": map[string]string{"summary": "Instance down", "runbook": "http://wiki/runbook"}, + }, + "prometheusRule": map[string]interface{}{ + "prometheusRuleName": "full-pr", + "prometheusRuleNamespace": "default", + }, + } + buf, _ := json.Marshal(body) + + req := bearerRequest(t, "/api/v1/alerting/rules", buf) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusCreated { + t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String()) + } + + pr, found, err := mockK8sRules.Get(context.Background(), "default", "full-pr") + if err != nil || !found { + t.Fatalf("PrometheusRule not found: %v", err) + } + + var rule *monitoringv1.Rule + for _, g := range pr.Spec.Groups { + for i := range g.Rules { + if g.Rules[i].Alert == "FullAlert" { + rule = &g.Rules[i] + } + } + } + if rule == nil { + t.Fatal("FullAlert rule not found in PrometheusRule") + } + if rule.Expr.String() != "up == 0" { + t.Errorf("expr: want 'up == 0', got %q", rule.Expr.String()) + } + if rule.For == nil || string(*rule.For) != "5m" { + t.Errorf("for: want '5m', got %v", rule.For) + } + if rule.KeepFiringFor == nil || string(*rule.KeepFiringFor) != "10m" { + t.Errorf("keepFiringFor: want '10m', got %v", rule.KeepFiringFor) + } + if rule.Labels["severity"] != "warning" || rule.Labels["team"] != "sre" { + t.Errorf("labels mismatch: %v", rule.Labels) + } + if rule.Annotations["summary"] != "Instance down" { + t.Errorf("annotations mismatch: %v", rule.Annotations) + } +} + +// contains reports whether s is in ss. +func contains(ss []string, s string) bool { + for _, v := range ss { + if v == s { + return true + } + } + return false +} + +// jsonContains checks whether the JSON body's "error" field contains substr, +// or the raw string contains substr as a fallback. 
+func jsonContains(body, substr string) bool { + var m map[string]string + if err := json.Unmarshal([]byte(body), &m); err == nil { + return contains([]string{m["error"]}, substr) || len(m["error"]) > 0 && containsStr(m["error"], substr) + } + return containsStr(body, substr) +} + +func containsStr(s, sub string) bool { + return len(s) >= len(sub) && (s == sub || len(sub) == 0 || func() bool { + for i := 0; i <= len(s)-len(sub); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false + }()) +} diff --git a/internal/managementrouter/router.go b/internal/managementrouter/router.go new file mode 100644 index 000000000..85c7a7b31 --- /dev/null +++ b/internal/managementrouter/router.go @@ -0,0 +1,100 @@ +// Package managementrouter implements the management API HTTP handlers. +// The OpenAPI spec lives in api/openapi.yaml. Regenerate bindings with: +// +//go:generate go run github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen --config ../../api/oapi-codegen.yaml ../../api/openapi.yaml +package managementrouter + +import ( + "encoding/json" + "errors" + "net/http" + + "github.com/gorilla/mux" + "github.com/sirupsen/logrus" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" +) + +var log = logrus.WithField("module", "managementrouter") + +// maxRequestBodyBytes limits incoming request bodies to 1 MB across all handlers. +const maxRequestBodyBytes = 1 << 20 // 1 MB + +type httpRouter struct { + managementClient management.Client +} + +// New creates the management API router. Routes are registered via the +// generated HandlerWithOptions so they stay in sync with the OpenAPI spec. +func New(managementClient management.Client) *mux.Router { + hr := &httpRouter{managementClient: managementClient} + + r := mux.NewRouter() + r.Use(authMiddleware) + + HandlerWithOptions(hr, GorillaServerOptions{ + BaseURL: "/api/v1/alerting", + BaseRouter: r, + }) + + return r +} + +// authMiddleware extracts the user's bearer token forwarded by the OpenShift +// console bridge and stores it in the request context so downstream handlers +// can perform API calls on behalf of the user. +func authMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + auth := r.Header.Get("Authorization") + token := "" + if len(auth) > 7 && auth[:7] == "Bearer " { + token = auth[7:] + } + if token == "" { + writeError(w, http.StatusUnauthorized, "missing authorization token") + return + } + ctx := k8s.WithBearerToken(r.Context(), token) + next.ServeHTTP(w, r.WithContext(ctx)) + }) +} + +func writeError(w http.ResponseWriter, statusCode int, message string) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(statusCode) + resp, err := json.Marshal(map[string]string{"error": message}) + if err != nil { + // json.Marshal on map[string]string never fails in practice. 
panic(err) + } + if _, err := w.Write(resp); err != nil { + log.WithError(err).Warn("failed to write error response") + } +} + +func handleError(w http.ResponseWriter, err error) { + status, message := parseError(err) + writeError(w, status, message) +} + +func parseError(err error) (int, string) { + var nf *management.NotFoundError + if errors.As(err, &nf) { + return http.StatusNotFound, err.Error() + } + var ve *management.ValidationError + if errors.As(err, &ve) { + return http.StatusBadRequest, err.Error() + } + var na *management.NotAllowedError + if errors.As(err, &na) { + return http.StatusMethodNotAllowed, err.Error() + } + var ce *management.ConflictError + if errors.As(err, &ce) { + return http.StatusConflict, err.Error() + } + log.WithError(err).Error("unexpected management API error") + return http.StatusInternalServerError, "An unexpected error occurred" +} diff --git a/pkg/alert_rule/alert_rule.go b/pkg/alert_rule/alert_rule.go new file mode 100644 index 000000000..862cb59ac --- /dev/null +++ b/pkg/alert_rule/alert_rule.go @@ -0,0 +1,96 @@ +package alertrule + +import ( + "crypto/sha256" + "encoding/base64" + "fmt" + "regexp" + "sort" + "strings" + "unicode/utf8" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/promql/parser" + + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +var promLabelNameRegexp = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_]*$`) + +func GetAlertingRuleId(alertRule *monitoringv1.Rule) string { + var name string + var kind string + if alertRule.Alert != "" { + name = alertRule.Alert + kind = "alert" + } else if alertRule.Record != "" { + name = alertRule.Record + kind = "record" + } else { + return "" + } + + expr := NormalizeExpr(alertRule.Expr.String()) + forDuration := "" + if alertRule.For != nil { + forDuration = strings.TrimSpace(string(*alertRule.For)) + } + + labelsBlock := normalizedBusinessLabelsBlock(alertRule.Labels) + + // Canonical payload is intentionally derived from rule spec (expr/for/labels) and identity (kind/name), + // and excludes annotations and openshift_io_* provenance/system labels. + canonicalPayload := strings.Join([]string{kind, name, expr, forDuration, labelsBlock}, "\n---\n") + + // Generate SHA256 hash + hash := sha256.Sum256([]byte(canonicalPayload)) + + return "rid_" + base64.RawURLEncoding.EncodeToString(hash[:]) +} + +// NormalizeExpr normalises a PromQL expression to a canonical string so that +// cosmetic formatting differences do not produce different rule IDs. Using the +// PromQL parser preserves whitespace inside quoted string literals, which plain +// strings.Fields would incorrectly collapse (e.g. up{job="job 1"} and up{job="job  1"}, +// which differ only by an extra space inside the quoted value, are semantically +// distinct selectors). +func NormalizeExpr(expr string) string { + parsed, err := parser.ParseExpr(expr) + if err != nil { + // Fall back to simple trimming for recording rules or unparseable input.
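+ // The trimmed text still feeds GetAlertingRuleId's hash, so even unparseable expressions get a deterministic ID.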
+ return strings.TrimSpace(expr) + } + return parsed.String() +} + +func normalizedBusinessLabelsBlock(in map[string]string) string { + if len(in) == 0 { + return "" + } + + lines := make([]string, 0, len(in)) + for k, v := range in { + key := strings.TrimSpace(k) + if key == "" { + continue + } + if strings.HasPrefix(key, "openshift_io_") || key == managementlabels.AlertNameLabel { + // Skip system labels + continue + } + if !promLabelNameRegexp.MatchString(key) { + continue + } + if v == "" { + // Align with specHash behavior: drop empty values + continue + } + if !utf8.ValidString(v) { + continue + } + + lines = append(lines, fmt.Sprintf("%s=%s", key, v)) + } + + sort.Strings(lines) + return strings.Join(lines, "\n") +} diff --git a/pkg/alert_rule/alert_rule_test.go b/pkg/alert_rule/alert_rule_test.go new file mode 100644 index 000000000..5362125f8 --- /dev/null +++ b/pkg/alert_rule/alert_rule_test.go @@ -0,0 +1,149 @@ +package alertrule_test + +import ( + "testing" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" +) + +func durPtr(s monitoringv1.Duration) *monitoringv1.Duration { return &s } + +// TestNormalizeExpr_BasicCanonicalization verifies that the PromQL parser +// produces a deterministic canonical form. +func TestNormalizeExpr_BasicCanonicalization(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + { + name: "leading/trailing whitespace stripped", + in: " up ", + want: "up", + }, + { + name: "extra spaces between tokens collapsed", + in: "up   ==   0", + want: "up == 0", + }, + { + name: "label selector formatting normalised", + in: `up{ job = "prometheus" }`, + want: `up{job="prometheus"}`, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + got := alertrule.NormalizeExpr(tc.in) + if got != tc.want { + t.Errorf("NormalizeExpr(%q) = %q, want %q", tc.in, got, tc.want) + } + }) + } +} + +// TestNormalizeExpr_PreservesWhitespaceInsideStringLiteral verifies that +// whitespace inside a quoted label value is NOT collapsed. This was the bug +// with the previous strings.Fields-based implementation. +func TestNormalizeExpr_PreservesWhitespaceInsideStringLiteral(t *testing.T) { + single := `up{job="job 1"}` + double := `up{job="job  1"}` // two spaces inside the quoted value + + normSingle := alertrule.NormalizeExpr(single) + normDouble := alertrule.NormalizeExpr(double) + + if normSingle == normDouble { + t.Errorf("NormalizeExpr collapsed whitespace inside quoted string: %q == %q", normSingle, normDouble) + } +} + +// TestNormalizeExpr_UnparseableExprFallback checks that input the PromQL +// parser cannot parse (e.g. an expression with an unclosed label selector) is +// returned trimmed without crashing. +func TestNormalizeExpr_UnparseableExprFallback(t *testing.T) { + in := " some_record_rule{ " + got := alertrule.NormalizeExpr(in) + want := "some_record_rule{" + if got != want { + t.Errorf("NormalizeExpr(%q) = %q, want %q", in, got, want) + } +} + +// TestGetAlertingRuleId_Stability checks that the same rule always produces +// the same ID.
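+// The ID is "rid_" plus the raw URL-safe base64 of a SHA-256 over the canonical +// kind/name/expr/for/labels payload, so identical specs must always produce identical IDs.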
+func TestGetAlertingRuleId_Stability(t *testing.T) { + rule := &monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + For: durPtr("5m"), + Labels: map[string]string{ + "severity": "warning", + }, + } + id1 := alertrule.GetAlertingRuleId(rule) + id2 := alertrule.GetAlertingRuleId(rule) + if id1 != id2 { + t.Errorf("GetAlertingRuleId is not stable: %q != %q", id1, id2) + } + if id1 == "" { + t.Error("GetAlertingRuleId returned empty string") + } +} + +// TestGetAlertingRuleId_SystemLabelExcluded verifies that changing an +// openshift_io_* label (system label) does not change the ID. +func TestGetAlertingRuleId_SystemLabelExcluded(t *testing.T) { + base := &monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + }, + } + withSystem := &monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + "openshift_io_rule_managed_by": "operator", + "openshift_io_alerting_rule_id_key": "some-id", + }, + } + if alertrule.GetAlertingRuleId(base) != alertrule.GetAlertingRuleId(withSystem) { + t.Error("system labels should not affect the rule ID") + } +} + +// TestGetAlertingRuleId_DifferentRulesDifferentIds verifies that two rules +// with different expressions produce different IDs. +func TestGetAlertingRuleId_DifferentRulesDifferentIds(t *testing.T) { + r1 := &monitoringv1.Rule{Alert: "A", Expr: intstr.FromString("up == 0")} + r2 := &monitoringv1.Rule{Alert: "A", Expr: intstr.FromString("up == 1")} + if alertrule.GetAlertingRuleId(r1) == alertrule.GetAlertingRuleId(r2) { + t.Error("different expressions should produce different IDs") + } +} + +// TestGetAlertingRuleId_QuotedStringDistinction verifies that two rules whose +// only difference is whitespace inside a quoted label value get different IDs. +func TestGetAlertingRuleId_QuotedStringDistinction(t *testing.T) { + r1 := &monitoringv1.Rule{Alert: "A", Expr: intstr.FromString(`up{job="job 1"}`)} + r2 := &monitoringv1.Rule{Alert: "A", Expr: intstr.FromString(`up{job="job  1"}`)} // two spaces in the quoted value + if alertrule.GetAlertingRuleId(r1) == alertrule.GetAlertingRuleId(r2) { + t.Error("selectors with different quoted-string whitespace should have different IDs") + } +} + +// TestGetAlertingRuleId_EmptyRule verifies that a rule with no alert or record +// name returns an empty string.
+func TestGetAlertingRuleId_EmptyRule(t *testing.T) { + rule := &monitoringv1.Rule{Expr: intstr.FromString("up")} + if id := alertrule.GetAlertingRuleId(rule); id != "" { + t.Errorf("expected empty ID for unnamed rule, got %q", id) + } +} diff --git a/pkg/k8s/alert_relabel_config.go b/pkg/k8s/alert_relabel_config.go new file mode 100644 index 000000000..07beb6b5e --- /dev/null +++ b/pkg/k8s/alert_relabel_config.go @@ -0,0 +1,132 @@ +package k8s + +import ( + "context" + "fmt" + + osmv1 "github.com/openshift/api/monitoring/v1" + osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/cache" +) + +type alertRelabelConfigManager struct { + clientset *osmv1client.Clientset + config *rest.Config + arcInformer cache.SharedIndexInformer +} + +func newAlertRelabelConfigManager(ctx context.Context, clientset *osmv1client.Clientset, config *rest.Config) (*alertRelabelConfigManager, error) { + arcInformer := cache.NewSharedIndexInformer( + alertRelabelConfigListWatchForAllNamespaces(clientset), + &osmv1.AlertRelabelConfig{}, + 0, + cache.Indexers{}, + ) + + arcm := &alertRelabelConfigManager{ + clientset: clientset, + config: config, + arcInformer: arcInformer, + } + + go arcm.arcInformer.Run(ctx.Done()) + + if !cache.WaitForNamedCacheSync("AlertRelabelConfig informer", ctx.Done(), arcm.arcInformer.HasSynced) { + return nil, fmt.Errorf("failed to sync AlertRelabelConfig informer") + } + + return arcm, nil +} + +func alertRelabelConfigListWatchForAllNamespaces(clientset *osmv1client.Clientset) *cache.ListWatch { + return cache.NewListWatchFromClient(clientset.MonitoringV1().RESTClient(), "alertrelabelconfigs", "", fields.Everything()) +} + +func (arcm *alertRelabelConfigManager) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { + arcs := arcm.arcInformer.GetStore().List() + + alertRelabelConfigs := make([]osmv1.AlertRelabelConfig, 0, len(arcs)) + for _, item := range arcs { + arc, ok := item.(*osmv1.AlertRelabelConfig) + if !ok { + continue + } + if namespace != "" && arc.Namespace != namespace { + continue + } + alertRelabelConfigs = append(alertRelabelConfigs, *arc) + } + + return alertRelabelConfigs, nil +} + +func (arcm *alertRelabelConfigManager) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + arc, err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil, false, nil + } + + return nil, false, err + } + + return arc, true, nil +} + +func (arcm *alertRelabelConfigManager) Create(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + cs, err := arcm.clientsetForCtx(ctx) + if err != nil { + return nil, err + } + created, err := cs.MonitoringV1().AlertRelabelConfigs(arc.Namespace).Create(ctx, &arc, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create AlertRelabelConfig %s/%s: %w", arc.Namespace, arc.Name, err) + } + + return created, nil +} + +func (arcm *alertRelabelConfigManager) Update(ctx context.Context, arc osmv1.AlertRelabelConfig) error { + cs, err := arcm.clientsetForCtx(ctx) + if err != nil { + return err + } + _, err = cs.MonitoringV1().AlertRelabelConfigs(arc.Namespace).Update(ctx, &arc, metav1.UpdateOptions{}) + if err != nil { + return 
fmt.Errorf("failed to update AlertRelabelConfig %s/%s: %w", arc.Namespace, arc.Name, err) + } + + return nil +} + +func (arcm *alertRelabelConfigManager) Delete(ctx context.Context, namespace string, name string) error { + cs, err := arcm.clientsetForCtx(ctx) + if err != nil { + return err + } + err = cs.MonitoringV1().AlertRelabelConfigs(namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("failed to delete AlertRelabelConfig %s/%s: %w", namespace, name, err) + } + + return nil +} + +// clientsetForCtx returns a user-scoped clientset when the context carries a +// bearer token (i.e. on API handler requests), or the SA-level clientset for +// background / informer bootstrap calls. +func (arcm *alertRelabelConfigManager) clientsetForCtx(ctx context.Context) (*osmv1client.Clientset, error) { + token := BearerTokenFromContext(ctx) + if token == "" { + return arcm.clientset, nil + } + cs, err := newUserScopedClientsets(arcm.config, token) + if err != nil { + return nil, fmt.Errorf("failed to create user-scoped clientset: %w", err) + } + return cs.osmV1, nil +} diff --git a/pkg/k8s/alerting_rule.go b/pkg/k8s/alerting_rule.go new file mode 100644 index 000000000..c31c47dfd --- /dev/null +++ b/pkg/k8s/alerting_rule.go @@ -0,0 +1,143 @@ +package k8s + +import ( + "context" + "fmt" + + osmv1 "github.com/openshift/api/monitoring/v1" + osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/cache" +) + +type alertingRuleManager struct { + clientset *osmv1client.Clientset + config *rest.Config + informer cache.SharedIndexInformer +} + +func newAlertingRuleManager(ctx context.Context, clientset *osmv1client.Clientset, config *rest.Config) (*alertingRuleManager, error) { + informer := cache.NewSharedIndexInformer( + alertingRuleListWatchClusterMonitoringNamespace(clientset), + &osmv1.AlertingRule{}, + 0, + cache.Indexers{}, + ) + + arm := &alertingRuleManager{ + clientset: clientset, + config: config, + informer: informer, + } + + go arm.informer.Run(ctx.Done()) + + if !cache.WaitForNamedCacheSync("AlertingRule informer", ctx.Done(), arm.informer.HasSynced) { + return nil, errors.NewInternalError(fmt.Errorf("failed to sync AlertingRule informer")) + } + + return arm, nil +} + +func alertingRuleListWatchClusterMonitoringNamespace(clientset *osmv1client.Clientset) *cache.ListWatch { + return cache.NewListWatchFromClient(clientset.MonitoringV1().RESTClient(), "alertingrules", ClusterMonitoringNamespace, fields.Everything()) +} + +func (arm *alertingRuleManager) List(ctx context.Context) ([]osmv1.AlertingRule, error) { + items := arm.informer.GetStore().List() + + alertingRules := make([]osmv1.AlertingRule, 0, len(items)) + for _, item := range items { + ar, ok := item.(*osmv1.AlertingRule) + if !ok { + continue + } + alertingRules = append(alertingRules, *ar) + } + + return alertingRules, nil +} + +func (arm *alertingRuleManager) Get(ctx context.Context, name string) (*osmv1.AlertingRule, bool, error) { + ar, err := arm.clientset.MonitoringV1().AlertingRules(ClusterMonitoringNamespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil, false, nil + } + + return nil, false, err + } + + return ar, true, nil +} + +func (arm *alertingRuleManager) Create(ctx context.Context, ar osmv1.AlertingRule) (*osmv1.AlertingRule, error) { + if 
ar.Namespace != "" && ar.Namespace != ClusterMonitoringNamespace { + return nil, fmt.Errorf("invalid namespace %q: AlertingRule manager only supports %q", ar.Namespace, ClusterMonitoringNamespace) + } + + cs, err := arm.clientsetForCtx(ctx) + if err != nil { + return nil, err + } + + created, err := cs.MonitoringV1().AlertingRules(ClusterMonitoringNamespace).Create(ctx, &ar, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create AlertingRule %s/%s: %w", ClusterMonitoringNamespace, ar.Name, err) + } + + return created, nil +} + +func (arm *alertingRuleManager) Update(ctx context.Context, ar osmv1.AlertingRule) error { + if ar.Namespace != "" && ar.Namespace != ClusterMonitoringNamespace { + return fmt.Errorf("invalid namespace %q: AlertingRule manager only supports %q", ar.Namespace, ClusterMonitoringNamespace) + } + + cs, err := arm.clientsetForCtx(ctx) + if err != nil { + return err + } + + _, err = cs.MonitoringV1().AlertingRules(ClusterMonitoringNamespace).Update(ctx, &ar, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update AlertingRule %s/%s: %w", ClusterMonitoringNamespace, ar.Name, err) + } + + return nil +} + +func (arm *alertingRuleManager) Delete(ctx context.Context, name string) error { + cs, err := arm.clientsetForCtx(ctx) + if err != nil { + return err + } + + err = cs.MonitoringV1().AlertingRules(ClusterMonitoringNamespace).Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete AlertingRule %s/%s: %w", ClusterMonitoringNamespace, name, err) + } + + return nil +} + +// clientsetForCtx returns a user-scoped clientset when the context carries a +// bearer token (i.e. on API handler requests), or the SA-level clientset for +// background / informer bootstrap calls. 
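+// Using the per-request token means the API server enforces RBAC for the calling +// user rather than for the plugin's service account.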
+func (arm *alertingRuleManager) clientsetForCtx(ctx context.Context) (*osmv1client.Clientset, error) { + token := BearerTokenFromContext(ctx) + if token == "" { + return arm.clientset, nil + } + cs, err := newUserScopedClientsets(arm.config, token) + if err != nil { + return nil, fmt.Errorf("failed to create user-scoped clientset: %w", err) + } + return cs.osmV1, nil +} diff --git a/pkg/k8s/client.go b/pkg/k8s/client.go index 1fd6fbc4d..6370270ff 100644 --- a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -6,11 +6,12 @@ import ( osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + "github.com/sirupsen/logrus" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" ) -//var log = logrus.WithField("module", "k8s") +var log = logrus.WithField("module", "k8s") var _ Client = (*client)(nil) @@ -20,8 +21,11 @@ type client struct { osmv1clientset *osmv1client.Clientset config *rest.Config - prometheusRuleManager *prometheusRuleManager - namespaceManager *namespaceManager + prometheusRuleManager *prometheusRuleManager + alertRelabelConfigManager *alertRelabelConfigManager + alertingRuleManager *alertingRuleManager + namespaceManager *namespaceManager + relabeledRulesManager *relabeledRulesManager } func NewClient(ctx context.Context, config *rest.Config) (Client, error) { @@ -47,16 +51,31 @@ func NewClient(ctx context.Context, config *rest.Config) (Client, error) { config: config, } - c.prometheusRuleManager, err = newPrometheusRuleManager(ctx, monitoringv1clientset) + c.prometheusRuleManager, err = newPrometheusRuleManager(ctx, monitoringv1clientset, config) if err != nil { return nil, fmt.Errorf("failed to create PrometheusRule manager: %w", err) } + c.alertRelabelConfigManager, err = newAlertRelabelConfigManager(ctx, osmv1clientset, config) + if err != nil { + return nil, fmt.Errorf("failed to create alert relabel config manager: %w", err) + } + + c.alertingRuleManager, err = newAlertingRuleManager(ctx, osmv1clientset, config) + if err != nil { + return nil, fmt.Errorf("failed to create alerting rule manager: %w", err) + } + c.namespaceManager, err = newNamespaceManager(ctx, clientset) if err != nil { return nil, fmt.Errorf("failed to create namespace manager: %w", err) } + c.relabeledRulesManager, err = newRelabeledRulesManager(ctx, c.namespaceManager, c.alertRelabelConfigManager, monitoringv1clientset, clientset) + if err != nil { + return nil, fmt.Errorf("failed to create relabeled rules config manager: %w", err) + } + return c, nil } @@ -72,6 +91,18 @@ func (c *client) PrometheusRules() PrometheusRuleInterface { return c.prometheusRuleManager } +func (c *client) AlertRelabelConfigs() AlertRelabelConfigInterface { + return c.alertRelabelConfigManager +} + +func (c *client) AlertingRules() AlertingRuleInterface { + return c.alertingRuleManager +} + +func (c *client) RelabeledRules() RelabeledRulesInterface { + return c.relabeledRulesManager +} + func (c *client) Namespace() NamespaceInterface { return c.namespaceManager } diff --git a/pkg/k8s/external_management.go b/pkg/k8s/external_management.go new file mode 100644 index 000000000..7671c87e7 --- /dev/null +++ b/pkg/k8s/external_management.go @@ -0,0 +1,49 @@ +package k8s + +import ( + "reflect" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// External management detection keys +const ( + ArgocdArgoprojIoPrefix = "argocd.argoproj.io/" + AppKubernetesIoManagedBy = "app.kubernetes.io/managed-by" +) + +// 
IsManagedByGitOps returns true if the provided annotations/labels indicate GitOps (e.g., ArgoCD) management. +func IsManagedByGitOps(annotations map[string]string, labels map[string]string) bool { + for k := range annotations { + if strings.HasPrefix(k, ArgocdArgoprojIoPrefix) { + return true + } + } + for k := range labels { + if strings.HasPrefix(k, ArgocdArgoprojIoPrefix) { + return true + } + } + if v, ok := labels[AppKubernetesIoManagedBy]; ok { + vl := strings.ToLower(strings.TrimSpace(v)) + if vl == "openshift-gitops" || vl == "argocd-cluster" || vl == "argocd" || strings.Contains(vl, "gitops") { + return true + } + } + return false +} + +// IsExternallyManagedObject returns whether an object is GitOps-managed and/or operator-managed. +func IsExternallyManagedObject(obj metav1.Object) (gitOpsManaged bool, operatorManaged bool) { + if obj == nil { + return false, false + } + // Handle typed-nil underlying values + if rv := reflect.ValueOf(obj); rv.Kind() == reflect.Ptr && rv.IsNil() { + return false, false + } + gitOpsManaged = IsManagedByGitOps(obj.GetAnnotations(), obj.GetLabels()) + operatorManaged = len(obj.GetOwnerReferences()) > 0 + return +} diff --git a/pkg/k8s/external_management_test.go b/pkg/k8s/external_management_test.go new file mode 100644 index 000000000..a422568c6 --- /dev/null +++ b/pkg/k8s/external_management_test.go @@ -0,0 +1,109 @@ +package k8s_test + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +type testObject struct { + metav1.ObjectMeta +} + +func obj(annotations, labels map[string]string, owners []metav1.OwnerReference) *testObject { + return &testObject{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: annotations, + Labels: labels, + OwnerReferences: owners, + }, + } +} + +func TestIsExternallyManagedObject_NilObject(t *testing.T) { + gitOps, operator := k8s.IsExternallyManagedObject(nil) + if gitOps || operator { + t.Errorf("nil object: expected (false, false), got (%v, %v)", gitOps, operator) + } +} + +func TestIsExternallyManagedObject_NoAnnotations(t *testing.T) { + o := obj(nil, nil, nil) + gitOps, operator := k8s.IsExternallyManagedObject(o) + if gitOps || operator { + t.Errorf("plain object: expected (false, false), got (%v, %v)", gitOps, operator) + } +} + +func TestIsExternallyManagedObject_ArgocdAnnotation(t *testing.T) { + o := obj(map[string]string{"argocd.argoproj.io/tracking-id": "abc"}, nil, nil) + gitOps, operator := k8s.IsExternallyManagedObject(o) + if !gitOps { + t.Error("expected gitOps=true for argocd annotation") + } + if operator { + t.Error("expected operator=false when no owners") + } +} + +func TestIsExternallyManagedObject_ArgocdLabel(t *testing.T) { + o := obj(nil, map[string]string{"argocd.argoproj.io/app-name": "myapp"}, nil) + gitOps, _ := k8s.IsExternallyManagedObject(o) + if !gitOps { + t.Error("expected gitOps=true for argocd label") + } +} + +func TestIsExternallyManagedObject_ManagedByGitOpsLabel(t *testing.T) { + o := obj(nil, map[string]string{"app.kubernetes.io/managed-by": "openshift-gitops"}, nil) + gitOps, _ := k8s.IsExternallyManagedObject(o) + if !gitOps { + t.Error("expected gitOps=true for managed-by=openshift-gitops label") + } +} + +func TestIsExternallyManagedObject_OperatorOwnerRef(t *testing.T) { + o := obj(nil, nil, []metav1.OwnerReference{{Kind: "Deployment", Name: "some-operator"}}) + gitOps, operator := k8s.IsExternallyManagedObject(o) + if gitOps { + t.Error("expected gitOps=false when no argocd markers") + } + if 
!operator { + t.Error("expected operator=true when owner references exist") + } +} + +func TestIsExternallyManagedObject_BothGitOpsAndOperator(t *testing.T) { + o := obj( + map[string]string{"argocd.argoproj.io/tracking-id": "abc"}, + nil, + []metav1.OwnerReference{{Kind: "Deployment", Name: "some-operator"}}, + ) + gitOps, operator := k8s.IsExternallyManagedObject(o) + if !gitOps || !operator { + t.Errorf("expected (true, true), got (%v, %v)", gitOps, operator) + } +} + +func TestIsManagedByGitOps_ContainsGitOps(t *testing.T) { + labels := map[string]string{"app.kubernetes.io/managed-by": "my-gitops-tool"} + if !k8s.IsManagedByGitOps(nil, labels) { + t.Error("expected true for label containing 'gitops'") + } +} + +func TestIsManagedByGitOps_ArgocdCluster(t *testing.T) { + labels := map[string]string{"app.kubernetes.io/managed-by": "argocd-cluster"} + if !k8s.IsManagedByGitOps(nil, labels) { + t.Error("expected true for managed-by=argocd-cluster") + } +} + +func TestIsManagedByGitOps_UnrelatedLabel(t *testing.T) { + labels := map[string]string{"app.kubernetes.io/managed-by": "helm"} + if k8s.IsManagedByGitOps(nil, labels) { + t.Error("expected false for managed-by=helm") + } +} diff --git a/pkg/k8s/prometheus_rule.go b/pkg/k8s/prometheus_rule.go index ddcf4b4de..98c786170 100644 --- a/pkg/k8s/prometheus_rule.go +++ b/pkg/k8s/prometheus_rule.go @@ -10,15 +10,18 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/retry" ) type prometheusRuleManager struct { clientset *monitoringv1client.Clientset + config *rest.Config informer cache.SharedIndexInformer } -func newPrometheusRuleManager(ctx context.Context, clientset *monitoringv1client.Clientset) (*prometheusRuleManager, error) { +func newPrometheusRuleManager(ctx context.Context, clientset *monitoringv1client.Clientset, config *rest.Config) (*prometheusRuleManager, error) { informer := cache.NewSharedIndexInformer( prometheusRuleListWatchForAllNamespaces(clientset), &monitoringv1.PrometheusRule{}, @@ -34,6 +37,7 @@ func newPrometheusRuleManager(ctx context.Context, clientset *monitoringv1client return &prometheusRuleManager{ clientset: clientset, + config: config, informer: informer, }, nil } @@ -71,7 +75,11 @@ func (prm *prometheusRuleManager) Get(ctx context.Context, namespace string, nam } func (prm *prometheusRuleManager) Update(ctx context.Context, pr monitoringv1.PrometheusRule) error { - _, err := prm.clientset.MonitoringV1().PrometheusRules(pr.Namespace).Update(ctx, &pr, metav1.UpdateOptions{}) + cs, err := prm.clientsetForCtx(ctx) + if err != nil { + return err + } + _, err = cs.MonitoringV1().PrometheusRules(pr.Namespace).Update(ctx, &pr, metav1.UpdateOptions{}) if err != nil { return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) } @@ -80,7 +88,11 @@ func (prm *prometheusRuleManager) Update(ctx context.Context, pr monitoringv1.Pr } func (prm *prometheusRuleManager) Delete(ctx context.Context, namespace string, name string) error { - err := prm.clientset.MonitoringV1().PrometheusRules(namespace).Delete(ctx, name, metav1.DeleteOptions{}) + cs, err := prm.clientsetForCtx(ctx) + if err != nil { + return err + } + err = cs.MonitoringV1().PrometheusRules(namespace).Delete(ctx, name, metav1.DeleteOptions{}) if err != nil { return fmt.Errorf("failed to delete PrometheusRule %s: %w", name, err) } @@ -88,44 +100,69 @@ func (prm *prometheusRuleManager) 
Delete(ctx context.Context, namespace string, return nil } +// clientsetForCtx returns a user-scoped clientset when the context carries a +// bearer token (i.e. on API handler requests), or the SA-level clientset for +// background / informer bootstrap calls. +func (prm *prometheusRuleManager) clientsetForCtx(ctx context.Context) (*monitoringv1client.Clientset, error) { + token := BearerTokenFromContext(ctx) + if token == "" { + return prm.clientset, nil + } + cs, err := newUserScopedClientsets(prm.config, token) + if err != nil { + return nil, fmt.Errorf("failed to create user-scoped clientset: %w", err) + } + return cs.monitoringV1, nil +} + func (prm *prometheusRuleManager) AddRule(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error { - pr, err := prm.getOrCreatePrometheusRule(ctx, namespacedName) + cs, err := prm.clientsetForCtx(ctx) if err != nil { return err } - // Find or create the group - var group *monitoringv1.RuleGroup - for i := range pr.Spec.Groups { - if pr.Spec.Groups[i].Name == groupName { - group = &pr.Spec.Groups[i] - break + // RetryOnConflict handles the concurrent update (409) case that arises when + // multiple replicas perform a read-modify-write on the same PrometheusRule + // at the same time. + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + pr, err := prm.getOrCreatePrometheusRule(ctx, cs, namespacedName) + if err != nil { + return err + } + + // Find or create the group + var group *monitoringv1.RuleGroup + for i := range pr.Spec.Groups { + if pr.Spec.Groups[i].Name == groupName { + group = &pr.Spec.Groups[i] + break + } + } + if group == nil { + pr.Spec.Groups = append(pr.Spec.Groups, monitoringv1.RuleGroup{ + Name: groupName, + Rules: []monitoringv1.Rule{}, + }) + group = &pr.Spec.Groups[len(pr.Spec.Groups)-1] } - } - if group == nil { - pr.Spec.Groups = append(pr.Spec.Groups, monitoringv1.RuleGroup{ - Name: groupName, - Rules: []monitoringv1.Rule{}, - }) - group = &pr.Spec.Groups[len(pr.Spec.Groups)-1] - } - // Add the new rule to the group - group.Rules = append(group.Rules, rule) + // Add the new rule to the group + group.Rules = append(group.Rules, rule) - _, err = prm.clientset.MonitoringV1().PrometheusRules(namespacedName.Namespace).Update(ctx, pr, metav1.UpdateOptions{}) - if err != nil { - return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err) - } + _, err = cs.MonitoringV1().PrometheusRules(namespacedName.Namespace).Update(ctx, pr, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err) + } - return nil + return nil + }) } -func (prm *prometheusRuleManager) getOrCreatePrometheusRule(ctx context.Context, namespacedName types.NamespacedName) (*monitoringv1.PrometheusRule, error) { - pr, err := prm.clientset.MonitoringV1().PrometheusRules(namespacedName.Namespace).Get(ctx, namespacedName.Name, metav1.GetOptions{}) +func (prm *prometheusRuleManager) getOrCreatePrometheusRule(ctx context.Context, cs *monitoringv1client.Clientset, namespacedName types.NamespacedName) (*monitoringv1.PrometheusRule, error) { + pr, err := cs.MonitoringV1().PrometheusRules(namespacedName.Namespace).Get(ctx, namespacedName.Name, metav1.GetOptions{}) if err != nil { if errors.IsNotFound(err) { - return prm.createPrometheusRule(ctx, namespacedName) + return prm.createPrometheusRule(ctx, cs, namespacedName) } return nil, fmt.Errorf("failed to get 
PrometheusRule %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err) @@ -134,7 +171,7 @@ func (prm *prometheusRuleManager) getOrCreatePrometheusRule(ctx context.Context, return pr, nil } -func (prm *prometheusRuleManager) createPrometheusRule(ctx context.Context, namespacedName types.NamespacedName) (*monitoringv1.PrometheusRule, error) { +func (prm *prometheusRuleManager) createPrometheusRule(ctx context.Context, cs *monitoringv1client.Clientset, namespacedName types.NamespacedName) (*monitoringv1.PrometheusRule, error) { pr := &monitoringv1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ Name: namespacedName.Name, @@ -145,7 +182,7 @@ func (prm *prometheusRuleManager) createPrometheusRule(ctx context.Context, name }, } - pr, err := prm.clientset.MonitoringV1().PrometheusRules(namespacedName.Namespace).Create(ctx, pr, metav1.CreateOptions{}) + pr, err := cs.MonitoringV1().PrometheusRules(namespacedName.Namespace).Create(ctx, pr, metav1.CreateOptions{}) if err != nil { return nil, fmt.Errorf("failed to create PrometheusRule %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err) } diff --git a/pkg/k8s/relabeled_rules.go b/pkg/k8s/relabeled_rules.go new file mode 100644 index 000000000..7c092c62d --- /dev/null +++ b/pkg/k8s/relabeled_rules.go @@ -0,0 +1,509 @@ +package k8s + +import ( + "context" + "crypto/sha256" + "fmt" + "strings" + "sync" + "time" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/relabel" + "gopkg.in/yaml.v2" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +const ( + resyncPeriod = 15 * time.Minute + queueBaseDelay = 50 * time.Millisecond + queueMaxDelay = 3 * time.Minute + + AlertRelabelConfigSecretName = "alert-relabel-configs" + AlertRelabelConfigSecretKey = "config.yaml" + + PrometheusRuleLabelNamespace = "openshift_io_prometheus_rule_namespace" + PrometheusRuleLabelName = "openshift_io_prometheus_rule_name" + AlertRuleLabelId = "openshift_io_alert_rule_id" + + AlertRuleClassificationComponentKey = "openshift_io_alert_rule_component" + AlertRuleClassificationLayerKey = "openshift_io_alert_rule_layer" + + AppKubernetesIoComponent = "app.kubernetes.io/component" + AppKubernetesIoComponentAlertManagementApi = "alert-management-api" + AppKubernetesIoComponentMonitoringPlugin = "monitoring-plugin" +) + +type relabeledRulesManager struct { + queue workqueue.TypedRateLimitingInterface[string] + + namespaceManager NamespaceInterface + alertRelabelConfigs AlertRelabelConfigInterface + prometheusRulesInformer cache.SharedIndexInformer + secretInformer cache.SharedIndexInformer + + // relabeledRules stores the relabeled rules in memory + relabeledRules map[string]monitoringv1.Rule + relabelConfigs []*relabel.Config + mu sync.RWMutex +} + +func newRelabeledRulesManager(ctx context.Context, namespaceManager NamespaceInterface, alertRelabelConfigs AlertRelabelConfigInterface, monitoringv1clientset *monitoringv1client.Clientset, clientset *kubernetes.Clientset) (*relabeledRulesManager, error) { + 
prometheusRulesInformer := cache.NewSharedIndexInformer( + prometheusRuleListWatchForAllNamespaces(monitoringv1clientset), + &monitoringv1.PrometheusRule{}, + resyncPeriod, + cache.Indexers{}, + ) + + secretInformer := cache.NewSharedIndexInformer( + alertRelabelConfigSecretListWatch(clientset, ClusterMonitoringNamespace), + &corev1.Secret{}, + resyncPeriod, + cache.Indexers{}, + ) + + queue := workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.NewTypedItemExponentialFailureRateLimiter[string](queueBaseDelay, queueMaxDelay), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "relabeled-rules"}, + ) + + rrm := &relabeledRulesManager{ + queue: queue, + namespaceManager: namespaceManager, + alertRelabelConfigs: alertRelabelConfigs, + prometheusRulesInformer: prometheusRulesInformer, + secretInformer: secretInformer, + } + + _, err := rrm.prometheusRulesInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + promRule, ok := obj.(*monitoringv1.PrometheusRule) + if !ok { + return + } + log.Debugf("prometheus rule added: %s/%s", promRule.Namespace, promRule.Name) + rrm.queue.Add("prometheus-rule-sync") + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + promRule, ok := newObj.(*monitoringv1.PrometheusRule) + if !ok { + return + } + log.Debugf("prometheus rule updated: %s/%s", promRule.Namespace, promRule.Name) + rrm.queue.Add("prometheus-rule-sync") + }, + DeleteFunc: func(obj interface{}) { + if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { + obj = tombstone.Obj + } + + promRule, ok := obj.(*monitoringv1.PrometheusRule) + if !ok { + return + } + log.Debugf("prometheus rule deleted: %s/%s", promRule.Namespace, promRule.Name) + rrm.queue.Add("prometheus-rule-sync") + }, + }) + if err != nil { + return nil, fmt.Errorf("failed to add event handler to prometheus rules informer: %w", err) + } + + _, err = rrm.secretInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + rrm.queue.Add("secret-sync") + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + rrm.queue.Add("secret-sync") + }, + DeleteFunc: func(obj interface{}) { + rrm.queue.Add("secret-sync") + }, + }) + if err != nil { + return nil, fmt.Errorf("failed to add event handler to secret informer: %w", err) + } + + go rrm.prometheusRulesInformer.Run(ctx.Done()) + go rrm.secretInformer.Run(ctx.Done()) + + if !cache.WaitForNamedCacheSync("RelabeledRulesConfig informer", ctx.Done(), + rrm.prometheusRulesInformer.HasSynced, + rrm.secretInformer.HasSynced, + ) { + return nil, fmt.Errorf("failed to sync RelabeledRulesConfig informer") + } + + if err := rrm.sync(ctx); err != nil { + return nil, fmt.Errorf("initial relabeled rules sync failed: %w", err) + } + + go rrm.worker(ctx) + + return rrm, nil +} + +func alertRelabelConfigSecretListWatch(clientset *kubernetes.Clientset, namespace string) *cache.ListWatch { + return cache.NewListWatchFromClient( + clientset.CoreV1().RESTClient(), + "secrets", + namespace, + fields.OneTermEqualSelector("metadata.name", AlertRelabelConfigSecretName), + ) +} + +func (rrm *relabeledRulesManager) worker(ctx context.Context) { + for rrm.processNextWorkItem(ctx) { + } +} + +func (rrm *relabeledRulesManager) processNextWorkItem(ctx context.Context) bool { + key, quit := rrm.queue.Get() + if quit { + return false + } + + defer rrm.queue.Done(key) + + if err := rrm.sync(ctx); err != nil { + log.Errorf("error syncing relabeled rules: %v", err) + rrm.queue.AddRateLimited(key) + return true + } + 
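+ // Success: Forget resets the rate limiter's backoff tracking for this key.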
+ rrm.queue.Forget(key) + + return true +} + +func (rrm *relabeledRulesManager) sync(ctx context.Context) error { + relabelConfigs, err := rrm.loadRelabelConfigs() + if err != nil { + return fmt.Errorf("failed to load relabel configs: %w", err) + } + + rrm.mu.Lock() + rrm.relabelConfigs = relabelConfigs + rrm.mu.Unlock() + + alerts := rrm.collectAlerts(ctx, relabelConfigs) + + rrm.mu.Lock() + rrm.relabeledRules = alerts + rrm.mu.Unlock() + + log.Infof("Synced %d relabeled rules in memory", len(alerts)) + return nil +} + +func (rrm *relabeledRulesManager) loadRelabelConfigs() ([]*relabel.Config, error) { + storeKey := fmt.Sprintf("%s/%s", ClusterMonitoringNamespace, AlertRelabelConfigSecretName) + obj, exists, err := rrm.secretInformer.GetStore().GetByKey(storeKey) + if err != nil { + return nil, fmt.Errorf("failed to get secret from store: %w", err) + } + if !exists { + log.Infof("Alert relabel config secret %q not found", storeKey) + return nil, nil + } + + secret, ok := obj.(*corev1.Secret) + if !ok { + return nil, fmt.Errorf("unexpected object type in secret store: %T", obj) + } + + configData, ok := secret.Data[AlertRelabelConfigSecretKey] + if !ok { + return nil, fmt.Errorf("no config data found in secret %q", AlertRelabelConfigSecretName) + } + + var raw []*relabel.Config + if err := yaml.Unmarshal(configData, &raw); err != nil { + return nil, fmt.Errorf("failed to unmarshal relabel configs: %w", err) + } + + configs := make([]*relabel.Config, 0, len(raw)) + for i, config := range raw { + if config == nil { + log.Warnf("skipping nil relabel config entry at index %d", i) + continue + } + if config.NameValidationScheme == model.UnsetValidation { + config.NameValidationScheme = model.UTF8Validation + } + if err := config.Validate(model.UTF8Validation); err != nil { + return nil, fmt.Errorf("invalid relabel config at index %d: %w", i, err) + } + configs = append(configs, config) + } + + log.Infof("Loaded %d relabel configs from secret %s", len(configs), storeKey) + return configs, nil +} + +func (rrm *relabeledRulesManager) collectAlerts(ctx context.Context, relabelConfigs []*relabel.Config) map[string]monitoringv1.Rule { + alerts := make(map[string]monitoringv1.Rule) + seenIDs := make(map[string]struct{}) + + // Fetch all ARCs once from the informer cache (O(1) per-rule lookup below). + // This avoids O(n) live API server calls inside the per-rule loop that would + // cause exponential rate-limit backoff and stale cache data for new rules. + arcByName := rrm.arcsByName(ctx) + + for _, obj := range rrm.prometheusRulesInformer.GetStore().List() { + promRule, ok := obj.(*monitoringv1.PrometheusRule) + if !ok { + continue + } + + // Skip deleted rules + if promRule.DeletionTimestamp != nil { + continue + } + + for _, group := range promRule.Spec.Groups { + for _, rule := range group.Rules { + // Only process alerting rules (skip recording rules) + if rule.Alert == "" { + continue + } + + // Compute a deterministic id from the rule spec. + // Do not trust any user-provided value in openshift_io_alert_rule_id since + // PrometheusRule content (including labels) can be tampered with. + alertRuleId := alertrule.GetAlertingRuleId(&rule) + if _, exists := seenIDs[alertRuleId]; exists { + // A second rule that computes to the same id is ambiguous/unsupported (a "true clone"). + // Don't silently overwrite the first rule in the cache. 
+ log.Warnf("Duplicate alert rule id %q computed for %s/%s (alert=%q); skipping duplicate", alertRuleId, promRule.Namespace, promRule.Name, rule.Alert) + continue + } + seenIDs[alertRuleId] = struct{}{} + + clonedLabels := make(map[string]string, len(rule.Labels)+5) + for k, v := range rule.Labels { + clonedLabels[k] = v + } + rule.Labels = clonedLabels + + rule.Labels[managementlabels.AlertNameLabel] = rule.Alert + + if rrm.namespaceManager.IsClusterMonitoringNamespace(promRule.Namespace) { + lb := labels.NewBuilder(labels.FromMap(rule.Labels)) + keep := relabel.ProcessBuilder(lb, relabelConfigs...) + if !keep { + log.Infof("Skipping dropped alert %s from %s/%s", rule.Alert, promRule.Namespace, promRule.Name) + continue + } + + rule.Labels = lb.Labels().Map() + } + + rule.Labels[AlertRuleLabelId] = alertRuleId + rule.Labels[PrometheusRuleLabelNamespace] = promRule.Namespace + rule.Labels[PrometheusRuleLabelName] = promRule.Name + + if arName := alertingRuleOwner(promRule); arName != "" { + rule.Labels[managementlabels.AlertingRuleLabelName] = arName + } + + ruleManagedBy, relabelConfigManagedBy := rrm.determineManagedBy(promRule, alertRuleId, arcByName) + if ruleManagedBy != "" { + rule.Labels[managementlabels.RuleManagedByLabel] = ruleManagedBy + } + if relabelConfigManagedBy != "" { + rule.Labels[managementlabels.RelabelConfigManagedByLabel] = relabelConfigManagedBy + } + + alerts[alertRuleId] = rule + } + } + } + + log.Debugf("Collected %d alerts", len(alerts)) + return alerts +} + +// alertingRuleOwner returns the name of the AlertingRule CR that generated +// this PrometheusRule, or "" if it was not generated by one. Detection is based +// on the ownerReferences set by the alerting-rules-controller. +func alertingRuleOwner(pr *monitoringv1.PrometheusRule) string { + for _, ref := range pr.OwnerReferences { + if ref.Kind == "AlertingRule" && ref.Controller != nil && *ref.Controller { + return ref.Name + } + } + return "" +} + +// GetAlertRelabelConfigName builds the AlertRelabelConfig name from a PrometheusRule name and alert rule ID +func GetAlertRelabelConfigName(promRuleName string, alertRuleId string) string { + return fmt.Sprintf("arc-%s-%s", sanitizeDNSName(promRuleName), shortHash(alertRuleId, 12)) +} + +// sanitizeDNSName lowercases and replaces invalid chars with '-', trims extra '-' +func sanitizeDNSName(in string) string { + if in == "" { + return "" + } + s := strings.ToLower(in) + // replace any char not [a-z0-9-] with '-' + out := make([]rune, 0, len(s)) + for _, r := range s { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' { + out = append(out, r) + } else { + out = append(out, '-') + } + } + // collapse multiple '-' and trim + res := strings.Trim(strings.ReplaceAll(string(out), "--", "-"), "-") + if res == "" { + return "arc" + } + return res +} + +func shortHash(id string, n int) string { + sum := sha256.Sum256([]byte(id)) + full := fmt.Sprintf("%x", sum[:]) + if n > len(full) { + return full + } + return full[:n] +} + +// arcsByName builds a namespace/name → ARC map from the informer cache. +// Called once per sync cycle so that determineManagedBy can do O(1) lookups +// instead of one live API call per rule. 
+func (rrm *relabeledRulesManager) arcsByName(ctx context.Context) map[string]*osmv1.AlertRelabelConfig { + if rrm.alertRelabelConfigs == nil { + return nil + } + arcs, err := rrm.alertRelabelConfigs.List(ctx, "") + if err != nil { + log.Errorf("arcsByName: failed to list ARCs from cache: %v", err) + return nil + } + m := make(map[string]*osmv1.AlertRelabelConfig, len(arcs)) + for i := range arcs { + key := arcs[i].Namespace + "/" + arcs[i].Name + m[key] = &arcs[i] + } + return m +} + +// determineManagedBy determines the openshift_io_rule_managed_by and openshift_io_relabel_config_managed_by label values +func (rrm *relabeledRulesManager) determineManagedBy(promRule *monitoringv1.PrometheusRule, alertRuleId string, arcByName map[string]*osmv1.AlertRelabelConfig) (string, string) { + // Determine ruleManagedBy from PrometheusRule + var ruleManagedBy string + // If generated by AlertingRule CRD, do not mark as operator-managed; treat as user-via-platform + if alertingRuleOwner(promRule) == "" { + // Prefer operator-managed over GitOps when owner references indicate an operator + gitOpsManaged, operatorManaged := IsExternallyManagedObject(promRule) + if operatorManaged { + ruleManagedBy = managementlabels.ManagedByOperator + } else if gitOpsManaged { + ruleManagedBy = managementlabels.ManagedByGitOps + } + } + + // Determine relabelConfigManagedBy only for platform rules using the + // pre-fetched cache map; no live API call is made here. + isPlatform := rrm.namespaceManager.IsClusterMonitoringNamespace(promRule.Namespace) + var relabelConfigManagedBy string + if isPlatform && arcByName != nil { + arcName := GetAlertRelabelConfigName(promRule.Name, alertRuleId) + key := promRule.Namespace + "/" + arcName + if arc, found := arcByName[key]; found { + if IsManagedByGitOps(arc.Annotations, arc.Labels) { + relabelConfigManagedBy = managementlabels.ManagedByGitOps + } + } + } + + return ruleManagedBy, relabelConfigManagedBy +} + +// DetermineManagedBy determines the managed-by labels for a single PrometheusRule +// alert rule. Callers that have a user-scoped context (e.g. tests) can pass a +// live AlertRelabelConfigInterface; a targeted Get is performed for that one rule. +func DetermineManagedBy(ctx context.Context, alertRelabelConfigs AlertRelabelConfigInterface, namespaceManager NamespaceInterface, promRule *monitoringv1.PrometheusRule, alertRuleId string) (string, string) { + // Single-rule path: fetch only the specific ARC with RBAC enforcement on the + // caller's context, then build a one-entry map for determineManagedBy. 
+ var arcByName map[string]*osmv1.AlertRelabelConfig + if alertRelabelConfigs != nil && namespaceManager.IsClusterMonitoringNamespace(promRule.Namespace) { + arcName := GetAlertRelabelConfigName(promRule.Name, alertRuleId) + arc, found, err := alertRelabelConfigs.Get(ctx, promRule.Namespace, arcName) + if err == nil && found { + arcByName = map[string]*osmv1.AlertRelabelConfig{ + promRule.Namespace + "/" + arcName: arc, + } + } + } + rrm := &relabeledRulesManager{ + alertRelabelConfigs: alertRelabelConfigs, + namespaceManager: namespaceManager, + } + return rrm.determineManagedBy(promRule, alertRuleId, arcByName) +} + +func (rrm *relabeledRulesManager) List(ctx context.Context) []monitoringv1.Rule { + rrm.mu.RLock() + defer rrm.mu.RUnlock() + + result := make([]monitoringv1.Rule, 0, len(rrm.relabeledRules)) + for _, rule := range rrm.relabeledRules { + result = append(result, deepCopyRule(rule)) + } + + return result +} + +func (rrm *relabeledRulesManager) Get(ctx context.Context, id string) (monitoringv1.Rule, bool) { + rrm.mu.RLock() + defer rrm.mu.RUnlock() + + rule, ok := rrm.relabeledRules[id] + if !ok { + return monitoringv1.Rule{}, false + } + + return deepCopyRule(rule), true +} + +func deepCopyRule(r monitoringv1.Rule) monitoringv1.Rule { + cp := r + if r.Labels != nil { + cp.Labels = make(map[string]string, len(r.Labels)) + for k, v := range r.Labels { + cp.Labels[k] = v + } + } + if r.Annotations != nil { + cp.Annotations = make(map[string]string, len(r.Annotations)) + for k, v := range r.Annotations { + cp.Annotations[k] = v + } + } + return cp +} + +func (rrm *relabeledRulesManager) Config() []*relabel.Config { + rrm.mu.RLock() + defer rrm.mu.RUnlock() + + return append([]*relabel.Config{}, rrm.relabelConfigs...) +} diff --git a/pkg/k8s/relabeled_rules_test.go b/pkg/k8s/relabeled_rules_test.go new file mode 100644 index 000000000..1d10ef48c --- /dev/null +++ b/pkg/k8s/relabeled_rules_test.go @@ -0,0 +1,157 @@ +package k8s + +import ( + "context" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +// arcGetPanicInterface implements AlertRelabelConfigInterface and panics if +// Get is called. It is used to verify that the sync path never calls Get. +type arcGetPanicInterface struct { + arcs []osmv1.AlertRelabelConfig +} + +func (m *arcGetPanicInterface) List(_ context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { + if namespace == "" { + return m.arcs, nil + } + var filtered []osmv1.AlertRelabelConfig + for _, a := range m.arcs { + if a.Namespace == namespace { + filtered = append(filtered, a) + } + } + return filtered, nil +} + +func (m *arcGetPanicInterface) Get(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { + panic("Get must not be called during sync; use the arcByName cache map instead") +} + +func (m *arcGetPanicInterface) Create(_ context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + return &arc, nil +} + +func (m *arcGetPanicInterface) Update(_ context.Context, _ osmv1.AlertRelabelConfig) error { + return nil +} + +func (m *arcGetPanicInterface) Delete(_ context.Context, _, _ string) error { + return nil +} + +// stubNamespaceManager implements NamespaceInterface for tests. 
+type stubNamespaceManager struct { + platformNamespaces map[string]bool +} + +func (s *stubNamespaceManager) IsClusterMonitoringNamespace(name string) bool { + return s.platformNamespaces[name] +} + +// TestDetermineManagedBy_NeverCallsGet verifies that determineManagedBy +// uses the pre-fetched arcByName map and never issues a live Get call, +// even for platform-namespace rules with a matching ARC. +func TestDetermineManagedBy_NeverCallsGet(t *testing.T) { + const ( + namespace = "openshift-monitoring" + promRuleName = "test-rule" + alertRuleID = "abc123" + ) + + arcName := GetAlertRelabelConfigName(promRuleName, alertRuleID) + arc := osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: arcName, + Annotations: map[string]string{ + "argocd.argoproj.io/managed-by": "some-app", + }, + }, + } + + rrm := &relabeledRulesManager{ + // arcGetPanicInterface panics if Get is called — this is the guard. + alertRelabelConfigs: &arcGetPanicInterface{arcs: []osmv1.AlertRelabelConfig{arc}}, + namespaceManager: &stubNamespaceManager{ + platformNamespaces: map[string]bool{namespace: true}, + }, + } + + promRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: promRuleName, + }, + } + + // Build arcByName from List (no Get call). + arcByName := rrm.arcsByName(context.Background()) + + // This must not panic (i.e. must not call Get). + ruleManagedBy, relabelConfigManagedBy := rrm.determineManagedBy(promRule, alertRuleID, arcByName) + + if ruleManagedBy != "" { + t.Errorf("expected empty ruleManagedBy, got %q", ruleManagedBy) + } + if relabelConfigManagedBy != managementlabels.ManagedByGitOps { + t.Errorf("expected relabelConfigManagedBy=%q, got %q", managementlabels.ManagedByGitOps, relabelConfigManagedBy) + } +} + +// TestDetermineManagedBy_NoARCMatch verifies that a platform rule with no +// matching ARC in the cache produces empty relabelConfigManagedBy. +func TestDetermineManagedBy_NoARCMatch(t *testing.T) { + const namespace = "openshift-monitoring" + + rrm := &relabeledRulesManager{ + alertRelabelConfigs: &arcGetPanicInterface{arcs: nil}, + namespaceManager: &stubNamespaceManager{ + platformNamespaces: map[string]bool{namespace: true}, + }, + } + + promRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: "some-rule", + }, + } + + arcByName := rrm.arcsByName(context.Background()) + _, relabelConfigManagedBy := rrm.determineManagedBy(promRule, "no-match-id", arcByName) + + if relabelConfigManagedBy != "" { + t.Errorf("expected empty relabelConfigManagedBy for no ARC match, got %q", relabelConfigManagedBy) + } +} + +// TestDetermineManagedBy_NonPlatformRuleSkipsARCLookup verifies that a +// user-workload rule (non-platform namespace) does not consult ARCs at all. +func TestDetermineManagedBy_NonPlatformRuleSkipsARCLookup(t *testing.T) { + rrm := &relabeledRulesManager{ + // Non-nil but panics on Get — confirms no lookup occurs. 
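+		// The empty platformNamespaces map below makes every namespace
+		// non-platform, so the ARC map is never consulted.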
+ alertRelabelConfigs: &arcGetPanicInterface{arcs: nil}, + namespaceManager: &stubNamespaceManager{platformNamespaces: map[string]bool{}}, + } + + promRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "user-namespace", + Name: "user-rule", + }, + } + + arcByName := rrm.arcsByName(context.Background()) + _, relabelConfigManagedBy := rrm.determineManagedBy(promRule, "some-id", arcByName) + + if relabelConfigManagedBy != "" { + t.Errorf("expected empty relabelConfigManagedBy for non-platform rule, got %q", relabelConfigManagedBy) + } +} diff --git a/pkg/k8s/types.go b/pkg/k8s/types.go index e22c38f57..102d5fccf 100644 --- a/pkg/k8s/types.go +++ b/pkg/k8s/types.go @@ -3,7 +3,9 @@ package k8s import ( "context" + osmv1 "github.com/openshift/api/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/model/relabel" "k8s.io/apimachinery/pkg/types" ) @@ -22,6 +24,15 @@ type Client interface { // PrometheusRules returns the PrometheusRule interface PrometheusRules() PrometheusRuleInterface + // AlertRelabelConfigs returns the AlertRelabelConfig interface + AlertRelabelConfigs() AlertRelabelConfigInterface + + // AlertingRules returns the AlertingRule interface + AlertingRules() AlertingRuleInterface + + // RelabeledRules returns the RelabeledRules interface + RelabeledRules() RelabeledRulesInterface + // Namespace returns the Namespace interface Namespace() NamespaceInterface } @@ -44,6 +55,55 @@ type PrometheusRuleInterface interface { AddRule(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error } +// AlertRelabelConfigInterface defines operations for managing AlertRelabelConfigs +type AlertRelabelConfigInterface interface { + // List lists all AlertRelabelConfigs in the cluster + List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) + + // Get retrieves an AlertRelabelConfig by namespace and name + Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) + + // Create creates a new AlertRelabelConfig + Create(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) + + // Update updates an existing AlertRelabelConfig + Update(ctx context.Context, arc osmv1.AlertRelabelConfig) error + + // Delete deletes an AlertRelabelConfig by namespace and name + Delete(ctx context.Context, namespace string, name string) error +} + +// AlertingRuleInterface defines operations for managing AlertingRules +// in the cluster monitoring namespace +type AlertingRuleInterface interface { + // List lists all AlertingRules in the cluster + List(ctx context.Context) ([]osmv1.AlertingRule, error) + + // Get retrieves an AlertingRule by name + Get(ctx context.Context, name string) (*osmv1.AlertingRule, bool, error) + + // Create creates a new AlertingRule + Create(ctx context.Context, ar osmv1.AlertingRule) (*osmv1.AlertingRule, error) + + // Update updates an existing AlertingRule + Update(ctx context.Context, ar osmv1.AlertingRule) error + + // Delete deletes an AlertingRule by name + Delete(ctx context.Context, name string) error +} + +// RelabeledRulesInterface defines operations for managing relabeled rules +type RelabeledRulesInterface interface { + // List retrieves the relabeled rules for a given PrometheusRule + List(ctx context.Context) []monitoringv1.Rule + + // Get retrieves the relabeled rule for a given id + Get(ctx context.Context, id string) 
(monitoringv1.Rule, bool) + + // Config returns the list of alert relabel configs + Config() []*relabel.Config +} + // NamespaceInterface defines operations for Namespaces type NamespaceInterface interface { // IsClusterMonitoringNamespace checks if a namespace has the openshift.io/cluster-monitoring=true label diff --git a/pkg/k8s/user_scoped_client.go b/pkg/k8s/user_scoped_client.go new file mode 100644 index 000000000..448ba4d8e --- /dev/null +++ b/pkg/k8s/user_scoped_client.go @@ -0,0 +1,39 @@ +package k8s + +import ( + osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + "k8s.io/client-go/rest" +) + +// userScopedClientsets holds short-lived clientsets that authenticate as the +// requesting user rather than the plugin's service account. +type userScopedClientsets struct { + monitoringV1 *monitoringv1client.Clientset + osmV1 *osmv1client.Clientset +} + +// newUserScopedClientsets creates clientsets that carry the supplied bearer +// token so that Kubernetes RBAC is enforced for the requesting user on all +// mutating API calls. +func newUserScopedClientsets(baseConfig *rest.Config, userToken string) (*userScopedClientsets, error) { + cfg := rest.CopyConfig(baseConfig) + // Override any SA token loaded from the file system with the user's token. + cfg.BearerToken = userToken + cfg.BearerTokenFile = "" + + monClient, err := monitoringv1client.NewForConfig(cfg) + if err != nil { + return nil, err + } + + osmClient, err := osmv1client.NewForConfig(cfg) + if err != nil { + return nil, err + } + + return &userScopedClientsets{ + monitoringV1: monClient, + osmV1: osmClient, + }, nil +} diff --git a/pkg/management/client_factory.go b/pkg/management/client_factory.go new file mode 100644 index 000000000..e71b7f93b --- /dev/null +++ b/pkg/management/client_factory.go @@ -0,0 +1,14 @@ +package management + +import ( + "context" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +// New creates a new management client. 
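+// The context argument is currently unused by this constructor; each Client
+// method takes its own context for per-request work.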
+func New(ctx context.Context, k8sClient k8s.Client) Client { + return &client{ + k8sClient: k8sClient, + } +} diff --git a/pkg/management/create_platform_alert_rule.go b/pkg/management/create_platform_alert_rule.go new file mode 100644 index 000000000..7d49ea5ee --- /dev/null +++ b/pkg/management/create_platform_alert_rule.go @@ -0,0 +1,142 @@ +package management + +import ( + "context" + "fmt" + "strings" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +const ( + defaultAlertingRuleName = "platform-alert-rules" + defaultPlatformGroupName = "platform-alert-rules" +) + +func (c *client) CreatePlatformAlertRule(ctx context.Context, alertRule monitoringv1.Rule) (string, error) { + if err := validateAlertRuleInputs(alertRule); err != nil { + return "", err + } + + newRuleId := alertrule.GetAlertingRuleId(&alertRule) + + if _, found := c.k8sClient.RelabeledRules().Get(ctx, newRuleId); found { + return "", &ConflictError{Message: "alert rule with exact config already exists"} + } + + if alertRule.Labels == nil { + alertRule.Labels = map[string]string{} + } + alertRule.Labels[k8s.AlertRuleLabelId] = newRuleId + + osmRule := toOSMRule(alertRule) + + // RetryOnConflict handles the concurrent update (409) case that arises when + // multiple replicas perform a read-modify-write on the same AlertingRule. + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + existing, found, getErr := c.k8sClient.AlertingRules().Get(ctx, defaultAlertingRuleName) + if getErr != nil { + return fmt.Errorf("failed to get AlertingRule %s: %w", defaultAlertingRuleName, getErr) + } + + if found { + // Disallow adding to externally managed AlertingRules + if gitOpsManaged, operatorManaged := k8s.IsExternallyManagedObject(existing); gitOpsManaged { + return &NotAllowedError{Message: "The AlertingRule is managed by GitOps; create the alert in Git."} + } else if operatorManaged { + return &NotAllowedError{Message: "This AlertingRule is managed by an operator; you cannot add alerts to it."} + } + updated := existing.DeepCopy() + if addErr := addRuleToGroup(&updated.Spec, defaultPlatformGroupName, osmRule); addErr != nil { + return addErr + } + if updateErr := c.k8sClient.AlertingRules().Update(ctx, *updated); updateErr != nil { + return fmt.Errorf("failed to update AlertingRule %s: %w", defaultAlertingRuleName, updateErr) + } + return nil + } + + ar := osmv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: defaultAlertingRuleName, + Namespace: k8s.ClusterMonitoringNamespace, + }, + Spec: osmv1.AlertingRuleSpec{ + Groups: []osmv1.RuleGroup{ + { + Name: defaultPlatformGroupName, + Rules: []osmv1.Rule{osmRule}, + }, + }, + }, + } + + if _, createErr := c.k8sClient.AlertingRules().Create(ctx, ar); createErr != nil { + return fmt.Errorf("failed to create AlertingRule %s: %w", defaultAlertingRuleName, createErr) + } + return nil + }) + if err != nil { + return "", err + } + + return newRuleId, nil +} + +func validateAlertRuleInputs(alertRule monitoringv1.Rule) error { + alertName := strings.TrimSpace(alertRule.Alert) + if alertName == "" { + return &ValidationError{Message: "alert name is required"} + } + + if strings.TrimSpace(alertRule.Expr.String()) == "" { + return &ValidationError{Message: "expr is required"} + } + + 
if v, ok := alertRule.Labels["severity"]; ok && !isValidSeverity(v) { + return &ValidationError{Message: fmt.Sprintf("invalid severity %q: must be one of critical|warning|info|none", v)} + } + + return nil +} + +func addRuleToGroup(spec *osmv1.AlertingRuleSpec, groupName string, rule osmv1.Rule) error { + for i := range spec.Groups { + if spec.Groups[i].Name != groupName { + continue + } + for _, existing := range spec.Groups[i].Rules { + if existing.Alert == rule.Alert { + return &ConflictError{Message: fmt.Sprintf("alert rule %q already exists in group %q", rule.Alert, groupName)} + } + } + spec.Groups[i].Rules = append(spec.Groups[i].Rules, rule) + return nil + } + spec.Groups = append(spec.Groups, osmv1.RuleGroup{ + Name: groupName, + Rules: []osmv1.Rule{rule}, + }) + return nil +} + +func toOSMRule(rule monitoringv1.Rule) osmv1.Rule { + osmRule := osmv1.Rule{ + Alert: rule.Alert, + Expr: rule.Expr, + Labels: rule.Labels, + Annotations: rule.Annotations, + } + + if rule.For != nil { + osmRule.For = osmv1.Duration(*rule.For) + } + + return osmRule +} diff --git a/pkg/management/create_platform_alert_rule_test.go b/pkg/management/create_platform_alert_rule_test.go new file mode 100644 index 000000000..ae3de8925 --- /dev/null +++ b/pkg/management/create_platform_alert_rule_test.go @@ -0,0 +1,290 @@ +package management_test + +import ( + "context" + "errors" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +func newPlatformBaseRule() monitoringv1.Rule { + return monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + For: (*monitoringv1.Duration)(stringPtr("5m")), + Labels: map[string]string{ + "severity": "warning", + }, + Annotations: map[string]string{ + "summary": "platform alert", + }, + } +} + +func TestCreatePlatformAlertRule_EmptyAlertName(t *testing.T) { + rule := newPlatformBaseRule() + rule.Alert = " " + mockK8s := &testutils.MockClient{} + client := management.New(context.Background(), mockK8s) + + _, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err == nil || !containsString(err.Error(), "alert name is required") { + t.Fatalf("expected 'alert name is required', got %v", err) + } +} + +func TestCreatePlatformAlertRule_EmptyExpr(t *testing.T) { + rule := newPlatformBaseRule() + rule.Expr = intstr.FromString(" ") + mockK8s := &testutils.MockClient{} + client := management.New(context.Background(), mockK8s) + + _, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err == nil || !containsString(err.Error(), "expr is required") { + t.Fatalf("expected 'expr is required', got %v", err) + } +} + +func TestCreatePlatformAlertRule_InvalidSeverity(t *testing.T) { + rule := newPlatformBaseRule() + rule.Labels = map[string]string{"severity": "fatal"} + mockK8s := &testutils.MockClient{} + client := management.New(context.Background(), mockK8s) + + _, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err == nil || !containsString(err.Error(), "invalid severity") { + t.Fatalf("expected 'invalid severity', got %v", err) + } +} + +func 
TestCreatePlatformAlertRule_DuplicateRuleId(t *testing.T) { + rule := newPlatformBaseRule() + ruleID := alertrule.GetAlertingRuleId(&rule) + + mockK8s := &testutils.MockClient{ + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == ruleID { + return rule, true + } + return monitoringv1.Rule{}, false + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + _, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err == nil || !containsString(err.Error(), "exact config already exists") { + t.Fatalf("expected conflict error, got %v", err) + } +} + +func TestCreatePlatformAlertRule_GitOpsManaged(t *testing.T) { + rule := newPlatformBaseRule() + mockK8s := &testutils.MockClient{ + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, name string) (*osmv1.AlertingRule, bool, error) { + return &osmv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: k8s.ClusterMonitoringNamespace, + Annotations: map[string]string{"argocd.argoproj.io/tracking-id": "abc"}, + }, + }, true, nil + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + _, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err == nil || !containsString(err.Error(), "managed by GitOps") { + t.Fatalf("expected GitOps error, got %v", err) + } +} + +func TestCreatePlatformAlertRule_UpdateExisting(t *testing.T) { + rule := newPlatformBaseRule() + var updated osmv1.AlertingRule + + mockK8s := &testutils.MockClient{ + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, name string) (*osmv1.AlertingRule, bool, error) { + return &osmv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: k8s.ClusterMonitoringNamespace, + }, + Spec: osmv1.AlertingRuleSpec{ + Groups: []osmv1.RuleGroup{ + { + Name: "platform-alert-rules", + Rules: []osmv1.Rule{ + {Alert: "ExistingAlert", Expr: intstr.FromString("vector(1)")}, + }, + }, + }, + }, + }, true, nil + }, + UpdateFunc: func(_ context.Context, ar osmv1.AlertingRule) error { + updated = ar + return nil + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + ruleID, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ruleID != alertrule.GetAlertingRuleId(&rule) { + t.Errorf("wrong ruleID: %q", ruleID) + } + if updated.Name != "platform-alert-rules" { + t.Errorf("wrong AlertingRule name: %q", updated.Name) + } + if len(updated.Spec.Groups) != 1 || len(updated.Spec.Groups[0].Rules) != 2 { + t.Errorf("expected 1 group with 2 rules, got %v", updated.Spec.Groups) + } + if _, ok := updated.Spec.Groups[0].Rules[1].Labels[k8s.AlertRuleLabelId]; !ok { + t.Error("expected AlertRuleLabelId on new rule") + } +} + +func 
TestCreatePlatformAlertRule_ConflictAlertName(t *testing.T) { + rule := newPlatformBaseRule() + mockK8s := &testutils.MockClient{ + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, name string) (*osmv1.AlertingRule, bool, error) { + return &osmv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: k8s.ClusterMonitoringNamespace}, + Spec: osmv1.AlertingRuleSpec{ + Groups: []osmv1.RuleGroup{ + { + Name: "platform-alert-rules", + Rules: []osmv1.Rule{ + {Alert: "PlatformAlert", Expr: intstr.FromString("vector(1)")}, + }, + }, + }, + }, + }, true, nil + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + _, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err == nil || !containsString(err.Error(), "already exists in group") { + t.Fatalf("expected conflict error, got %v", err) + } +} + +func TestCreatePlatformAlertRule_CreateNew(t *testing.T) { + rule := newPlatformBaseRule() + var created osmv1.AlertingRule + + mockK8s := &testutils.MockClient{ + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, name string) (*osmv1.AlertingRule, bool, error) { + return nil, false, nil + }, + CreateFunc: func(_ context.Context, ar osmv1.AlertingRule) (*osmv1.AlertingRule, error) { + created = ar + return &ar, nil + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + _, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if created.Name != "platform-alert-rules" { + t.Errorf("wrong name: %q", created.Name) + } + if created.Namespace != k8s.ClusterMonitoringNamespace { + t.Errorf("wrong namespace: %q", created.Namespace) + } + if len(created.Spec.Groups) != 1 || len(created.Spec.Groups[0].Rules) != 1 { + t.Errorf("unexpected groups: %v", created.Spec.Groups) + } + if _, ok := created.Spec.Groups[0].Rules[0].Labels[k8s.AlertRuleLabelId]; !ok { + t.Error("expected AlertRuleLabelId on created rule") + } +} + +func TestCreatePlatformAlertRule_GetFails(t *testing.T) { + rule := newPlatformBaseRule() + mockK8s := &testutils.MockClient{ + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + AlertingRulesFunc: func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, name string) (*osmv1.AlertingRule, bool, error) { + return nil, false, errors.New("get failed") + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + _, err := client.CreatePlatformAlertRule(context.Background(), rule) + if err == nil || !containsString(err.Error(), "failed to get AlertingRule") || !containsString(err.Error(), "get failed") { + t.Fatalf("expected wrapped error, got %v", err) + } +} diff --git 
a/pkg/management/create_user_defined_alert_rule.go b/pkg/management/create_user_defined_alert_rule.go new file mode 100644 index 000000000..fb4c030ef --- /dev/null +++ b/pkg/management/create_user_defined_alert_rule.go @@ -0,0 +1,138 @@ +package management + +import ( + "context" + "strings" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +const ( + DefaultGroupName = "user-defined-rules" +) + +func (c *client) CreateUserDefinedAlertRule(ctx context.Context, alertRule monitoringv1.Rule, prOptions PrometheusRuleOptions) (string, error) { + if prOptions.Name == "" || prOptions.Namespace == "" { + return "", &ValidationError{Message: "PrometheusRule Name and Namespace must be specified"} + } + + if err := validateAlertRuleInputs(alertRule); err != nil { + return "", err + } + + // compute id from the rule content BEFORE mutating labels + computedRuleID := alertrule.GetAlertingRuleId(&alertRule) + // set/stamp the rule id label on user-defined rules + if alertRule.Labels == nil { + alertRule.Labels = map[string]string{} + } + alertRule.Labels[k8s.AlertRuleLabelId] = computedRuleID + + // Check if rule with the same ID already exists (fast path) + _, found := c.k8sClient.RelabeledRules().Get(ctx, computedRuleID) + if found { + return "", &ConflictError{Message: "alert rule with exact config already exists"} + } + + // Deny creating an equivalent rule (same spec: expr, for, labels including severity) even if alert name differs + if c.existsUserDefinedRuleWithSameSpec(ctx, alertRule) { + return "", &ConflictError{Message: "alert rule with equivalent spec already exists"} + } + + nn := types.NamespacedName{ + Name: prOptions.Name, + Namespace: prOptions.Namespace, + } + + if c.isPlatformManagedPrometheusRule(nn) { + return "", &NotAllowedError{Message: "cannot add user-defined alert rule to a platform-managed PrometheusRule; create an AlertingRule CR instead"} + } + + pr, prFound, err := c.k8sClient.PrometheusRules().Get(ctx, nn.Namespace, nn.Name) + if err != nil { + return "", err + } + if prFound && pr != nil { + if gitOpsManaged, operatorManaged := k8s.IsExternallyManagedObject(pr); gitOpsManaged { + return "", &NotAllowedError{Message: "This PrometheusRule is managed by GitOps; create the alert in Git."} + } else if operatorManaged { + return "", &NotAllowedError{Message: "This PrometheusRule is managed by an operator; you cannot add alerts to it."} + } + // Enforce uniqueness: "true clones" (identical definitions) compute to the same rule ID. + for _, g := range pr.Spec.Groups { + for _, r := range g.Rules { + if r.Alert != "" && alertrule.GetAlertingRuleId(&r) == computedRuleID { + return "", &ConflictError{Message: "alert rule with exact config already exists"} + } + } + } + } + + if prOptions.GroupName == "" { + prOptions.GroupName = DefaultGroupName + } + + err = c.k8sClient.PrometheusRules().AddRule(ctx, nn, prOptions.GroupName, alertRule) + if err != nil { + return "", err + } + + return computedRuleID, nil +} + +// existsUserDefinedRuleWithSameSpec returns true if a rule with an equivalent +// specification already exists in the relabeled rules cache. 
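+// This is a linear scan over the cached rules; equivalence (ignoring the alert
+// name and system labels) is decided by rulesHaveEquivalentSpec below.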
+func (c *client) existsUserDefinedRuleWithSameSpec(ctx context.Context, candidate monitoringv1.Rule) bool { + for _, existing := range c.k8sClient.RelabeledRules().List(ctx) { + if rulesHaveEquivalentSpec(existing, candidate) { + return true + } + } + return false +} + +// rulesHaveEquivalentSpec compares two alert rules for equivalence based on +// expression, duration (for) and non-system labels (excluding openshift_io_* and alertname). +func rulesHaveEquivalentSpec(a, b monitoringv1.Rule) bool { + if alertrule.NormalizeExpr(a.Expr.String()) != alertrule.NormalizeExpr(b.Expr.String()) { + return false + } + var af, bf string + if a.For != nil { + af = string(*a.For) + } + if b.For != nil { + bf = string(*b.For) + } + if af != bf { + return false + } + al := filterBusinessLabels(a.Labels) + bl := filterBusinessLabels(b.Labels) + if len(al) != len(bl) { + return false + } + for k, v := range al { + if bl[k] != v { + return false + } + } + return true +} + +// filterBusinessLabels returns labels excluding system/provenance and identity labels. +func filterBusinessLabels(in map[string]string) map[string]string { + out := map[string]string{} + for k, v := range in { + if strings.HasPrefix(k, "openshift_io_") || k == managementlabels.AlertNameLabel { + continue + } + out[k] = v + } + return out +} diff --git a/pkg/management/create_user_defined_alert_rule_test.go b/pkg/management/create_user_defined_alert_rule_test.go new file mode 100644 index 000000000..ffc0df894 --- /dev/null +++ b/pkg/management/create_user_defined_alert_rule_test.go @@ -0,0 +1,374 @@ +package management_test + +import ( + "context" + "errors" + "testing" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var testRule = monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + For: (*monitoringv1.Duration)(stringPtr("5m")), + Labels: map[string]string{ + "severity": "warning", + }, + Annotations: map[string]string{ + "summary": "Test alert", + }, +} + +func stringPtr(s string) *string { return &s } + +func containsString(s, sub string) bool { + if len(sub) == 0 { + return true + } + for i := 0; i <= len(s)-len(sub); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false +} + +func TestCreateUserDefinedAlertRule_GitOpsManaged(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + Annotations: map[string]string{"argocd.argoproj.io/tracking-id": 
"abc"}, + }, + }, true, nil + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "user-pr", Namespace: "user-ns"}) + if err == nil || !containsString(err.Error(), "This PrometheusRule is managed by GitOps; create the alert in Git.") { + t.Fatalf("expected GitOps error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_OperatorManaged(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + OwnerReferences: []metav1.OwnerReference{ + {Kind: "Deployment", Name: "some-operator"}, + }, + }, + }, true, nil + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "user-pr", Namespace: "user-ns"}) + if err == nil || !containsString(err.Error(), "This PrometheusRule is managed by an operator; you cannot add alerts to it.") { + t.Fatalf("expected operator-managed error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_MissingName(t *testing.T) { + mockK8s := &testutils.MockClient{} + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Namespace: "test-namespace"}) + if err == nil || !containsString(err.Error(), "PrometheusRule Name and Namespace must be specified") { + t.Fatalf("expected validation error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_MissingNamespace(t *testing.T) { + mockK8s := &testutils.MockClient{} + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "test-rule"}) + if err == nil || !containsString(err.Error(), "PrometheusRule Name and Namespace must be specified") { + t.Fatalf("expected validation error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_EmptyAlertName(t *testing.T) { + rule := testRule + rule.Alert = " " + mockK8s := &testutils.MockClient{} + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), rule, management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace"}) + if err == nil || !containsString(err.Error(), "alert name is required") { + t.Fatalf("expected validation error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_EmptyExpr(t *testing.T) { + rule := testRule + rule.Expr = intstr.FromString(" ") + mockK8s := &testutils.MockClient{} + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), rule, 
management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace"}) + if err == nil || !containsString(err.Error(), "expr is required") { + t.Fatalf("expected validation error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_InvalidSeverity(t *testing.T) { + rule := testRule + rule.Labels = map[string]string{"severity": "fatal"} + mockK8s := &testutils.MockClient{} + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), rule, management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace"}) + if err == nil || !containsString(err.Error(), "invalid severity") { + t.Fatalf("expected severity error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_PlatformManagedNamespace(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "openshift-monitoring" + }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "platform-rule", Namespace: "openshift-monitoring"}) + if err == nil || !containsString(err.Error(), "cannot add user-defined alert rule to a platform-managed PrometheusRule") { + t.Fatalf("expected platform error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_DuplicateRuleId(t *testing.T) { + ruleId := alertrule.GetAlertingRuleId(&testRule) + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == ruleId { + return testRule, true + } + return monitoringv1.Rule{}, false + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace"}) + if err == nil || !containsString(err.Error(), "alert rule with exact config already exists") { + t.Fatalf("expected conflict error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_AddRuleFails(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + AddRuleFunc: func(_ context.Context, _ types.NamespacedName, _ string, _ monitoringv1.Rule) error { + return errors.New("failed to add rule") + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + _, err 
:= client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace"}) + if err == nil || !containsString(err.Error(), "failed to add rule") { + t.Fatalf("expected add rule error, got %v", err) + } +} + +func TestCreateUserDefinedAlertRule_Success(t *testing.T) { + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + AddRuleFunc: func(_ context.Context, _ types.NamespacedName, _ string, _ monitoringv1.Rule) error { + return nil + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + ruleId, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ruleId == "" || ruleId != alertrule.GetAlertingRuleId(&testRule) { + t.Errorf("unexpected ruleId: %q", ruleId) + } +} + +func TestCreateUserDefinedAlertRule_DefaultGroupName(t *testing.T) { + var capturedGroupName string + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + AddRuleFunc: func(_ context.Context, _ types.NamespacedName, groupName string, _ monitoringv1.Rule) error { + capturedGroupName = groupName + return nil + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if capturedGroupName != "user-defined-rules" { + t.Errorf("expected 'user-defined-rules', got %q", capturedGroupName) + } +} + +func TestCreateUserDefinedAlertRule_CustomGroupName(t *testing.T) { + var capturedGroupName string + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + AddRuleFunc: func(_ context.Context, _ types.NamespacedName, groupName string, _ monitoringv1.Rule) error { + capturedGroupName = groupName + return nil + }, + } + }, + } + client := 
management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace", GroupName: "custom-group"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if capturedGroupName != "custom-group" { + t.Errorf("expected 'custom-group', got %q", capturedGroupName) + } +} + +func TestCreateUserDefinedAlertRule_EquivalentSpecDenied(t *testing.T) { + existing := monitoringv1.Rule{} + testRule.DeepCopyInto(&existing) + existing.Alert = "OtherName" + + mockK8s := &testutils.MockClient{ + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return false }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(_ context.Context) []monitoringv1.Rule { + return []monitoringv1.Rule{existing} + }, + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + }, + } + client := management.New(context.Background(), mockK8s) + _, err := client.CreateUserDefinedAlertRule(context.Background(), testRule, management.PrometheusRuleOptions{Name: "user-rule", Namespace: "user-namespace"}) + if err == nil || !containsString(err.Error(), "equivalent spec already exists") { + t.Fatalf("expected equivalent spec error, got %v", err) + } +} diff --git a/pkg/management/errors.go b/pkg/management/errors.go new file mode 100644 index 000000000..d0bec9127 --- /dev/null +++ b/pkg/management/errors.go @@ -0,0 +1,44 @@ +package management + +import "fmt" + +type NotFoundError struct { + Resource string + Id string + + AdditionalInfo string +} + +func (r *NotFoundError) Error() string { + s := fmt.Sprintf("%s with id %s not found", r.Resource, r.Id) + + if r.AdditionalInfo != "" { + s += fmt.Sprintf(": %s", r.AdditionalInfo) + } + + return s +} + +type NotAllowedError struct { + Message string +} + +func (r *NotAllowedError) Error() string { + return r.Message +} + +type ValidationError struct { + Message string +} + +func (e *ValidationError) Error() string { + return e.Message +} + +type ConflictError struct { + Message string +} + +func (e *ConflictError) Error() string { + return e.Message +} diff --git a/pkg/management/label_utils.go b/pkg/management/label_utils.go new file mode 100644 index 000000000..2efb36ca8 --- /dev/null +++ b/pkg/management/label_utils.go @@ -0,0 +1,12 @@ +package management + +var validSeverities = map[string]bool{ + "critical": true, + "warning": true, + "info": true, + "none": true, +} + +func isValidSeverity(s string) bool { + return validSeverities[s] +} diff --git a/pkg/management/management.go b/pkg/management/management.go new file mode 100644 index 000000000..652ac14de --- /dev/null +++ b/pkg/management/management.go @@ -0,0 +1,22 @@ +package management + +import ( + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +type client struct { + k8sClient k8s.Client +} + +// isPlatformManagedPrometheusRule returns true when the target +// PrometheusRule lives in a namespace labeled +// openshift.io/cluster-monitoring=true. CMO's platform Prometheus +// evaluates every PrometheusRule in those namespaces regardless of +// who created it, so the namespace boundary is the correct routing +// check. 
Rules in platform namespaces must be managed via AlertingRule +// CRs rather than direct PrometheusRule manipulation. +func (c *client) isPlatformManagedPrometheusRule(nn types.NamespacedName) bool { + return c.k8sClient.Namespace().IsClusterMonitoringNamespace(nn.Namespace) +} diff --git a/pkg/management/testutils/k8s_client_mock.go b/pkg/management/testutils/k8s_client_mock.go new file mode 100644 index 000000000..0b9adbdb2 --- /dev/null +++ b/pkg/management/testutils/k8s_client_mock.go @@ -0,0 +1,437 @@ +package testutils + +import ( + "context" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/model/relabel" + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +// MockClient is a mock implementation of k8s.Client interface. +// Each accessor lazily initializes a single backing mock so that +// write-then-read sequences (e.g. AlertingRules().Create followed +// by AlertingRules().Get) hit the same store. +type MockClient struct { + TestConnectionFunc func(ctx context.Context) error + PrometheusRulesFunc func() k8s.PrometheusRuleInterface + AlertRelabelConfigsFunc func() k8s.AlertRelabelConfigInterface + AlertingRulesFunc func() k8s.AlertingRuleInterface + RelabeledRulesFunc func() k8s.RelabeledRulesInterface + NamespaceFunc func() k8s.NamespaceInterface + + prometheusRules k8s.PrometheusRuleInterface + alertRelabelConfigs k8s.AlertRelabelConfigInterface + alertingRules k8s.AlertingRuleInterface + relabeledRules k8s.RelabeledRulesInterface + namespace k8s.NamespaceInterface +} + +// TestConnection mocks the TestConnection method +func (m *MockClient) TestConnection(ctx context.Context) error { + if m.TestConnectionFunc != nil { + return m.TestConnectionFunc(ctx) + } + return nil +} + +func (m *MockClient) PrometheusRules() k8s.PrometheusRuleInterface { + if m.PrometheusRulesFunc != nil { + return m.PrometheusRulesFunc() + } + if m.prometheusRules == nil { + m.prometheusRules = &MockPrometheusRuleInterface{} + } + return m.prometheusRules +} + +func (m *MockClient) AlertRelabelConfigs() k8s.AlertRelabelConfigInterface { + if m.AlertRelabelConfigsFunc != nil { + return m.AlertRelabelConfigsFunc() + } + if m.alertRelabelConfigs == nil { + m.alertRelabelConfigs = &MockAlertRelabelConfigInterface{} + } + return m.alertRelabelConfigs +} + +func (m *MockClient) AlertingRules() k8s.AlertingRuleInterface { + if m.AlertingRulesFunc != nil { + return m.AlertingRulesFunc() + } + if m.alertingRules == nil { + m.alertingRules = &MockAlertingRuleInterface{} + } + return m.alertingRules +} + +func (m *MockClient) RelabeledRules() k8s.RelabeledRulesInterface { + if m.RelabeledRulesFunc != nil { + return m.RelabeledRulesFunc() + } + if m.relabeledRules == nil { + m.relabeledRules = &MockRelabeledRulesInterface{} + } + return m.relabeledRules +} + +func (m *MockClient) Namespace() k8s.NamespaceInterface { + if m.NamespaceFunc != nil { + return m.NamespaceFunc() + } + if m.namespace == nil { + m.namespace = &MockNamespaceInterface{} + } + return m.namespace +} + +// MockPrometheusRuleInterface is a mock implementation of k8s.PrometheusRuleInterface +type MockPrometheusRuleInterface struct { + ListFunc func() ([]monitoringv1.PrometheusRule, error) + GetFunc func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) + UpdateFunc func(ctx context.Context, pr monitoringv1.PrometheusRule) error + DeleteFunc 
func(ctx context.Context, namespace string, name string) error + AddRuleFunc func(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error + + // Storage for test data + PrometheusRules map[string]*monitoringv1.PrometheusRule +} + +func (m *MockPrometheusRuleInterface) SetPrometheusRules(rules map[string]*monitoringv1.PrometheusRule) { + m.PrometheusRules = rules +} + +// List mocks the List method +func (m *MockPrometheusRuleInterface) List() ([]monitoringv1.PrometheusRule, error) { + if m.ListFunc != nil { + return m.ListFunc() + } + + var rules []monitoringv1.PrometheusRule + if m.PrometheusRules != nil { + for _, rule := range m.PrometheusRules { + rules = append(rules, *rule) + } + } + return rules, nil +} + +// Get mocks the Get method +func (m *MockPrometheusRuleInterface) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + if m.GetFunc != nil { + return m.GetFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.PrometheusRules != nil { + if rule, exists := m.PrometheusRules[key]; exists { + return rule, true, nil + } + } + + return nil, false, nil +} + +// Update mocks the Update method +func (m *MockPrometheusRuleInterface) Update(ctx context.Context, pr monitoringv1.PrometheusRule) error { + if m.UpdateFunc != nil { + return m.UpdateFunc(ctx, pr) + } + + key := pr.Namespace + "/" + pr.Name + if m.PrometheusRules == nil { + m.PrometheusRules = make(map[string]*monitoringv1.PrometheusRule) + } + m.PrometheusRules[key] = &pr + return nil +} + +// Delete mocks the Delete method +func (m *MockPrometheusRuleInterface) Delete(ctx context.Context, namespace string, name string) error { + if m.DeleteFunc != nil { + return m.DeleteFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.PrometheusRules != nil { + delete(m.PrometheusRules, key) + } + return nil +} + +// AddRule mocks the AddRule method +func (m *MockPrometheusRuleInterface) AddRule(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + if m.AddRuleFunc != nil { + return m.AddRuleFunc(ctx, namespacedName, groupName, rule) + } + + key := namespacedName.Namespace + "/" + namespacedName.Name + if m.PrometheusRules == nil { + m.PrometheusRules = make(map[string]*monitoringv1.PrometheusRule) + } + + // Get or create PrometheusRule + pr, exists := m.PrometheusRules[key] + if !exists { + pr = &monitoringv1.PrometheusRule{ + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{}, + }, + } + pr.Name = namespacedName.Name + pr.Namespace = namespacedName.Namespace + m.PrometheusRules[key] = pr + } + + // Find or create the group + var group *monitoringv1.RuleGroup + for i := range pr.Spec.Groups { + if pr.Spec.Groups[i].Name == groupName { + group = &pr.Spec.Groups[i] + break + } + } + if group == nil { + pr.Spec.Groups = append(pr.Spec.Groups, monitoringv1.RuleGroup{ + Name: groupName, + Rules: []monitoringv1.Rule{}, + }) + group = &pr.Spec.Groups[len(pr.Spec.Groups)-1] + } + + // Add the new rule to the group + group.Rules = append(group.Rules, rule) + + return nil +} + +// MockAlertRelabelConfigInterface is a mock implementation of k8s.AlertRelabelConfigInterface +type MockAlertRelabelConfigInterface struct { + ListFunc func(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) + GetFunc func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) + CreateFunc func(ctx 
context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) + UpdateFunc func(ctx context.Context, arc osmv1.AlertRelabelConfig) error + DeleteFunc func(ctx context.Context, namespace string, name string) error + + // Storage for test data + AlertRelabelConfigs map[string]*osmv1.AlertRelabelConfig +} + +func (m *MockAlertRelabelConfigInterface) SetAlertRelabelConfigs(configs map[string]*osmv1.AlertRelabelConfig) { + m.AlertRelabelConfigs = configs +} + +// List mocks the List method +func (m *MockAlertRelabelConfigInterface) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { + if m.ListFunc != nil { + return m.ListFunc(ctx, namespace) + } + + var configs []osmv1.AlertRelabelConfig + if m.AlertRelabelConfigs != nil { + for _, config := range m.AlertRelabelConfigs { + if namespace == "" || config.Namespace == namespace { + configs = append(configs, *config) + } + } + } + return configs, nil +} + +// Get mocks the Get method +func (m *MockAlertRelabelConfigInterface) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + if m.GetFunc != nil { + return m.GetFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.AlertRelabelConfigs != nil { + if config, exists := m.AlertRelabelConfigs[key]; exists { + return config, true, nil + } + } + + return nil, false, nil +} + +// Create mocks the Create method +func (m *MockAlertRelabelConfigInterface) Create(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + if m.CreateFunc != nil { + return m.CreateFunc(ctx, arc) + } + + key := arc.Namespace + "/" + arc.Name + if m.AlertRelabelConfigs == nil { + m.AlertRelabelConfigs = make(map[string]*osmv1.AlertRelabelConfig) + } + m.AlertRelabelConfigs[key] = &arc + return &arc, nil +} + +// Update mocks the Update method +func (m *MockAlertRelabelConfigInterface) Update(ctx context.Context, arc osmv1.AlertRelabelConfig) error { + if m.UpdateFunc != nil { + return m.UpdateFunc(ctx, arc) + } + + key := arc.Namespace + "/" + arc.Name + if m.AlertRelabelConfigs == nil { + m.AlertRelabelConfigs = make(map[string]*osmv1.AlertRelabelConfig) + } + m.AlertRelabelConfigs[key] = &arc + return nil +} + +// Delete mocks the Delete method +func (m *MockAlertRelabelConfigInterface) Delete(ctx context.Context, namespace string, name string) error { + if m.DeleteFunc != nil { + return m.DeleteFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.AlertRelabelConfigs != nil { + delete(m.AlertRelabelConfigs, key) + } + return nil +} + +// MockAlertingRuleInterface is a mock implementation of k8s.AlertingRuleInterface +type MockAlertingRuleInterface struct { + ListFunc func(ctx context.Context) ([]osmv1.AlertingRule, error) + GetFunc func(ctx context.Context, name string) (*osmv1.AlertingRule, bool, error) + CreateFunc func(ctx context.Context, ar osmv1.AlertingRule) (*osmv1.AlertingRule, error) + UpdateFunc func(ctx context.Context, ar osmv1.AlertingRule) error + DeleteFunc func(ctx context.Context, name string) error + + // Storage for test data + AlertingRules map[string]*osmv1.AlertingRule +} + +func (m *MockAlertingRuleInterface) SetAlertingRules(rules map[string]*osmv1.AlertingRule) { + m.AlertingRules = rules +} + +// List mocks the List method +func (m *MockAlertingRuleInterface) List(ctx context.Context) ([]osmv1.AlertingRule, error) { + if m.ListFunc != nil { + return m.ListFunc(ctx) + } + + var rules []osmv1.AlertingRule + if m.AlertingRules != nil { + for 
_, rule := range m.AlertingRules { + if rule.Namespace == k8s.ClusterMonitoringNamespace { + rules = append(rules, *rule) + } + } + } + return rules, nil +} + +// Get mocks the Get method +func (m *MockAlertingRuleInterface) Get(ctx context.Context, name string) (*osmv1.AlertingRule, bool, error) { + if m.GetFunc != nil { + return m.GetFunc(ctx, name) + } + + key := k8s.ClusterMonitoringNamespace + "/" + name + if m.AlertingRules != nil { + if rule, exists := m.AlertingRules[key]; exists { + return rule, true, nil + } + } + + return nil, false, nil +} + +// Create mocks the Create method +func (m *MockAlertingRuleInterface) Create(ctx context.Context, ar osmv1.AlertingRule) (*osmv1.AlertingRule, error) { + if m.CreateFunc != nil { + return m.CreateFunc(ctx, ar) + } + + key := ar.Namespace + "/" + ar.Name + if m.AlertingRules == nil { + m.AlertingRules = make(map[string]*osmv1.AlertingRule) + } + m.AlertingRules[key] = &ar + return &ar, nil +} + +// Update mocks the Update method +func (m *MockAlertingRuleInterface) Update(ctx context.Context, ar osmv1.AlertingRule) error { + if m.UpdateFunc != nil { + return m.UpdateFunc(ctx, ar) + } + + key := ar.Namespace + "/" + ar.Name + if m.AlertingRules == nil { + m.AlertingRules = make(map[string]*osmv1.AlertingRule) + } + m.AlertingRules[key] = &ar + return nil +} + +// Delete mocks the Delete method +func (m *MockAlertingRuleInterface) Delete(ctx context.Context, name string) error { + if m.DeleteFunc != nil { + return m.DeleteFunc(ctx, name) + } + + key := k8s.ClusterMonitoringNamespace + "/" + name + if m.AlertingRules != nil { + delete(m.AlertingRules, key) + } + return nil +} + +// MockRelabeledRulesInterface is a mock implementation of k8s.RelabeledRulesInterface +type MockRelabeledRulesInterface struct { + ListFunc func(ctx context.Context) []monitoringv1.Rule + GetFunc func(ctx context.Context, id string) (monitoringv1.Rule, bool) + ConfigFunc func() []*relabel.Config +} + +func (m *MockRelabeledRulesInterface) List(ctx context.Context) []monitoringv1.Rule { + if m.ListFunc != nil { + return m.ListFunc(ctx) + } + return []monitoringv1.Rule{} +} + +func (m *MockRelabeledRulesInterface) Get(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if m.GetFunc != nil { + return m.GetFunc(ctx, id) + } + return monitoringv1.Rule{}, false +} + +func (m *MockRelabeledRulesInterface) Config() []*relabel.Config { + if m.ConfigFunc != nil { + return m.ConfigFunc() + } + return []*relabel.Config{} +} + +// MockNamespaceInterface is a mock implementation of k8s.NamespaceInterface +type MockNamespaceInterface struct { + IsClusterMonitoringNamespaceFunc func(name string) bool + + // Storage for test data + MonitoringNamespaces map[string]bool +} + +func (m *MockNamespaceInterface) SetMonitoringNamespaces(namespaces map[string]bool) { + m.MonitoringNamespaces = namespaces +} + +// IsClusterMonitoringNamespace mocks the IsClusterMonitoringNamespace method +func (m *MockNamespaceInterface) IsClusterMonitoringNamespace(name string) bool { + if m.IsClusterMonitoringNamespaceFunc != nil { + return m.IsClusterMonitoringNamespaceFunc(name) + } + return m.MonitoringNamespaces[name] +} diff --git a/pkg/management/types.go b/pkg/management/types.go new file mode 100644 index 000000000..5ec3fc055 --- /dev/null +++ b/pkg/management/types.go @@ -0,0 +1,28 @@ +package management + +import ( + "context" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" +) + +// Client is the interface for managing alert rules +type Client 
interface { + // CreateUserDefinedAlertRule creates a new user-defined alert rule + CreateUserDefinedAlertRule(ctx context.Context, alertRule monitoringv1.Rule, prOptions PrometheusRuleOptions) (alertRuleId string, err error) + + // CreatePlatformAlertRule creates a new platform alert rule + CreatePlatformAlertRule(ctx context.Context, alertRule monitoringv1.Rule) (alertRuleId string, err error) +} + +// PrometheusRuleOptions specifies options for selecting PrometheusRule resources and groups +type PrometheusRuleOptions struct { + // Name of the PrometheusRule resource where the alert rule will be added/listed from + Name string `json:"prometheusRuleName"` + + // Namespace of the PrometheusRule resource where the alert rule will be added/listed from + Namespace string `json:"prometheusRuleNamespace"` + + // GroupName of the RuleGroup within the PrometheusRule resource + GroupName string `json:"groupName"` +} diff --git a/pkg/managementlabels/management_labels.go b/pkg/managementlabels/management_labels.go new file mode 100644 index 000000000..757c00fdf --- /dev/null +++ b/pkg/managementlabels/management_labels.go @@ -0,0 +1,28 @@ +package managementlabels + +const ( + // RuleManagedByLabel indicates which system manages the alert rule lifecycle. + RuleManagedByLabel = "openshift_io_rule_managed_by" + // RelabelConfigManagedByLabel indicates which system manages the relabel config lifecycle. + RelabelConfigManagedByLabel = "openshift_io_relabel_config_managed_by" + // AlertNameLabel is the standard Prometheus label for an alert's name. + AlertNameLabel = "alertname" + // AlertingRuleLabelName stores the name of the AlertingRule resource that owns the rule. + AlertingRuleLabelName = "openshift_io_alerting_rule_name" + + // ManagedByOperator indicates the resource is managed by a Kubernetes operator. + ManagedByOperator = "operator" + // ManagedByGitOps indicates the resource is managed via GitOps (e.g. ArgoCD, Flux). + ManagedByGitOps = "gitops" +) + +// ARC-related label and annotation keys link AlertRelabelConfigs back to their +// source PrometheusRule and alert, enabling lifecycle management. +const ( + // ARCLabelPrometheusRuleNameKey stores the name of the source PrometheusRule. + ARCLabelPrometheusRuleNameKey = "monitoring.openshift.io/prometheusrule-name" + // ARCLabelAlertNameKey stores the alert name this relabel config applies to. + ARCLabelAlertNameKey = "monitoring.openshift.io/alertname" + // ARCAnnotationAlertRuleIDKey stores the computed alert rule ID for cross-referencing. + ARCAnnotationAlertRuleIDKey = "monitoring.openshift.io/alertRuleId" +) diff --git a/pkg/server.go b/pkg/server.go index 552f06103..323c83656 100644 --- a/pkg/server.go +++ b/pkg/server.go @@ -21,6 +21,9 @@ import ( "k8s.io/client-go/rest" "k8s.io/client-go/tools/record" + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" "github.com/openshift/monitoring-plugin/pkg/proxy" ) @@ -146,7 +149,29 @@ func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { k8sclient = nil } - router, pluginConfig := setupRoutes(cfg) + // Initialize management client if management API feature is enabled. + // Use a bounded timeout so a slow/unreachable API server doesn't + // hang the entire server startup indefinitely. 
+ var managementClient management.Client + if alertManagementAPIMode { + const initTimeout = 30 * time.Second + initCtx, initCancel := context.WithTimeout(ctx, initTimeout) + defer initCancel() + + k8sClient, err := k8s.NewClient(initCtx, k8sconfig) + if err != nil { + return nil, fmt.Errorf("failed to create k8s client for alert management API: %w", err) + } + + if err := k8sClient.TestConnection(initCtx); err != nil { + return nil, fmt.Errorf("failed to connect to kubernetes cluster for alert management API: %w", err) + } + + managementClient = management.New(ctx, k8sClient) + log.Info("alert management API enabled") + } + + router, pluginConfig := setupRoutes(cfg, managementClient) router.Use(corsHeaderMiddleware()) tlsConfig := &tls.Config{} @@ -237,7 +262,7 @@ func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { return httpServer, nil } -func setupRoutes(cfg *Config) (*mux.Router, *PluginConfig) { +func setupRoutes(cfg *Config, managementClient management.Client) (*mux.Router, *PluginConfig) { configHandlerFunc, pluginConfig := configHandler(cfg) router := mux.NewRouter() @@ -248,6 +273,12 @@ func setupRoutes(cfg *Config) (*mux.Router, *PluginConfig) { router.PathPrefix("/features").HandlerFunc(featuresHandler(cfg)) router.PathPrefix("/config").HandlerFunc(configHandlerFunc) + + if managementClient != nil { + managementRouter := managementrouter.New(managementClient) + router.PathPrefix("/api/v1/alerting").Handler(managementRouter) + } + router.PathPrefix("/").Handler(filesHandler(http.Dir(cfg.StaticPath))) return router, pluginConfig diff --git a/test/e2e/create_alert_rule_test.go b/test/e2e/create_alert_rule_test.go new file mode 100644 index 000000000..abcda07d7 --- /dev/null +++ b/test/e2e/create_alert_rule_test.go @@ -0,0 +1,122 @@ +package e2e + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + +func TestCreateUserDefinedAlertRule(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-create-rule", false) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + payload := managementrouter.CreateAlertRuleRequest{ + AlertingRule: &managementrouter.AlertRuleSpec{ + Alert: strPtr("E2ECreateAlert"), + Expr: strPtr("vector(1)"), + For: strPtr("1m"), + Labels: &map[string]string{ + "severity": "info", + }, + Annotations: &map[string]string{ + "summary": "E2E test alert for create-rule", + }, + }, + PrometheusRule: &managementrouter.PrometheusRuleTarget{ + PrometheusRuleName: "e2e-create-pr", + PrometheusRuleNamespace: testNamespace, + }, + } + + reqBody, err := json.Marshal(payload) + if err != nil { + t.Fatalf("Failed to marshal request: %v", err) + } + + createURL := f.PluginURL + "/api/v1/alerting/rules" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, createURL, bytes.NewBuffer(reqBody)) + if err != nil { + t.Fatalf("Failed to create HTTP request: %v", err) + } + req.Header.Set("Content-Type", "application/json") + if f.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + + resp, err := f.HTTPClient().Do(req) + if err != nil { + t.Fatalf("Failed to make create request: %v", err) + } + defer 
resp.Body.Close() + + if resp.StatusCode != http.StatusCreated { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("Expected status 201, got %d. Body: %s", resp.StatusCode, string(body)) + } + + var createResp managementrouter.CreateAlertRuleResponse + if err := json.NewDecoder(resp.Body).Decode(&createResp); err != nil { + t.Fatalf("Failed to decode response: %v", err) + } + + if createResp.Id == "" { + t.Fatal("Expected non-empty rule ID in response") + } + t.Logf("Created rule with ID: %s", createResp.Id) + + promRule, err := f.Monitoringv1clientset.MonitoringV1().PrometheusRules(testNamespace).Get( + ctx, "e2e-create-pr", metav1.GetOptions{}, + ) + if err != nil { + t.Fatalf("Failed to get PrometheusRule: %v", err) + } + + if len(promRule.Spec.Groups) == 0 { + t.Fatal("Expected at least one rule group in PrometheusRule") + } + + var foundAlert bool + for _, group := range promRule.Spec.Groups { + for _, rule := range group.Rules { + if rule.Alert == "E2ECreateAlert" { + foundAlert = true + if rule.Expr.String() != "vector(1)" { + t.Errorf("Expected expr 'vector(1)', got %q", rule.Expr.String()) + } + if rule.For == nil || string(*rule.For) != "1m" { + t.Errorf("Expected for '1m', got %v", rule.For) + } + if rule.Labels["severity"] != "info" { + t.Errorf("Expected severity=info, got %q", rule.Labels["severity"]) + } + if rule.Annotations["summary"] != "E2E test alert for create-rule" { + t.Errorf("Expected summary annotation, got %q", rule.Annotations["summary"]) + } + } + } + } + + if !foundAlert { + t.Fatal("Alert 'E2ECreateAlert' not found in PrometheusRule") + } + + t.Log("Create alert rule e2e test passed successfully") +} diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go new file mode 100644 index 000000000..7982c4e58 --- /dev/null +++ b/test/e2e/framework/framework.go @@ -0,0 +1,133 @@ +package framework + +import ( + "context" + "crypto/tls" + "fmt" + "net/http" + "os" + "strconv" + "strings" + "time" + + osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +var f *Framework + +type Framework struct { + Clientset *kubernetes.Clientset + Monitoringv1clientset *monitoringv1client.Clientset + Osmv1clientset *osmv1client.Clientset + + PluginURL string + BearerToken string + httpClient *http.Client +} + +type CleanupFunc func() error + +func New() (*Framework, error) { + if f != nil { + return f, nil + } + + kubeConfigPath := os.Getenv("KUBECONFIG") + if kubeConfigPath == "" { + return nil, fmt.Errorf("KUBECONFIG environment variable not set") + } + + pluginURL := os.Getenv("PLUGIN_URL") + if pluginURL == "" { + return nil, fmt.Errorf("PLUGIN_URL environment variable not set, skipping management API e2e test") + } + + config, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath) + if err != nil { + return nil, fmt.Errorf("failed to build config: %w", err) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create clientset: %w", err) + } + + monitoringv1clientset, err := monitoringv1client.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create monitoringv1 clientset: %w", err) + } + + osmv1clientset, err := osmv1client.NewForConfig(config) + if err != 
nil { + return nil, fmt.Errorf("failed to create osmv1 clientset: %w", err) + } + + f = &Framework{ + Clientset: clientset, + Monitoringv1clientset: monitoringv1clientset, + Osmv1clientset: osmv1clientset, + PluginURL: pluginURL, + BearerToken: config.BearerToken, + } + + return f, nil +} + +// HTTPClient returns a shared *http.Client configured for the plugin URL. +// For HTTPS endpoints it skips certificate verification (self-signed certs +// used by in-cluster deployments behind port-forward). The client is reused +// across calls to keep connections alive and avoid exhausting port-forward tunnels. +func (f *Framework) HTTPClient() *http.Client { + if f.httpClient != nil { + return f.httpClient + } + transport := http.DefaultTransport.(*http.Transport).Clone() + if strings.HasPrefix(f.PluginURL, "https://") { + transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} + } + f.httpClient = &http.Client{ + Timeout: 30 * time.Second, + Transport: transport, + } + return f.httpClient +} + +func (f *Framework) CreateNamespace(ctx context.Context, name string, isClusterMonitoringNamespace bool) (string, CleanupFunc, error) { + testNamespace := fmt.Sprintf("%s-%d", name, time.Now().Unix()) + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNamespace, + Labels: map[string]string{ + k8s.ClusterMonitoringLabel: strconv.FormatBool(isClusterMonitoringNamespace), + }, + }, + } + + _, err := f.Clientset.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{}) + if err != nil { + return "", nil, fmt.Errorf("failed to create test namespace: %w", err) + } + + return testNamespace, func() error { + return f.Clientset.CoreV1().Namespaces().Delete(ctx, testNamespace, metav1.DeleteOptions{}) + }, nil +} + +// AuthorizedRequest creates an HTTP request with the Bearer token set. 
+func (f *Framework) AuthorizedRequest(ctx context.Context, method, url string, body interface{ Read([]byte) (int, error) }) (*http.Request, error) { + req, err := http.NewRequestWithContext(ctx, method, url, body) + if err != nil { + return nil, err + } + if f.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + return req, nil +} diff --git a/test/e2e/helpers_test.go b/test/e2e/helpers_test.go new file mode 100644 index 000000000..8071266fe --- /dev/null +++ b/test/e2e/helpers_test.go @@ -0,0 +1,70 @@ +package e2e + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "testing" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + +func strPtr(s string) *string { return &s } + +func createRuleViaAPI(t *testing.T, f *framework.Framework, ctx context.Context, namespace, alertName, prName string) string { + t.Helper() + + payload := managementrouter.CreateAlertRuleRequest{ + AlertingRule: &managementrouter.AlertRuleSpec{ + Alert: &alertName, + Expr: strPtr("vector(1)"), + For: strPtr("1m"), + Labels: &map[string]string{ + "severity": "info", + }, + }, + PrometheusRule: &managementrouter.PrometheusRuleTarget{ + PrometheusRuleName: prName, + PrometheusRuleNamespace: namespace, + }, + } + + reqBody, err := json.Marshal(payload) + if err != nil { + t.Fatalf("Failed to marshal create request for %s: %v", alertName, err) + } + + createURL := f.PluginURL + "/api/v1/alerting/rules" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, createURL, bytes.NewBuffer(reqBody)) + if err != nil { + t.Fatalf("Failed to create HTTP request for %s: %v", alertName, err) + } + req.Header.Set("Content-Type", "application/json") + if f.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + + resp, err := f.HTTPClient().Do(req) + if err != nil { + t.Fatalf("Failed to make create request for %s: %v", alertName, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusCreated { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("Create %s: expected 201, got %d. Body: %s", alertName, resp.StatusCode, string(body)) + } + + var createResp managementrouter.CreateAlertRuleResponse + if err := json.NewDecoder(resp.Body).Decode(&createResp); err != nil { + t.Fatalf("Failed to decode create response for %s: %v", alertName, err) + } + + if createResp.Id == "" { + t.Fatalf("Got empty ID for %s", alertName) + } + return createResp.Id +}
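
As context for the API surface added above: a minimal sketch of how the management.Client interface from pkg/management/types.go is consumed once it has been constructed via management.New (as wired up in pkg/server.go). The alert name, expression, PrometheusRule name, namespace, and group name below are illustrative placeholders only, not values used anywhere in this change.

// Illustrative sketch only; not part of this change.
package example

import (
	"context"
	"fmt"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	"k8s.io/apimachinery/pkg/util/intstr"

	"github.com/openshift/monitoring-plugin/pkg/management"
)

// createExampleRule shows the shape of a CreateUserDefinedAlertRule call.
func createExampleRule(ctx context.Context, client management.Client) error {
	forDuration := monitoringv1.Duration("5m")
	rule := monitoringv1.Rule{
		Alert:  "ExampleHighLatency",           // hypothetical alert name
		Expr:   intstr.FromString("vector(1)"), // placeholder PromQL expression
		For:    &forDuration,
		Labels: map[string]string{"severity": "warning"},
	}

	// User-defined rules are written into an existing PrometheusRule resource,
	// selected through PrometheusRuleOptions (see pkg/management/types.go).
	id, err := client.CreateUserDefinedAlertRule(ctx, rule, management.PrometheusRuleOptions{
		Name:      "example-rules",     // hypothetical PrometheusRule name
		Namespace: "example-namespace", // hypothetical namespace
		GroupName: "example.rules",     // hypothetical rule group
	})
	if err != nil {
		return err
	}
	fmt.Println("created alert rule with ID:", id)
	return nil
}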