Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/unit-tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Runs the Go unit tests (everything except the e2e suite) on pull requests.
name: Unit Tests

on:
  pull_request:
    # NOTE(review): only PRs targeting this branch trigger the workflow —
    # confirm whether other base branches (e.g. main) should be covered too.
    branches:
      - add-alert-management-api-base

# Least-privilege token: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          # Take the Go toolchain version from go.mod instead of pinning it here.
          go-version-file: go.mod

      - name: Run tests
        # -count=1 bypasses the test cache; packages under /test/e2e are
        # filtered out of the package list.
        run: go test -count=1 $(go list ./... | grep -v /test/e2e)
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ RUN make install-backend

COPY cmd/ cmd/
COPY pkg/ pkg/
COPY internal/ internal/

ENV GOEXPERIMENT=strictfipsruntime
ENV CGO_ENABLED=1
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ RUN go mod download

COPY cmd/ cmd/
COPY pkg/ pkg/
COPY internal/ internal/

RUN go build -mod=mod -o plugin-backend cmd/plugin-backend.go

Expand Down
1 change: 1 addition & 0 deletions Dockerfile.dev-mcp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ RUN go mod download

COPY cmd/ cmd/
COPY pkg/ pkg/
COPY internal/ internal/

RUN go build -mod=mod -o plugin-backend cmd/plugin-backend.go

Expand Down
1 change: 1 addition & 0 deletions Dockerfile.devspace
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ RUN make install-backend
COPY config/ config/
COPY cmd/ cmd/
COPY pkg/ pkg/
COPY internal/ internal/

RUN make build-backend

Expand Down
1 change: 1 addition & 0 deletions Dockerfile.konflux
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ RUN make install-backend

COPY cmd/ cmd/
COPY pkg/ pkg/
COPY internal/ internal/

ENV GOEXPERIMENT=strictfipsruntime
ENV CGO_ENABLED=1
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.mcp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ RUN make install-backend

COPY cmd/ cmd/
COPY pkg/ pkg/
COPY internal/ internal/

ENV GOOS=${TARGETOS:-linux}
ENV GOARCH=${TARGETARCH}
Expand Down
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ lint-frontend:
# Download the Go module dependencies.
install-backend:
	go mod download

# Regenerate the management API Go code from api/openapi.yaml using the
# generator settings in api/oapi-codegen.yaml.
.PHONY: generate-backend
generate-backend:
	go run github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen --config api/oapi-codegen.yaml api/openapi.yaml

# Compile cmd/plugin-backend.go into ./plugin-backend.
# -mod=readonly makes the build fail rather than implicitly update go.mod.
# NOTE(review): BUILD_OPTS is defined outside this section — presumably extra
# `go build` flags; confirm against its definition.
.PHONY: build-backend
build-backend:
	go build $(BUILD_OPTS) -mod=readonly -o plugin-backend cmd/plugin-backend.go
Expand All @@ -56,7 +60,11 @@ start-backend:

# Run the backend unit tests for the pkg/ and internal/ trees, verbosely.
# A single invocation covers both trees, so pkg/ is not tested twice.
.PHONY: test-backend
test-backend:
	go test ./pkg/... ./internal/... -v

# Run the end-to-end test suite in ./test/e2e.
# PLUGIN_URL defaults to http://localhost:9001 and can be overridden from the
# environment or the make command line, e.g.:
#   make test-e2e PLUGIN_URL=http://localhost:9443
# -count=1 bypasses the test cache; the e2e run is allowed up to 150 minutes.
.PHONY: test-e2e
test-e2e:
	PLUGIN_URL="$${PLUGIN_URL:-http://localhost:9001}" go test -v -timeout=150m -count=1 ./test/e2e

.PHONY: test-frontend
test-frontend:
Expand Down
19 changes: 19 additions & 0 deletions api/oapi-codegen.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# oapi-codegen configuration for the monitoring-plugin management API.
# Run: oapi-codegen --config api/oapi-codegen.yaml api/openapi.yaml

package: managementrouter
output: internal/managementrouter/api_generated.go

generate:
  # Generate the gorilla/mux server bindings (ServerInterface plus the
  # Handler / HandlerFromMux / HandlerWithOptions constructors)
  gorilla-server: true
  # Generate request/response types from the spec schemas
  models: true
  # Do not generate an embedded spec — it adds binary bloat with no benefit here
  embedded-spec: false

output-options:
  # Keep formatting of the generated Go code enabled. This is the default and
  # is spelled out only for clarity; it does not affect the generated
  # "DO NOT EDIT" header.
  skip-fmt: false
  # No built-in templates are overridden; the stock oapi-codegen templates are used.
  user-templates: {}
151 changes: 151 additions & 0 deletions api/openapi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# OpenAPI description of the monitoring-plugin management API.
# This file is the input to oapi-codegen (see api/oapi-codegen.yaml).
openapi: "3.0.3"
info:
  title: Monitoring Plugin Management API
  description: >
    API for managing alert rules in OpenShift Monitoring Plugin.
    All endpoints require a valid OpenShift user bearer token in the
    Authorization header (forwarded by the console bridge).
  version: "1.0.0"

# NOTE(review): the bearer-token requirement is stated only in the description
# above; no securitySchemes/security section is declared — confirm whether
# that is intentional.
servers:
  - url: /api/v1/alerting

paths:
  /rules:
    post:
      operationId: CreateAlertRule
      summary: Create an alert rule
      description: >
        Creates a new alert rule. If prometheusRule is omitted the rule is
        created as a platform alert rule; if prometheusRule is provided the
        rule is created as a user-defined alert rule in the specified
        PrometheusRule resource.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/CreateAlertRuleRequest"
      responses:
        "201":
          description: Alert rule created successfully
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CreateAlertRuleResponse"
        # Every error status below shares the ErrorResponse body shape.
        "400":
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "401":
          description: Missing or invalid authorization token
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "404":
          description: Resource not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "405":
          description: Operation not allowed (e.g. rule is externally managed)
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "409":
          description: Conflict (e.g. duplicate rule ID)
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "500":
          description: Unexpected server error
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

components:
  schemas:
    # NOTE(review): no property is marked required here, although the
    # descriptions say `alert` must be set for alerting rules — confirm that
    # validation happens server-side.
    AlertRuleSpec:
      type: object
      description: >
        Specification of a Prometheus alerting or recording rule.
        Maps to prometheus-operator Rule fields.
      properties:
        alert:
          type: string
          description: Name of the alert. Must be set for alerting rules.
        record:
          type: string
          description: Name of the time series for recording rules.
        expr:
          type: string
          description: PromQL expression to evaluate.
        for:
          type: string
          description: Duration the condition must be true before firing (e.g. "5m").
        labels:
          type: object
          additionalProperties:
            type: string
          description: Labels to attach to alerts produced by the rule.
        annotations:
          type: object
          additionalProperties:
            type: string
          description: Annotations to attach to alerts produced by the rule.
        keepFiringFor:
          type: string
          description: >
            Duration to keep alert firing after the condition is no longer true.

    PrometheusRuleTarget:
      type: object
      description: >
        Identifies the PrometheusRule resource and rule group where the alert
        rule will be stored. Required for user-defined alert rules.
      required:
        - prometheusRuleName
        - prometheusRuleNamespace
      properties:
        prometheusRuleName:
          type: string
          description: Name of the PrometheusRule resource.
        prometheusRuleNamespace:
          type: string
          description: Namespace of the PrometheusRule resource.
        groupName:
          type: string
          description: Name of the rule group within the PrometheusRule. Optional.

    # Request body of POST /rules. prometheusRule is optional by design (its
    # absence selects the platform-rule path described on the operation).
    # NOTE(review): alertingRule is not marked required either — confirm
    # whether a request without it should be accepted.
    CreateAlertRuleRequest:
      type: object
      properties:
        alertingRule:
          $ref: "#/components/schemas/AlertRuleSpec"
        prometheusRule:
          $ref: "#/components/schemas/PrometheusRuleTarget"

    CreateAlertRuleResponse:
      type: object
      required:
        - id
      properties:
        id:
          type: string
          description: Computed stable ID for the created alert rule.

    # Body returned with every non-2xx response of this API.
    ErrorResponse:
      type: object
      required:
        - error
      properties:
        error:
          type: string
          description: Human-readable error message.
41 changes: 41 additions & 0 deletions docs/alert-management.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
## Alert Management Notes

This document covers alert management behavior and prerequisites for the monitoring plugin.

### User workload monitoring prerequisites

To include **user workload** alerts and rules in `/api/v1/alerting/alerts` and `/api/v1/alerting/rules`, the user workload monitoring (UWM) stack must be enabled. Follow the OpenShift documentation for enabling and configuring UWM:

https://docs.redhat.com/en/documentation/monitoring_stack_for_red_hat_openshift/4.20/html/configuring_user_workload_monitoring/configuring-alerts-and-notifications-uwm

#### How the plugin reads user workload alerts/rules

The plugin prefers **Thanos tenancy** for user workload alerts/rules (RBAC-scoped, requires a namespace parameter). When the client does not provide a `namespace` filter, the plugin discovers candidate namespaces and queries Thanos tenancy per-namespace, using the end-user bearer token.

Routes in `openshift-user-workload-monitoring` are treated as **fallbacks** (and are also used for some health checks and pending state retrieval).

If you want to create the user workload Prometheus route (optional), you can expose the service:

```shell
oc -n openshift-user-workload-monitoring expose svc/prometheus-user-workload-web --name=prometheus-user-workload-web --port=web
```

If the route is missing/unreachable but tenancy is healthy, the plugin should still return user workload data and suppress route warnings.

#### Alert states

- `/api/v1/alerting/alerts?state=pending`: pending alerts come from Prometheus.
- `/api/v1/alerting/alerts?state=firing`: firing alerts come from Alertmanager when available.
- `/api/v1/alerting/alerts?state=silenced`: silenced alerts come from Alertmanager (requires an Alertmanager endpoint).

### Alertmanager routing choices

OpenShift supports routing user workload alerts to:

- The **platform Alertmanager** (default instance)
- A **separate Alertmanager** for user workloads
- **External Alertmanager** instances

This is a cluster configuration choice and does not change the plugin API shape. The plugin reads alerts from Alertmanager (for firing/silenced) and Prometheus (for pending), then merges platform and user workload results when available.

The plugin intentionally reads only from the in-cluster Alertmanager endpoints. Supporting multiple external Alertmanagers would introduce ambiguous alert state and silencing outcomes, because each instance can apply different routing, inhibition, and silence configurations.
14 changes: 12 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ require (
github.com/evanphx/json-patch v4.12.0+incompatible
github.com/gorilla/handlers v1.5.2
github.com/gorilla/mux v1.8.1
github.com/openshift/api v0.0.0-20251122153900-88cca31a44c9
github.com/openshift/client-go v0.0.0-20251123231646-4685125c2287
github.com/openshift/library-go v0.0.0-20240905123346-5bdbfe35a6f5
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0
github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0
github.com/prometheus/common v0.67.4
github.com/prometheus/prometheus v0.308.0
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.11.1
gopkg.in/yaml.v2 v2.4.0
Expand All @@ -20,7 +23,10 @@ require (
)

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dennwc/varint v1.0.0 // indirect
github.com/emicklei/go-restful/v3 v3.13.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
Expand All @@ -43,16 +49,20 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/gnostic-models v0.7.0 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/pprof v0.0.0-20250923004556-9e5a51aed1e8 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/openshift/api v0.0.0-20251122153900-88cca31a44c9 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.23.2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/spf13/pflag v1.0.6 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.yaml.in/yaml/v2 v2.4.3 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/net v0.46.0 // indirect
Expand Down
Loading