From a5cd8543870b06862e5dad0cf93c10630f3ad06f Mon Sep 17 00:00:00 2001 From: Anatolii Bazko Date: Mon, 11 May 2026 12:23:20 +0000 Subject: [PATCH] docs: Update metrics procedures for automated Prometheus resource management Update documentation to reflect that the Che Operator now automatically manages Prometheus resources (ServiceMonitor, Role, RoleBinding) for both Che Server and DevWorkspace Operator metrics collection. Changes: - Remove manual ServiceMonitor/RBAC creation steps from collection procedures - Add note about automatic resource management by the operator - Document new spec.devEnvironments.metrics.enable field for DWO metrics - Update prerequisites to mention Prometheus Operator requirement - Improve verification steps to check operator-created resources Related: eclipse-che/che-operator#2117 --- ...ollecting-che-metrics-with-prometheus.adoc | 101 ++++-------------- ...pace-operator-metrics-with-prometheus.adoc | 63 +++++------ ...roc_enabling-and-exposing-che-metrics.adoc | 20 +++- 3 files changed, 62 insertions(+), 122 deletions(-) diff --git a/modules/administration-guide/partials/proc_collecting-che-metrics-with-prometheus.adoc b/modules/administration-guide/partials/proc_collecting-che-metrics-with-prometheus.adoc index b86014256a..36c7b53c2a 100644 --- a/modules/administration-guide/partials/proc_collecting-che-metrics-with-prometheus.adoc +++ b/modules/administration-guide/partials/proc_collecting-che-metrics-with-prometheus.adoc @@ -11,95 +11,37 @@ To use the in-cluster Prometheus instance to collect, store, and query JVM metri * An active `oc` session with administrative permissions to the destination OpenShift cluster. See link:https://docs.openshift.com/container-platform/{ocp4-ver}/cli_reference/openshift_cli/getting-started-cli.html[Getting started with the CLI]. -* {prod-short} is exposing metrics on port `8087`. See xref:enabling-and-exposing-{prod-id-short}-metrics[Enabling and exposing {prod-short} server JVM metrics]. 
+* {prod-short} Server metrics are enabled. See xref:enabling-and-exposing-{prod-id-short}-metrics[Enabling and exposing {prod-short} Server JVM metrics]. + +* Prometheus Operator is installed on the cluster. .Procedure -. Create the ServiceMonitor for detecting the {prod-short} JVM metrics Service. +. Allow the in-cluster Prometheus instance to detect the ServiceMonitor in the {prod-short} namespace. The default {prod-short} namespace is `{prod-namespace}`. + -.ServiceMonitor -==== -[source,yaml,subs="+quotes,+attributes,+macros"] +[source,terminal,subs="+attributes,quotes"] ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: che-host - namespace: {prod-namespace} <1> -spec: - endpoints: - - interval: 10s <2> - port: metrics - scheme: http - namespaceSelector: - matchNames: - - {prod-namespace} <1> - selector: - matchLabels: - app.kubernetes.io/name: {prod-deployment} +$ oc label namespace {prod-namespace} openshift.io/cluster-monitoring=true ---- -<1> The {prod-short} namespace. The default is `{prod-namespace}`. -<2> The rate at which a target is scraped. -==== - -. Create a Role and RoleBinding to allow Prometheus to view the metrics. - + -.Role -==== -[source,yaml,subs="+quotes,+attributes,+macros"] ----- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: prometheus-k8s - namespace: {prod-namespace} <1> -rules: - - verbs: - - get - - list - - watch - apiGroups: - - '' - resources: - - services - - endpoints - - pods ----- -<1> The {prod-short} namespace. The default is `{prod-namespace}`. -==== +NOTE: The {prod-short} Operator automatically creates and manages the ServiceMonitor, Role, and RoleBinding resources required for Prometheus to scrape {prod-short} Server metrics when metrics are enabled. + +.Verification +. 
Verify that the ServiceMonitor has been created: + -.RoleBinding -==== -[source,yaml,subs="+quotes,+attributes,+macros"] +[source,terminal,subs="+attributes,quotes"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: view-{prod-id-short}-openshift-monitoring-prometheus-k8s - namespace: {prod-namespace} <1> -subjects: - - kind: ServiceAccount - name: prometheus-k8s - namespace: openshift-monitoring -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: prometheus-k8s +$ oc get servicemonitor -n {prod-namespace} ---- -<1> The {prod-short} namespace. The default is `{prod-namespace}`. -==== - -. Allow the in-cluster Prometheus instance to detect the ServiceMonitor in the {prod-short} namespace. The default {prod-short} namespace is `{prod-namespace}`. + -[source,terminal,subs="+attributes,quotes"] +.Example output +[source,terminal,subs="+attributes"] ---- -$ oc label namespace {prod-namespace} openshift.io/cluster-monitoring=true +NAME AGE +che-host 5m ---- -.Verification - . In the *Administrator* view of the OpenShift web console, go to *Observe* -> *Metrics*. . Run a PromQL query to confirm that the metrics are available. For example, enter `process_uptime_seconds{job="che-host"}` and click *Run queries*. @@ -107,19 +49,20 @@ $ oc label namespace {prod-namespace} openshift.io/cluster-monitoring=true [TIP] ==== -To troubleshoot missing metrics, view the Prometheus container logs for possible RBAC-related errors: +To troubleshoot missing metrics: -. Get the name of the Prometheus pod: +. Verify the ServiceMonitor, Role, and RoleBinding were created by the {prod-short} Operator: + -[source,yaml,subs="+quotes"] +[source,terminal,subs="+attributes,quotes"] ---- -$ oc get pods -l app.kubernetes.io/name=prometheus -n openshift-monitoring -o=jsonpath='{.items[*].metadata.name}' +$ oc get servicemonitor,role,rolebinding -n {prod-namespace} | grep -E "che-host|prometheus" ---- -. 
Print the last 20 lines of the Prometheus container logs from the Prometheus pod from the previous step: +. Get the name of the Prometheus pod, and then view the last 20 lines of its container logs for possible RBAC-related errors: + [source,yaml,subs="+quotes"] ---- +$ oc get pods -l app.kubernetes.io/name=prometheus -n openshift-monitoring -o=jsonpath='{.items[*].metadata.name}' $ oc logs --tail=20 ____ -c prometheus -n openshift-monitoring ---- diff --git a/modules/administration-guide/partials/proc_collecting-dev-workspace-operator-metrics-with-prometheus.adoc b/modules/administration-guide/partials/proc_collecting-dev-workspace-operator-metrics-with-prometheus.adoc index b198f24bd1..3ae0ac0cbf 100644 --- a/modules/administration-guide/partials/proc_collecting-dev-workspace-operator-metrics-with-prometheus.adoc +++ b/modules/administration-guide/partials/proc_collecting-dev-workspace-operator-metrics-with-prometheus.adoc @@ -10,41 +10,11 @@ To use the in-cluster Prometheus instance to collect, store, and query metrics a * An active `oc` session with administrative permissions to the destination OpenShift cluster. See link:https://docs.openshift.com/container-platform/{ocp4-ver}/cli_reference/openshift_cli/getting-started-cli.html[Getting started with the CLI]. -* The `devworkspace-controller-metrics` Service is exposing metrics on port `8443`. This is preconfigured by default. +* {devworkspace} Operator metrics are enabled. See xref:enabling-and-exposing-{prod-id-short}-metrics[Enabling {devworkspace} Operator metrics]. -.Procedure - -. Create the ServiceMonitor for detecting the Dev Workspace Operator metrics Service. 
-+ -.ServiceMonitor -==== -[source,yaml,subs="+quotes,+attributes,+macros"] ----- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: devworkspace-controller - namespace: {prod-namespace} <1> -spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - interval: 10s <2> - port: metrics - scheme: https - tlsConfig: - insecureSkipVerify: true - namespaceSelector: - matchNames: - - openshift-operators - selector: - matchLabels: - app.kubernetes.io/name: devworkspace-controller ----- -<1> The {prod-short} namespace. The default is `{prod-namespace}`. -<2> The rate at which a target is scraped. -==== +* Prometheus Operator is installed on the cluster. -include::example$snip_{project-context}-create-a-role-and-rolebinding-for-prometheus-to-view-metrics.adoc[] +.Procedure . Allow the in-cluster Prometheus instance to detect the ServiceMonitor in the {prod-short} namespace. The default {prod-short} namespace is `{prod-namespace}`. + @@ -52,9 +22,25 @@ include::example$snip_{project-context}-create-a-role-and-rolebinding-for-promet ---- $ oc label namespace {prod-namespace} openshift.io/cluster-monitoring=true ---- ++ +NOTE: The {prod-short} Operator automatically creates and manages the ServiceMonitor, Role, and RoleBinding resources required for Prometheus to scrape {devworkspace} Operator metrics when metrics are enabled. .Verification +. Verify that the ServiceMonitor has been created: ++ +[source,terminal,subs="+attributes,quotes"] +---- +$ oc get servicemonitor -n {prod-namespace} +---- ++ +.Example output +[source,terminal,subs="+attributes"] +---- +NAME AGE +devworkspace-controller 5m +---- + . For a fresh installation of {prod-short}, generate metrics by creating a {prod-short} workspace from the Dashboard. . In the *Administrator* view of the OpenShift web console, go to *Observe* -> *Metrics*. @@ -66,19 +52,20 @@ For more metrics, see xref:ref_devworkspace-specific-metrics[]. 
[TIP] ==== -To troubleshoot missing metrics, view the Prometheus container logs for possible RBAC-related errors: +To troubleshoot missing metrics: -. Get the name of the Prometheus pod: +. Verify the ServiceMonitor, Role, and RoleBinding were created by the {prod-short} Operator: + -[source,yaml,subs="+quotes"] +[source,terminal,subs="+attributes,quotes"] ---- -$ oc get pods -l app.kubernetes.io/name=prometheus -n openshift-monitoring -o=jsonpath='{.items[*].metadata.name}' +$ oc get servicemonitor,role,rolebinding -n {prod-namespace} | grep -E "devworkspace|prometheus" ---- -. Print the last 20 lines of the Prometheus container logs from the Prometheus pod from the previous step: +. Get the name of the Prometheus pod, and then view the last 20 lines of its container logs for possible RBAC-related errors: + [source,yaml,subs="+quotes"] ---- +$ oc get pods -l app.kubernetes.io/name=prometheus -n openshift-monitoring -o=jsonpath='{.items[*].metadata.name}' $ oc logs --tail=20 ____ -c prometheus -n openshift-monitoring ---- diff --git a/modules/administration-guide/partials/proc_enabling-and-exposing-che-metrics.adoc b/modules/administration-guide/partials/proc_enabling-and-exposing-che-metrics.adoc index ae1669ce81..db6729fe71 100644 --- a/modules/administration-guide/partials/proc_enabling-and-exposing-che-metrics.adoc +++ b/modules/administration-guide/partials/proc_enabling-and-exposing-che-metrics.adoc @@ -1,14 +1,20 @@ // monitoring-{prod-id-short} [id="enabling-and-exposing-{prod-id-short}-metrics"] -= Enabling and exposing {prod-short} Server metrics += Enabling and exposing metrics -{prod-short} exposes the JVM metrics on port `8087` of the `che-host` Service. -You can configure this behaviour. +You can enable metrics collection for {prod-short} Server and the {devworkspace} Operator. +When enabled, the {prod-short} Operator automatically creates and manages the necessary Prometheus resources (ServiceMonitor, Role, and RoleBinding). 
+ +.Prerequisites + +* An active `{orch-cli}` session with administrative permissions to the destination {orch-name} cluster. See {orch-cli-link}. + +* Prometheus Operator is installed on the cluster. .Procedure -* Configure the `CheCluster` Custom Resource. See xref:using-the-cli-to-configure-the-checluster-custom-resource.adoc[]. +* Configure the `CheCluster` Custom Resource to enable or disable metrics collection. See xref:using-the-cli-to-configure-the-checluster-custom-resource.adoc[]. + [source,yaml,subs="+attributes,+quotes"] ---- @@ -16,5 +22,9 @@ spec: components: metrics: enable: ____ <1> + devEnvironments: + metrics: + enable: ____ <2> ---- -<1> `true` to enable, `false` to disable. +<1> `true` to enable {prod-short} Server JVM metrics on port `8087` of the `che-host` Service, `false` to disable. Default: `false`. +<2> `true` to enable {devworkspace} Operator metrics on port `8443` of the `devworkspace-controller-metrics` Service, `false` to disable. Default: `true`.