diff --git a/modules/administration-guide/partials/proc_collecting-che-metrics-with-prometheus.adoc b/modules/administration-guide/partials/proc_collecting-che-metrics-with-prometheus.adoc index b86014256a..36c7b53c2a 100644 --- a/modules/administration-guide/partials/proc_collecting-che-metrics-with-prometheus.adoc +++ b/modules/administration-guide/partials/proc_collecting-che-metrics-with-prometheus.adoc @@ -11,95 +11,37 @@ To use the in-cluster Prometheus instance to collect, store, and query JVM metri * An active `oc` session with administrative permissions to the destination OpenShift cluster. See link:https://docs.openshift.com/container-platform/{ocp4-ver}/cli_reference/openshift_cli/getting-started-cli.html[Getting started with the CLI]. -* {prod-short} is exposing metrics on port `8087`. See xref:enabling-and-exposing-{prod-id-short}-metrics[Enabling and exposing {prod-short} server JVM metrics]. +* {prod-short} Server metrics are enabled. See xref:enabling-and-exposing-{prod-id-short}-metrics[Enabling and exposing {prod-short} Server JVM metrics]. + +* Prometheus Operator is installed on the cluster. .Procedure -. Create the ServiceMonitor for detecting the {prod-short} JVM metrics Service. +. Allow the in-cluster Prometheus instance to detect the ServiceMonitor in the {prod-short} namespace. The default {prod-short} namespace is `{prod-namespace}`. + -.ServiceMonitor -==== -[source,yaml,subs="+quotes,+attributes,+macros"] +[source,terminal,subs="+attributes,quotes"] ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: che-host - namespace: {prod-namespace} <1> -spec: - endpoints: - - interval: 10s <2> - port: metrics - scheme: http - namespaceSelector: - matchNames: - - {prod-namespace} <1> - selector: - matchLabels: - app.kubernetes.io/name: {prod-deployment} +$ oc label namespace {prod-namespace} openshift.io/cluster-monitoring=true ---- -<1> The {prod-short} namespace. The default is `{prod-namespace}`. 
-<2> The rate at which a target is scraped. -==== - -. Create a Role and RoleBinding to allow Prometheus to view the metrics. - + -.Role -==== -[source,yaml,subs="+quotes,+attributes,+macros"] ----- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: prometheus-k8s - namespace: {prod-namespace} <1> -rules: - - verbs: - - get - - list - - watch - apiGroups: - - '' - resources: - - services - - endpoints - - pods ----- -<1> The {prod-short} namespace. The default is `{prod-namespace}`. -==== +NOTE: The {prod-short} Operator automatically creates and manages the ServiceMonitor, Role, and RoleBinding resources required for Prometheus to scrape {prod-short} Server metrics when metrics are enabled. + +.Verification +. Verify that the ServiceMonitor has been created: + -.RoleBinding -==== -[source,yaml,subs="+quotes,+attributes,+macros"] +[source,terminal,subs="+attributes,quotes"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: view-{prod-id-short}-openshift-monitoring-prometheus-k8s - namespace: {prod-namespace} <1> -subjects: - - kind: ServiceAccount - name: prometheus-k8s - namespace: openshift-monitoring -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: prometheus-k8s +$ oc get servicemonitor -n {prod-namespace} ---- -<1> The {prod-short} namespace. The default is `{prod-namespace}`. -==== - -. Allow the in-cluster Prometheus instance to detect the ServiceMonitor in the {prod-short} namespace. The default {prod-short} namespace is `{prod-namespace}`. + -[source,terminal,subs="+attributes,quotes"] +.Example output +[source,terminal,subs="+attributes"] ---- -$ oc label namespace {prod-namespace} openshift.io/cluster-monitoring=true +NAME AGE +che-host 5m ---- -.Verification - . In the *Administrator* view of the OpenShift web console, go to *Observe* -> *Metrics*. . Run a PromQL query to confirm that the metrics are available. 
For example, enter `process_uptime_seconds{job="che-host"}` and click *Run queries*. @@ -107,19 +49,20 @@ $ oc label namespace {prod-namespace} openshift.io/cluster-monitoring=true [TIP] ==== -To troubleshoot missing metrics, view the Prometheus container logs for possible RBAC-related errors: +To troubleshoot missing metrics: -. Get the name of the Prometheus pod: +. Verify the ServiceMonitor, Role, and RoleBinding were created by the {prod-short} Operator: + -[source,yaml,subs="+quotes"] +[source,terminal,subs="+attributes,quotes"] ---- -$ oc get pods -l app.kubernetes.io/name=prometheus -n openshift-monitoring -o=jsonpath='{.items[*].metadata.name}' +$ oc get servicemonitor,role,rolebinding -n {prod-namespace} | grep -E "che-host|prometheus" ---- -. Print the last 20 lines of the Prometheus container logs from the Prometheus pod from the previous step: +. Get the name of the Prometheus pod, then view the last 20 lines of its container logs for possible RBAC-related errors: + [source,yaml,subs="+quotes"] ---- +$ oc get pods -l app.kubernetes.io/name=prometheus -n openshift-monitoring -o=jsonpath='{.items[*].metadata.name}' $ oc logs --tail=20 __<prometheus_pod_name>__ -c prometheus -n openshift-monitoring ---- diff --git a/modules/administration-guide/partials/proc_collecting-dev-workspace-operator-metrics-with-prometheus.adoc b/modules/administration-guide/partials/proc_collecting-dev-workspace-operator-metrics-with-prometheus.adoc index b198f24bd1..3ae0ac0cbf 100644 --- a/modules/administration-guide/partials/proc_collecting-dev-workspace-operator-metrics-with-prometheus.adoc +++ b/modules/administration-guide/partials/proc_collecting-dev-workspace-operator-metrics-with-prometheus.adoc @@ -10,41 +10,11 @@ To use the in-cluster Prometheus instance to collect, store, and query metrics a * An active `oc` session with administrative permissions to the destination OpenShift cluster. See link:https://docs.openshift.com/container-platform/{ocp4-ver}/cli_reference/openshift_cli/getting-started-cli.html[Getting started with the CLI]. 
-* The `devworkspace-controller-metrics` Service is exposing metrics on port `8443`. This is preconfigured by default. +* {devworkspace} Operator metrics are enabled. See xref:enabling-and-exposing-{prod-id-short}-metrics[Enabling {devworkspace} Operator metrics]. -.Procedure - -. Create the ServiceMonitor for detecting the Dev Workspace Operator metrics Service. -+ -.ServiceMonitor -==== -[source,yaml,subs="+quotes,+attributes,+macros"] ----- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: devworkspace-controller - namespace: {prod-namespace} <1> -spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - interval: 10s <2> - port: metrics - scheme: https - tlsConfig: - insecureSkipVerify: true - namespaceSelector: - matchNames: - - openshift-operators - selector: - matchLabels: - app.kubernetes.io/name: devworkspace-controller ----- -<1> The {prod-short} namespace. The default is `{prod-namespace}`. -<2> The rate at which a target is scraped. -==== +* Prometheus Operator is installed on the cluster. -include::example$snip_{project-context}-create-a-role-and-rolebinding-for-prometheus-to-view-metrics.adoc[] +.Procedure . Allow the in-cluster Prometheus instance to detect the ServiceMonitor in the {prod-short} namespace. The default {prod-short} namespace is `{prod-namespace}`. + @@ -52,9 +22,25 @@ include::example$snip_{project-context}-create-a-role-and-rolebinding-for-promet ---- $ oc label namespace {prod-namespace} openshift.io/cluster-monitoring=true ---- ++ +NOTE: The {prod-short} Operator automatically creates and manages the ServiceMonitor, Role, and RoleBinding resources required for Prometheus to scrape {devworkspace} Operator metrics when metrics are enabled. .Verification +. 
Verify that the ServiceMonitor has been created: ++ +[source,terminal,subs="+attributes,quotes"] +---- +$ oc get servicemonitor -n {prod-namespace} +---- ++ +.Example output +[source,terminal,subs="+attributes"] +---- +NAME AGE +devworkspace-controller 5m +---- + . For a fresh installation of {prod-short}, generate metrics by creating a {prod-short} workspace from the Dashboard. . In the *Administrator* view of the OpenShift web console, go to *Observe* -> *Metrics*. @@ -66,19 +52,20 @@ For more metrics, see xref:ref_devworkspace-specific-metrics[]. [TIP] ==== -To troubleshoot missing metrics, view the Prometheus container logs for possible RBAC-related errors: +To troubleshoot missing metrics: -. Get the name of the Prometheus pod: +. Verify the ServiceMonitor, Role, and RoleBinding were created by the {prod-short} Operator: + -[source,yaml,subs="+quotes"] +[source,terminal,subs="+attributes,quotes"] ---- -$ oc get pods -l app.kubernetes.io/name=prometheus -n openshift-monitoring -o=jsonpath='{.items[*].metadata.name}' +$ oc get servicemonitor,role,rolebinding -n {prod-namespace} | grep -E "devworkspace|prometheus" ---- -. Print the last 20 lines of the Prometheus container logs from the Prometheus pod from the previous step: +. 
Get the name of the Prometheus pod, then view the last 20 lines of its container logs for possible RBAC-related errors: + [source,yaml,subs="+quotes"] ---- +$ oc get pods -l app.kubernetes.io/name=prometheus -n openshift-monitoring -o=jsonpath='{.items[*].metadata.name}' $ oc logs --tail=20 __<prometheus_pod_name>__ -c prometheus -n openshift-monitoring ---- diff --git a/modules/administration-guide/partials/proc_enabling-and-exposing-che-metrics.adoc b/modules/administration-guide/partials/proc_enabling-and-exposing-che-metrics.adoc index ae1669ce81..db6729fe71 100644 --- a/modules/administration-guide/partials/proc_enabling-and-exposing-che-metrics.adoc +++ b/modules/administration-guide/partials/proc_enabling-and-exposing-che-metrics.adoc @@ -1,14 +1,20 @@ // monitoring-{prod-id-short} [id="enabling-and-exposing-{prod-id-short}-metrics"] -= Enabling and exposing {prod-short} Server metrics += Enabling and exposing metrics -{prod-short} exposes the JVM metrics on port `8087` of the `che-host` Service. -You can configure this behaviour. +You can enable metrics collection for {prod-short} Server and the {devworkspace} Operator. +When enabled, the {prod-short} Operator automatically creates and manages the necessary Prometheus resources (ServiceMonitor, Role, and RoleBinding). + +.Prerequisites + +* An active `{orch-cli}` session with administrative permissions to the destination {orch-name} cluster. See {orch-cli-link}. + +* Prometheus Operator is installed on the cluster. .Procedure -* Configure the `CheCluster` Custom Resource. See xref:using-the-cli-to-configure-the-checluster-custom-resource.adoc[]. +* Configure the `CheCluster` Custom Resource to enable or disable metrics collection. See xref:using-the-cli-to-configure-the-checluster-custom-resource.adoc[]. + [source,yaml,subs="+attributes,+quotes"] ---- @@ -16,5 +22,9 @@ spec: components: metrics: enable: __<boolean>__ <1> + devEnvironments: + metrics: + enable: __<boolean>__ <2> ---- -<1> `true` to enable, `false` to disable. 
+<1> Set to `true` to enable, or `false` to disable, {prod-short} Server JVM metrics on port `8087` of the `che-host` Service. Default: `false`. +<2> Set to `true` to enable, or `false` to disable, {devworkspace} Operator metrics on port `8443` of the `devworkspace-controller-metrics` Service. Default: `true`.