Skip to content

Commit 371ee72

Browse files
committed
MON-4036: Add TelemeterClientConfig to ClusterMonitoring API
Migrate the telemeter-client configmap settings to a CRD field within ClusterMonitoringSpec in config/v1alpha1.

The new TelemeterClientConfig struct supports:
- nodeSelector: pod scheduling to specific nodes
- resources: compute resource requests and limits
- tolerations: pod tolerations for scheduling
- topologySpreadConstraints: pod distribution across topology domains

Signed-off-by: Daniel Mellado <dmellado@fedoraproject.org>
1 parent f6ee4c0 commit 371ee72

8 files changed

Lines changed: 1858 additions & 97 deletions

File tree

config/v1alpha1/tests/clustermonitorings.config.openshift.io/ClusterMonitoringConfig.yaml

Lines changed: 516 additions & 46 deletions
Large diffs are not rendered by default.

config/v1alpha1/types_cluster_monitoring.go

Lines changed: 96 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,12 @@ type ClusterMonitoringSpec struct {
113113
// When omitted, this means no opinion and the platform is left to choose a reasonable default, which is subject to change over time.
114114
// +optional
115115
OpenShiftStateMetricsConfig OpenShiftStateMetricsConfig `json:"openShiftStateMetricsConfig,omitempty,omitzero"`
116+
// telemeterClientConfig is an optional field that can be used to configure the Telemeter Client
117+
// component that runs in the openshift-monitoring namespace. The Telemeter Client collects
118+
// selected monitoring metrics and forwards them to Red Hat for telemetry purposes.
119+
// When omitted, this means no opinion and the platform is left to choose a reasonable default, which is subject to change over time.
120+
// +optional
121+
TelemeterClientConfig TelemeterClientConfig `json:"telemeterClientConfig,omitempty,omitzero"`
116122
}
117123

118124
// UserDefinedMonitoring config for user-defined projects.
@@ -202,13 +208,13 @@ type AlertmanagerCustomConfig struct {
202208
// - name: memory
203209
// request: 40Mi
204210
// limit: null
205-
// Maximum length for this list is 10.
211+
// Maximum length for this list is 5.
206212
// Minimum length for this list is 1.
207213
// Each resource name must be unique within this list.
208214
// +optional
209215
// +listType=map
210216
// +listMapKey=name
211-
// +kubebuilder:validation:MaxItems=10
217+
// +kubebuilder:validation:MaxItems=5
212218
// +kubebuilder:validation:MinItems=1
213219
Resources []ContainerResource `json:"resources,omitempty"`
214220
// secrets defines a list of secrets that need to be mounted into the Alertmanager.
@@ -309,6 +315,13 @@ const (
309315
)
310316

311317
// ContainerResource defines a single resource requirement for a container.
318+
// ---
319+
// MaxItems on []ContainerResource fields is kept at 5 to stay within the
320+
// Kubernetes CRD CEL validation cost budget (StaticEstimatedCRDCostLimit).
321+
// The quantity() CEL function has a high fixed estimated cost per invocation,
322+
// and the limit-vs-request comparison rule is costed per maxItems per location.
323+
// With multiple structs in ClusterMonitoringSpec embedding []ContainerResource,
324+
// maxItems > 5 causes the total estimated rule cost to exceed the budget.
312325
// +kubebuilder:validation:XValidation:rule="has(self.request) || has(self.limit)",message="at least one of request or limit must be set"
313326
// +kubebuilder:validation:XValidation:rule="!(has(self.request) && has(self.limit)) || quantity(self.limit).compareTo(quantity(self.request)) >= 0",message="limit must be greater than or equal to request"
314327
type ContainerResource struct {
@@ -407,13 +420,13 @@ type MetricsServerConfig struct {
407420
// - name: memory
408421
// request: 40Mi
409422
// limit: null
410-
// Maximum length for this list is 10.
423+
// Maximum length for this list is 5.
411424
// Minimum length for this list is 1.
412425
// Each resource name must be unique within this list.
413426
// +optional
414427
// +listType=map
415428
// +listMapKey=name
416-
// +kubebuilder:validation:MaxItems=10
429+
// +kubebuilder:validation:MaxItems=5
417430
// +kubebuilder:validation:MinItems=1
418431
Resources []ContainerResource `json:"resources,omitempty"`
419432
// topologySpreadConstraints defines rules for how Metrics Server Pods should be distributed
@@ -478,13 +491,13 @@ type PrometheusOperatorConfig struct {
478491
// - name: memory
479492
// request: 40Mi
480493
// limit: null
481-
// Maximum length for this list is 10.
494+
// Maximum length for this list is 5.
482495
// Minimum length for this list is 1.
483496
// Each resource name must be unique within this list.
484497
// +optional
485498
// +listType=map
486499
// +listMapKey=name
487-
// +kubebuilder:validation:MaxItems=10
500+
// +kubebuilder:validation:MaxItems=5
488501
// +kubebuilder:validation:MinItems=1
489502
Resources []ContainerResource `json:"resources,omitempty"`
490503
// tolerations defines tolerations for the pods.
@@ -542,13 +555,13 @@ type PrometheusOperatorAdmissionWebhookConfig struct {
542555
// - name: memory
543556
// request: 30Mi
544557
// limit: null
545-
// Maximum length for this list is 10.
558+
// Maximum length for this list is 5.
546559
// Minimum length for this list is 1.
547560
// Each resource name must be unique within this list.
548561
// +optional
549562
// +listType=map
550563
// +listMapKey=name
551-
// +kubebuilder:validation:MaxItems=10
564+
// +kubebuilder:validation:MaxItems=5
552565
// +kubebuilder:validation:MinItems=1
553566
Resources []ContainerResource `json:"resources,omitempty"`
554567
// topologySpreadConstraints defines rules for how admission webhook Pods should be distributed
@@ -602,13 +615,13 @@ type OpenShiftStateMetricsConfig struct {
602615
// - name: memory
603616
// request: 32Mi
604617
// limit: null
605-
// Maximum length for this list is 10.
618+
// Maximum length for this list is 5.
606619
// Minimum length for this list is 1.
607620
// Each resource name must be unique within this list.
608621
// +optional
609622
// +listType=map
610623
// +listMapKey=name
611-
// +kubebuilder:validation:MaxItems=10
624+
// +kubebuilder:validation:MaxItems=5
612625
// +kubebuilder:validation:MinItems=1
613626
Resources []ContainerResource `json:"resources,omitempty"`
614627
// tolerations defines tolerations for the pods.
@@ -645,6 +658,79 @@ type OpenShiftStateMetricsConfig struct {
645658
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
646659
}
647660

661+
// TelemeterClientConfig provides configuration options for the Telemeter Client component
662+
// that runs in the `openshift-monitoring` namespace. The Telemeter Client collects selected
663+
// monitoring metrics and forwards them to Red Hat for telemetry purposes.
// When specified, at least one field must be set.
664+
// +kubebuilder:validation:MinProperties=1
665+
type TelemeterClientConfig struct {
666+
// nodeSelector defines the nodes on which the Telemeter Client Pods are scheduled.
667+
// This field is optional.
668+
//
669+
// When omitted, this means the user has no opinion and the platform is left
670+
// to choose reasonable defaults. These defaults are subject to change over time.
671+
// The current default value is `kubernetes.io/os: linux`.
672+
// When specified, nodeSelector must contain at least 1 entry and must not contain more than 10 entries.
673+
// +optional
674+
// +kubebuilder:validation:MinProperties=1
675+
// +kubebuilder:validation:MaxProperties=10
676+
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
677+
// resources defines the compute resource requests and limits for the Telemeter Client container.
678+
// This includes CPU, memory and HugePages constraints to help control scheduling and resource usage.
679+
// When not specified, defaults are used by the platform. Requests cannot exceed limits.
680+
// This field is optional.
681+
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
682+
// This is a simplified API that maps to Kubernetes ResourceRequirements.
683+
// The current default values are:
684+
// resources:
685+
// - name: cpu
686+
// request: 1m
687+
// limit: null
688+
// - name: memory
689+
// request: 40Mi
690+
// limit: null
691+
// Maximum length for this list is 5.
692+
// Minimum length for this list is 1.
693+
// Each resource name must be unique within this list.
694+
// +optional
695+
// +listType=map
696+
// +listMapKey=name
697+
// +kubebuilder:validation:MaxItems=5
698+
// +kubebuilder:validation:MinItems=1
699+
Resources []ContainerResource `json:"resources,omitempty"`
700+
// tolerations defines the tolerations applied to the Telemeter Client Pods.
701+
// This field is optional.
702+
//
703+
// When omitted, this means the user has no opinion and the platform is left
704+
// to choose reasonable defaults. These defaults are subject to change over time.
705+
// The default is an empty list.
706+
// Maximum length for this list is 10.
707+
// Minimum length for this list is 1.
708+
// +kubebuilder:validation:MaxItems=10
709+
// +kubebuilder:validation:MinItems=1
710+
// +listType=atomic
711+
// +optional
712+
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
713+
// topologySpreadConstraints defines rules for how Telemeter Client Pods should be distributed
714+
// across topology domains such as zones, nodes, or other user-defined labels.
715+
// This field is optional.
716+
// This helps improve high availability and resource efficiency by avoiding placing
717+
// too many replicas in the same failure domain.
718+
//
719+
// When omitted, this means no opinion and the platform is left to choose a default, which is subject to change over time.
720+
// This field maps directly to the `topologySpreadConstraints` field in the Pod spec.
721+
// The default is an empty list.
722+
// Maximum length for this list is 10.
723+
// Minimum length for this list is 1.
724+
// Entries must have unique topologyKey and whenUnsatisfiable pairs.
725+
// +kubebuilder:validation:MaxItems=10
726+
// +kubebuilder:validation:MinItems=1
727+
// +listType=map
728+
// +listMapKey=topologyKey
729+
// +listMapKey=whenUnsatisfiable
730+
// +optional
731+
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
732+
}
733+
648734
// AuditProfile defines the audit log level for the Metrics Server.
// Allowed values are None, Metadata, Request, and RequestResponse.
649735
// +kubebuilder:validation:Enum=None;Metadata;Request;RequestResponse
650736
type AuditProfile string

0 commit comments

Comments
 (0)