diff --git a/cmd/main.go b/cmd/main.go index 8cd103c..28b1cde 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -41,13 +41,13 @@ func main() { var enableHTTP2 bool var tlsOpts []func(*tls.Config) - flag.StringVar(&metricsAddr, "metrics-bind-address", ":8443", + flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metrics endpoint binds to. Use 0 to disable.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", true, "Enable leader election for controller manager.") - flag.BoolVar(&secureMetrics, "metrics-secure", true, + flag.BoolVar(&secureMetrics, "metrics-secure", false, "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.") flag.StringVar(&metricsCertPath, "metrics-cert-path", "", "The directory that contains the metrics server certificate.") @@ -153,9 +153,11 @@ func main() { platform.SnapshotRegion = v } + nodeRecorder := mgr.GetEventRecorderFor("seinode-controller") if err := (&nodecontroller.SeiNodeReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), + Recorder: nodeRecorder, Platform: platform, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "Failed to create controller", "controller", "SeiNode") diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 440434d..1808362 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -8,3 +8,4 @@ resources: - ../rbac - ../manager - ../network-policy + - ../monitoring diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index b382061..75331a3 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -29,7 +29,7 @@ spec: - command: - /manager args: - - --metrics-bind-address=:8443 + - --metrics-bind-address=:8080 - --leader-elect - --health-probe-bind-address=:8081 image: 189176372795.dkr.ecr.us-east-2.amazonaws.com/sei/sei-k8s-controller@sha256:87b8b8ed04013f23ada6f2a34e162c98708c90424b2e6cd552bc7ac1e4284d9f @@ -63,7 +63,10 @@ spec: value: eu-central-1 - name: SEI_CONTROLLER_SA_PRINCIPAL value: "cluster.local/ns/sei-k8s-controller-system/sa/sei-k8s-controller-manager" - ports: [] + ports: + - containerPort: 8080 + name: metrics + protocol: TCP securityContext: readOnlyRootFilesystem: true allowPrivilegeEscalation: false diff --git a/config/manager/metrics_service.yaml b/config/manager/metrics_service.yaml index ac7d1de..f19bb8e 100644 --- a/config/manager/metrics_service.yaml +++ b/config/manager/metrics_service.yaml @@ -9,10 +9,10 @@ metadata: namespace: system spec: ports: - - name: https - port: 8443 + - name: http-metrics + port: 8080 protocol: TCP - targetPort: 8443 + targetPort: 8080 selector: control-plane: controller-manager app.kubernetes.io/name: sei-k8s-controller diff --git a/config/monitoring/kustomization.yaml b/config/monitoring/kustomization.yaml new file mode 100644 index 0000000..c295252 --- /dev/null +++ b/config/monitoring/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - service-monitor.yaml + - prometheus-rule.yaml diff --git a/config/monitoring/prometheus-rule.yaml b/config/monitoring/prometheus-rule.yaml new file mode 100644 index 0000000..4638287 --- /dev/null +++ b/config/monitoring/prometheus-rule.yaml @@ -0,0 +1,82 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: controller-alerts + namespace: system + labels: + control-plane: controller-manager + app.kubernetes.io/name: sei-k8s-controller + app.kubernetes.io/managed-by: kustomize +spec: + groups: + - name: sei-controller + rules: + - alert: SeiNodeGroupDegraded + expr: sei_controller_seinodegroup_phase{phase="Degraded"} == 1 + for: 10m + labels: + severity: warning + team: platform + annotations: + summary: "SeiNodeGroup {{ $labels.namespace }}/{{ $labels.name }} is degraded" + description: "Some nodes are not ready. Check child SeiNode status." + + - alert: SeiNodeGroupFailed + expr: sei_controller_seinodegroup_phase{phase="Failed"} == 1 + for: 5m + labels: + severity: critical + team: platform + annotations: + summary: "SeiNodeGroup {{ $labels.namespace }}/{{ $labels.name }} has failed" + description: "All nodes have failed. Immediate investigation required." + + - alert: SeiNodeStuckInitializing + expr: sei_controller_seinode_phase{phase="Initializing"} == 1 + for: 30m + labels: + severity: warning + team: platform + annotations: + summary: "SeiNode {{ $labels.namespace }}/{{ $labels.name }} stuck initializing" + description: "Node has been in Initializing phase for over 30 minutes." + + - alert: SeiNodeStuckPending + expr: max by (namespace, name) (sei_controller_seinode_phase{phase=~"Pending|PreInitializing"}) == 1 + for: 15m + labels: + severity: warning + team: platform + annotations: + summary: "SeiNode {{ $labels.namespace }}/{{ $labels.name }} stuck pending" + description: "Node has been in Pending/PreInitializing phase for over 15 minutes." + + - alert: SidecarUnreachableHigh + expr: rate(sei_controller_sidecar_unreachable_total[5m]) > 0.5 + for: 10m + labels: + severity: warning + team: platform + annotations: + summary: "Sidecar for {{ $labels.namespace }}/{{ $labels.node }} is frequently unreachable" + description: "Sidecar connectivity failures sustained above 0.5/s for 10 minutes." + + - alert: ControllerReconcileErrors + expr: increase(sei_controller_reconcile_errors_total[15m]) > 5 + for: 5m + labels: + severity: warning + team: platform + annotations: + summary: "Controller {{ $labels.controller }} has elevated reconcile errors" + description: "More than 5 reconcile errors in the last 15 minutes for {{ $labels.namespace }}/{{ $labels.name }}." + + - alert: ControllerHighReconcileLatency + expr: histogram_quantile(0.99, rate(sei_controller_seinodegroup_reconcile_substep_duration_seconds_bucket[5m])) > 10 + for: 10m + labels: + severity: warning + team: platform + annotations: + summary: "SeiNodeGroup reconcile substep {{ $labels.substep }} is slow" + description: "p99 latency above 10s for 10 minutes." diff --git a/config/monitoring/service-monitor.yaml b/config/monitoring/service-monitor.yaml new file mode 100644 index 0000000..30a4373 --- /dev/null +++ b/config/monitoring/service-monitor.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: controller-manager-metrics + namespace: system + labels: + control-plane: controller-manager + app.kubernetes.io/name: sei-k8s-controller + app.kubernetes.io/managed-by: kustomize +spec: + selector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: sei-k8s-controller + endpoints: + - port: http-metrics + path: /metrics + interval: 30s diff --git a/config/network-policy/allow-metrics-traffic.yaml b/config/network-policy/allow-metrics-traffic.yaml index 3871532..2ec5842 100644 --- a/config/network-policy/allow-metrics-traffic.yaml +++ b/config/network-policy/allow-metrics-traffic.yaml @@ -19,5 +19,5 @@ spec: matchLabels: metrics: enabled ports: - - port: 8443 + - port: 8080 protocol: TCP diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 0ee23d2..e6ecc0a 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -11,6 +11,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -68,6 +69,7 @@ func DefaultPlatformConfig() PlatformConfig { type SeiNodeReconciler struct { client.Client Scheme *runtime.Scheme + Recorder record.EventRecorder Platform PlatformConfig // BuildSidecarClientFn overrides sidecar client construction for testing. BuildSidecarClientFn func(node *seiv1alpha1.SeiNode) SidecarStatusClient @@ -81,6 +83,7 @@ type SeiNodeReconciler struct { // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch +// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { node := &seiv1alpha1.SeiNode{} @@ -91,6 +94,10 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, err } + if node.Status.Phase != "" { + emitNodePhase(node.Namespace, node.Name, node.Status.Phase) + } + if !node.DeletionTimestamp.IsZero() { return r.handleNodeDeletion(ctx, node) } @@ -144,6 +151,13 @@ func (r *SeiNodeReconciler) reconcilePending(ctx context.Context, node *seiv1alp if err := r.Status().Patch(ctx, node, patch); err != nil { return ctrl.Result{}, fmt.Errorf("initializing plans: %w", err) } + + ns, name := node.Namespace, node.Name + nodePhaseTransitions.WithLabelValues(ns, string(seiv1alpha1.PhasePending), string(seiv1alpha1.PhasePreInitializing)).Inc() + emitNodePhase(ns, name, seiv1alpha1.PhasePreInitializing) + r.Recorder.Eventf(node, corev1.EventTypeNormal, "PhaseTransition", + "Phase changed from %s to %s", seiv1alpha1.PhasePending, seiv1alpha1.PhasePreInitializing) + return ctrl.Result{RequeueAfter: immediateRequeue}, nil } @@ -159,6 +173,7 @@ func (r *SeiNodeReconciler) reconcileInitializing(ctx context.Context, node *sei sc := r.buildSidecarClient(node) if sc == nil { + sidecarUnreachableTotal.WithLabelValues(node.Namespace, node.Name).Inc() log.FromContext(ctx).Info("sidecar not reachable yet, will retry") return ctrl.Result{RequeueAfter: taskPollInterval}, nil } @@ -169,10 +184,10 @@ func (r *SeiNodeReconciler) reconcileInitializing(ctx context.Context, node *sei } if node.Status.InitPlan.Phase == seiv1alpha1.TaskPlanComplete { - return r.setPhase(ctx, node, seiv1alpha1.PhaseRunning) + return r.transitionPhase(ctx, node, seiv1alpha1.PhaseRunning) } if node.Status.InitPlan.Phase == seiv1alpha1.TaskPlanFailed { - return r.setPhase(ctx, node, seiv1alpha1.PhaseFailed) + return r.transitionPhase(ctx, node, seiv1alpha1.PhaseFailed) } return result, nil } @@ -181,19 +196,40 @@ func (r *SeiNodeReconciler) reconcileInitializing(ctx context.Context, node *sei func (r *SeiNodeReconciler) reconcileRunning(ctx context.Context, node *seiv1alpha1.SeiNode) (ctrl.Result, error) { sc := r.buildSidecarClient(node) if sc == nil { + sidecarUnreachableTotal.WithLabelValues(node.Namespace, node.Name).Inc() log.FromContext(ctx).Info("sidecar not reachable, will retry") return ctrl.Result{RequeueAfter: statusPollInterval}, nil } return r.reconcileRuntimeTasks(ctx, node, sc) } -// setPhase transitions the node to a new phase. -func (r *SeiNodeReconciler) setPhase(ctx context.Context, node *seiv1alpha1.SeiNode, phase seiv1alpha1.SeiNodePhase) (ctrl.Result, error) { +// transitionPhase transitions the node to a new phase and emits the associated +// metric counter, phase gauge, and Kubernetes event. +func (r *SeiNodeReconciler) transitionPhase(ctx context.Context, node *seiv1alpha1.SeiNode, phase seiv1alpha1.SeiNodePhase) (ctrl.Result, error) { + prev := node.Status.Phase + if prev == "" { + prev = seiv1alpha1.PhasePending + } + patch := client.MergeFrom(node.DeepCopy()) node.Status.Phase = phase if err := r.Status().Patch(ctx, node, patch); err != nil { return ctrl.Result{}, fmt.Errorf("setting phase to %s: %w", phase, err) } + + ns, name := node.Namespace, node.Name + nodePhaseTransitions.WithLabelValues(ns, string(prev), string(phase)).Inc() + emitNodePhase(ns, name, phase) + + if phase == seiv1alpha1.PhaseRunning { + dur := time.Since(node.CreationTimestamp.Time).Seconds() + nodeInitDuration.WithLabelValues(ns, node.Spec.ChainID).Observe(dur) + nodeLastInitDuration.WithLabelValues(ns, name).Set(dur) + } + + r.Recorder.Eventf(node, corev1.EventTypeNormal, "PhaseTransition", + "Phase changed from %s to %s", prev, phase) + return ctrl.Result{RequeueAfter: immediateRequeue}, nil } @@ -235,6 +271,8 @@ func (r *SeiNodeReconciler) handleNodeDeletion(ctx context.Context, node *seiv1a } } + cleanupNodeMetrics(node.Namespace, node.Name) + controllerutil.RemoveFinalizer(node, nodeFinalizerName) return ctrl.Result{}, r.Update(ctx, node) } diff --git a/internal/controller/node/metrics.go b/internal/controller/node/metrics.go new file mode 100644 index 0000000..71436b9 --- /dev/null +++ b/internal/controller/node/metrics.go @@ -0,0 +1,92 @@ +package node + +import ( + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/controller/observability" +) + +var allNodePhases = []string{ + string(seiv1alpha1.PhasePending), + string(seiv1alpha1.PhasePreInitializing), + string(seiv1alpha1.PhaseInitializing), + string(seiv1alpha1.PhaseRunning), + string(seiv1alpha1.PhaseFailed), + string(seiv1alpha1.PhaseTerminating), +} + +var ( + nodePhaseGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "sei_controller_seinode_phase", + Help: "Current phase of each SeiNode (1=active, 0=inactive)", + }, + []string{"namespace", "name", "phase"}, + ) + + nodePhaseTransitions = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "sei_controller_seinode_phase_transitions_total", + Help: "Phase state machine transitions", + }, + []string{"namespace", "from", "to"}, + ) + + nodeInitDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "sei_controller_seinode_init_duration_seconds", + Help: "Time from Pending to Running", + Buckets: observability.InitBuckets, + }, + []string{"namespace", "chain_id"}, + ) + + nodeLastInitDuration = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "sei_controller_seinode_last_init_duration_seconds", + Help: "Per-node init duration, set once when node reaches Running", + }, + []string{"namespace", "name"}, + ) + + sidecarRequestDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "sei_controller_sidecar_request_duration_seconds", + Help: "Duration of HTTP requests to the seictl sidecar", + Buckets: observability.ReconcileBuckets, + }, + []string{"namespace", "method", "route", "status_code"}, + ) + + sidecarUnreachableTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "sei_controller_sidecar_unreachable_total", + Help: "Number of times the sidecar was unreachable", + }, + []string{"namespace", "node"}, + ) +) + +func init() { + metrics.Registry.MustRegister( + nodePhaseGauge, + nodePhaseTransitions, + nodeInitDuration, + nodeLastInitDuration, + sidecarRequestDuration, + sidecarUnreachableTotal, + ) +} + +func emitNodePhase(ns, name string, phase seiv1alpha1.SeiNodePhase) { + observability.EmitPhaseGauge(nodePhaseGauge, ns, name, string(phase), allNodePhases) +} + +func cleanupNodeMetrics(namespace, name string) { + observability.DeletePhaseGauge(nodePhaseGauge, namespace, name, allNodePhases) + nodeLastInitDuration.DeleteLabelValues(namespace, name) + sidecarUnreachableTotal.DeleteLabelValues(namespace, name) + observability.ReconcileErrorsTotal.DeleteLabelValues(seiNodeControllerName, namespace, name) +} diff --git a/internal/controller/node/plan_execution_test.go b/internal/controller/node/plan_execution_test.go index 4aae881..a5e541e 100644 --- a/internal/controller/node/plan_execution_test.go +++ b/internal/controller/node/plan_execution_test.go @@ -15,6 +15,7 @@ import ( k8sruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -117,6 +118,7 @@ func newProgressionReconciler(t *testing.T, mock *mockSidecarClient, objs ...cli r := &SeiNodeReconciler{ Client: c, Scheme: s, + Recorder: record.NewFakeRecorder(100), Platform: DefaultPlatformConfig(), BuildSidecarClientFn: func(_ *seiv1alpha1.SeiNode) SidecarStatusClient { return mock diff --git a/internal/controller/node/pre_init.go b/internal/controller/node/pre_init.go index dade841..100cd3d 100644 --- a/internal/controller/node/pre_init.go +++ b/internal/controller/node/pre_init.go @@ -35,13 +35,13 @@ func (r *SeiNodeReconciler) reconcilePreInitializing(ctx context.Context, node * if err := r.cleanupPreInit(ctx, node); err != nil { return ctrl.Result{}, fmt.Errorf("cleaning up pre-init resources: %w", err) } - return r.setPhase(ctx, node, seiv1alpha1.PhaseInitializing) + return r.transitionPhase(ctx, node, seiv1alpha1.PhaseInitializing) } if plan.Phase == seiv1alpha1.TaskPlanFailed { - // if err := r.cleanupPreInit(ctx, node); err != nil { - // log.FromContext(ctx).Error(err, "failed to clean up pre-init resources after plan failure") - // } - return r.setPhase(ctx, node, seiv1alpha1.PhaseFailed) + if err := r.cleanupPreInit(ctx, node); err != nil { + log.FromContext(ctx).Error(err, "failed to clean up pre-init resources after plan failure") + } + return r.transitionPhase(ctx, node, seiv1alpha1.PhaseFailed) } if len(plan.Tasks) == 0 { @@ -109,13 +109,13 @@ func (r *SeiNodeReconciler) reconcilePreInitializing(ctx context.Context, node * if err := r.cleanupPreInit(ctx, node); err != nil { return ctrl.Result{}, fmt.Errorf("cleaning up pre-init resources: %w", err) } - return r.setPhase(ctx, node, seiv1alpha1.PhaseInitializing) + return r.transitionPhase(ctx, node, seiv1alpha1.PhaseInitializing) } if plan.Phase == seiv1alpha1.TaskPlanFailed { - // if err := r.cleanupPreInit(ctx, node); err != nil { - // log.FromContext(ctx).Error(err, "failed to clean up pre-init resources after plan failure") - // } - return r.setPhase(ctx, node, seiv1alpha1.PhaseFailed) + if err := r.cleanupPreInit(ctx, node); err != nil { + log.FromContext(ctx).Error(err, "failed to clean up pre-init resources after plan failure") + } + return r.transitionPhase(ctx, node, seiv1alpha1.PhaseFailed) } return result, nil } diff --git a/internal/controller/node/reconciler_test.go b/internal/controller/node/reconciler_test.go index 4761c65..a929d61 100644 --- a/internal/controller/node/reconciler_test.go +++ b/internal/controller/node/reconciler_test.go @@ -11,6 +11,7 @@ import ( k8sruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -41,6 +42,7 @@ func newNodeReconciler(t *testing.T, objs ...client.Object) (*SeiNodeReconciler, r := &SeiNodeReconciler{ Client: c, Scheme: s, + Recorder: record.NewFakeRecorder(100), Platform: DefaultPlatformConfig(), BuildSidecarClientFn: func(_ *seiv1alpha1.SeiNode) SidecarStatusClient { return &mockSidecarClient{} diff --git a/internal/controller/nodegroup/controller.go b/internal/controller/nodegroup/controller.go index e5d3ae5..7b79157 100644 --- a/internal/controller/nodegroup/controller.go +++ b/internal/controller/nodegroup/controller.go @@ -15,6 +15,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/controller/observability" ) const ( @@ -70,19 +71,29 @@ func (r *SeiNodeGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request // Conditions set during networking/monitoring reconciliation are captured // in the diff when updateStatus patches against this base. statusBase := client.MergeFrom(group.DeepCopy()) + ns, name := group.Namespace, group.Name - if err := r.reconcileSeiNodes(ctx, group); err != nil { + if err := timeSubstep("reconcileSeiNodes", func() error { + return r.reconcileSeiNodes(ctx, group) + }); err != nil { logger.Error(err, "reconciling SeiNodes") + observability.ReconcileErrorsTotal.WithLabelValues(controllerName, ns, name).Inc() return ctrl.Result{}, fmt.Errorf("reconciling SeiNodes: %w", err) } - if err := r.reconcileNetworking(ctx, group); err != nil { + if err := timeSubstep("reconcileNetworking", func() error { + return r.reconcileNetworking(ctx, group) + }); err != nil { logger.Error(err, "reconciling networking") + observability.ReconcileErrorsTotal.WithLabelValues(controllerName, ns, name).Inc() return ctrl.Result{}, fmt.Errorf("reconciling networking: %w", err) } - if err := r.reconcileMonitoring(ctx, group); err != nil { + if err := timeSubstep("reconcileMonitoring", func() error { + return r.reconcileMonitoring(ctx, group) + }); err != nil { logger.Error(err, "reconciling monitoring") + observability.ReconcileErrorsTotal.WithLabelValues(controllerName, ns, name).Inc() return ctrl.Result{}, fmt.Errorf("reconciling monitoring: %w", err) } @@ -90,6 +101,10 @@ func (r *SeiNodeGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{}, fmt.Errorf("updating status: %w", err) } + emitGroupPhase(ns, name, group.Status.Phase) + emitGroupReplicas(ns, name, group.Spec.Replicas, group.Status.ReadyReplicas) + emitGroupConditions(ns, name, group.Status.Conditions) + return ctrl.Result{RequeueAfter: statusPollInterval}, nil } @@ -133,6 +148,8 @@ func (r *SeiNodeGroupReconciler) handleDeletion(ctx context.Context, group *seiv } } + cleanupGroupMetrics(group.Namespace, group.Name) + finalizerPatch := client.MergeFrom(group.DeepCopy()) controllerutil.RemoveFinalizer(group, groupFinalizerName) return ctrl.Result{}, r.Patch(ctx, group, finalizerPatch) diff --git a/internal/controller/nodegroup/metrics.go b/internal/controller/nodegroup/metrics.go new file mode 100644 index 0000000..63745e4 --- /dev/null +++ b/internal/controller/nodegroup/metrics.go @@ -0,0 +1,121 @@ +package nodegroup + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/metrics" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/controller/observability" +) + +var allGroupPhases = []string{ + string(seiv1alpha1.GroupPhasePending), + string(seiv1alpha1.GroupPhaseInitializing), + string(seiv1alpha1.GroupPhaseReady), + string(seiv1alpha1.GroupPhaseDegraded), + string(seiv1alpha1.GroupPhaseFailed), + string(seiv1alpha1.GroupPhaseTerminating), +} + +var allConditionTypes = []string{ + seiv1alpha1.ConditionNodesReady, + seiv1alpha1.ConditionExternalServiceReady, + seiv1alpha1.ConditionRouteReady, + seiv1alpha1.ConditionIsolationReady, + seiv1alpha1.ConditionServiceMonitorReady, +} + +var ( + groupPhaseGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "sei_controller_seinodegroup_phase", + Help: "Current phase of each SeiNodeGroup (1=active, 0=inactive)", + }, + []string{"namespace", "name", "phase"}, + ) + + groupReplicasGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "sei_controller_seinodegroup_replicas", + Help: "Replica counts for each SeiNodeGroup", + }, + []string{"namespace", "name", "type"}, + ) + + groupConditionGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "sei_controller_seinodegroup_condition", + Help: "Condition status for each SeiNodeGroup (1=match, 0=no match)", + }, + []string{"namespace", "name", "type", "status"}, + ) + + reconcileSubstepDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "sei_controller_seinodegroup_reconcile_substep_duration_seconds", + Help: "Duration of individual reconcile substeps", + Buckets: observability.ReconcileBuckets, + }, + []string{"controller", "substep"}, + ) +) + +func init() { + metrics.Registry.MustRegister( + groupPhaseGauge, + groupReplicasGauge, + groupConditionGauge, + reconcileSubstepDuration, + ) +} + +func emitGroupPhase(ns, name string, phase seiv1alpha1.SeiNodeGroupPhase) { + observability.EmitPhaseGauge(groupPhaseGauge, ns, name, string(phase), allGroupPhases) +} + +func emitGroupReplicas(ns, name string, desired, ready int32) { + groupReplicasGauge.WithLabelValues(ns, name, "desired").Set(float64(desired)) + groupReplicasGauge.WithLabelValues(ns, name, "ready").Set(float64(ready)) +} + +func emitGroupConditions(ns, name string, conditions []metav1.Condition) { + for _, cond := range allConditionTypes { + current := "Unknown" + for i := range conditions { + if conditions[i].Type == cond { + current = string(conditions[i].Status) + break + } + } + for _, s := range []string{"True", "False", "Unknown"} { + val := 0.0 + if s == current { + val = 1.0 + } + groupConditionGauge.WithLabelValues(ns, name, cond, s).Set(val) + } + } +} + +func timeSubstep(substep string, fn func() error) error { + start := time.Now() + err := fn() + reconcileSubstepDuration.WithLabelValues(controllerName, substep).Observe(time.Since(start).Seconds()) + return err +} + +func cleanupGroupMetrics(namespace, name string) { + observability.DeletePhaseGauge(groupPhaseGauge, namespace, name, allGroupPhases) + for _, typ := range []string{"desired", "ready"} { + groupReplicasGauge.DeleteLabelValues(namespace, name, typ) + } + for _, cond := range allConditionTypes { + for _, status := range []string{"True", "False", "Unknown"} { + groupConditionGauge.DeleteLabelValues(namespace, name, cond, status) + } + } + observability.ReconcileErrorsTotal.DeleteLabelValues(controllerName, namespace, name) +} diff --git a/internal/controller/observability/metrics.go b/internal/controller/observability/metrics.go new file mode 100644 index 0000000..7f1d3b6 --- /dev/null +++ b/internal/controller/observability/metrics.go @@ -0,0 +1,87 @@ +package observability + +import ( + "fmt" + "strings" + + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +// ReconcileBuckets extends prometheus.DefBuckets to 60s, covering slow +// API server writes, sidecar interactions, and controller-runtime's +// default reconcile timeout. +var ReconcileBuckets = []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60} + +// InitBuckets covers 10s to 1h for node initialisation durations +// (PVC binding, snapshot restore, sidecar health). +var InitBuckets = []float64{10, 30, 60, 120, 300, 600, 900, 1200, 1800, 3600} + +// ReconcileErrorsTotal tracks reconcile errors beyond what controller-runtime +// tracks. The "controller" label disambiguates between controllers. +var ReconcileErrorsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "sei_controller_reconcile_errors_total", + Help: "Reconcile errors beyond what controller-runtime tracks", + }, + []string{"controller", "namespace", "name"}, +) + +func init() { + metrics.Registry.MustRegister(ReconcileErrorsTotal) +} + +// EmitPhaseGauge sets 1.0 for the current phase and 0.0 for all others, +// following the kube-state-metrics convention (e.g., kube_pod_status_phase). +func EmitPhaseGauge(gauge *prometheus.GaugeVec, ns, name, current string, allPhases []string) { + for _, p := range allPhases { + val := 0.0 + if p == current { + val = 1.0 + } + gauge.WithLabelValues(ns, name, p).Set(val) + } +} + +// DeletePhaseGauge removes all phase series for a deleted resource. +func DeletePhaseGauge(gauge *prometheus.GaugeVec, ns, name string, allPhases []string) { + for _, p := range allPhases { + gauge.DeleteLabelValues(ns, name, p) + } +} + +// Known sidecar route templates for label normalization. +var knownRoutes = map[string]string{ + "/v0/tasks": "/v0/tasks", + "/v0/healthz": "/v0/healthz", + "/v0/status": "/v0/status", +} + +// NormalizeRoute maps raw HTTP paths to bounded route templates +// to prevent unbounded cardinality from parameterised paths. +func NormalizeRoute(path string) string { + if r, ok := knownRoutes[path]; ok { + return r + } + if strings.HasPrefix(path, "/v0/tasks/") { + return "/v0/tasks/:id" + } + return "other" +} + +// NormalizeStatusCode buckets HTTP status codes into bounded classes +// to prevent unbounded cardinality from unexpected status codes. +func NormalizeStatusCode(code int) string { + switch { + case code >= 200 && code < 300: + return "2xx" + case code >= 300 && code < 400: + return "3xx" + case code >= 400 && code < 500: + return "4xx" + case code >= 500 && code < 600: + return "5xx" + default: + return fmt.Sprintf("%dxx", code/100) + } +}