From 462b4c229895d66e0595e85969b2fbb1fac8df89 Mon Sep 17 00:00:00 2001 From: Siu Wa Wu Date: Thu, 14 May 2026 10:54:36 +1000 Subject: [PATCH 1/2] Use backplane elevation for network verifier pod mode --- cmd/network/verification.go | 33 ++++++++++++++++++----- cmd/network/verification_pod_mode_test.go | 21 ++++++++++++++- cmd/network/verification_test.go | 16 +++++++++-- pkg/k8s/client.go | 15 +++++++++++ 4 files changed, 76 insertions(+), 9 deletions(-) diff --git a/cmd/network/verification.go b/cmd/network/verification.go index 255f1cd9c..b0c069ebc 100644 --- a/cmd/network/verification.go +++ b/cmd/network/verification.go @@ -107,6 +107,8 @@ type EgressVerification struct { SkipServiceLog bool // hiveOcmUrl is the OCM environment URL for Hive operations (Classic clusters only) hiveOcmUrl string + // Reason is the justification for elevation (required for pod mode write operations) + Reason string } func NewCmdValidateEgress() *cobra.Command { @@ -155,13 +157,16 @@ func NewCmdValidateEgress() *cobra.Command { # Override automatic selection of the list of endpoints to check osdctl network verify-egress --cluster-id my-rosa-cluster --platform hostedcluster - # Run in pod mode using Kubernetes jobs (requires cluster access) - osdctl network verify-egress --cluster-id my-rosa-cluster --pod-mode + # Run in pod mode using Kubernetes jobs (requires cluster access and elevation) + osdctl network verify-egress --cluster-id my-rosa-cluster --pod-mode --reason "PD-12345" + + # Run in pod mode with custom namespace and reason + osdctl network verify-egress --cluster-id my-rosa-cluster --pod-mode --namespace my-namespace --reason "OHSS-67890" # Run in pod mode using ServiceAccount (when running inside a Kubernetes Pod) osdctl network verify-egress --pod-mode --region us-east-1 --namespace my-namespace - # Run in pod mode with custom namespace and kubeconfig + # Run in pod mode with custom namespace and kubeconfig (no elevation needed with explicit kubeconfig) osdctl network 
verify-egress --pod-mode --region us-east-1 --namespace my-namespace --kubeconfig ~/.kube/config # Run network verification without sending service logs on failure @@ -205,6 +210,7 @@ func NewCmdValidateEgress() *cobra.Command { validateEgressCmd.Flags().StringVar(&e.Namespace, "namespace", "openshift-network-diagnostics", "(optional) Kubernetes namespace to run verification pods in") validateEgressCmd.Flags().BoolVar(&e.SkipServiceLog, "skip-service-log", false, "(optional) disable automatic service log sending when verification fails") validateEgressCmd.Flags().StringVar(&e.hiveOcmUrl, "hive-ocm-url", "", "(optional) OCM environment URL for hive operations. Aliases: 'production', 'staging', 'integration'. If not specified, uses the same OCM environment as the target cluster.") + validateEgressCmd.Flags().StringVar(&e.Reason, "reason", "", "(required for pod mode with --cluster-id) The reason for elevation to perform write operations (usually an OHSS or PD ticket)") return validateEgressCmd } @@ -667,6 +673,11 @@ func (e *EgressVerification) validateInput() error { return fmt.Errorf("pod mode requires either --cluster-id or --platform to determine platform type") } + // Require reason for elevation when using backplane (cluster-id provided, but no explicit kubeconfig) + if e.ClusterId != "" && e.Reason == "" && e.KubeConfig == "" { + return fmt.Errorf("pod mode with --cluster-id requires --reason flag for elevation (write operations need backplane-cluster-admin). 
Example: --reason 'PD-12345' or --reason 'OHSS-67890'") + } + // For AWS platforms without cluster-id, require region if e.ClusterId == "" && e.Region == "" { // Check if we're dealing with an AWS platform @@ -713,12 +724,22 @@ func (e *EgressVerification) getRestConfig(ctx context.Context) (*rest.Config, e e.log.Info(ctx, "Pod mode using provided kubeconfig: %s", e.KubeConfig) return restConfig, nil } else if e.ClusterId != "" { - // Priority 2: Use backplane credentials when cluster ID is available - restConfig, err := k8s.NewRestConfig(e.ClusterId) + // Priority 2: Use backplane credentials for the cluster ID, elevated when a reason was supplied + var err error + credentials := "backplane credentials" + if e.Reason != "" { + // Elevate as backplane-cluster-admin with reason for write operations + reasonMsg := fmt.Sprintf("Network verification pod mode: %s", e.Reason) + restConfig, err = k8s.NewRestConfigAsBackplaneClusterAdmin(e.ClusterId, reasonMsg) + credentials = "elevated backplane credentials (backplane-cluster-admin)" + } else { + restConfig, err = k8s.NewRestConfig(e.ClusterId) + } if err != nil { return nil, fmt.Errorf("failed to get REST config from backplane for cluster %s: %w", e.ClusterId, err) } - e.log.Info(ctx, "Pod mode using backplane credentials for cluster: %s", e.ClusterId) + // Log only after the config was obtained, so a failed lookup is never reported as in use + e.log.Info(ctx, "Pod mode using %s for cluster: %s", credentials, e.ClusterId) return restConfig, nil } else if _, err := os.Stat(serviceAccountTokenPath); err == nil { // Priority 3: Try in-cluster configuration when no explicit config provided diff --git a/cmd/network/verification_pod_mode_test.go b/cmd/network/verification_pod_mode_test.go index ef5977d1a..2d1998b44 100644 --- a/cmd/network/verification_pod_mode_test.go +++ b/cmd/network/verification_pod_mode_test.go @@ -268,10 +268,29 @@ func TestEgressVerification_ValidateInput_PodMode(t *testing.T) { errorMsg string }{ { - name: "pod_mode_with_cluster_id", + name: "pod_mode_with_cluster_id_and_reason", ev: 
&EgressVerification{ PodMode: true, ClusterId: "test-cluster", + Reason: "PD-12345", + }, + wantError: false, + }, + { + name: "pod_mode_with_cluster_id_without_reason", + ev: &EgressVerification{ + PodMode: true, + ClusterId: "test-cluster", + }, + wantError: true, + errorMsg: "pod mode with --cluster-id requires --reason flag for elevation", + }, + { + name: "pod_mode_with_cluster_id_and_kubeconfig_no_reason_needed", + ev: &EgressVerification{ + PodMode: true, + ClusterId: "test-cluster", + KubeConfig: "/path/to/kubeconfig", }, wantError: false, }, diff --git a/cmd/network/verification_test.go b/cmd/network/verification_test.go index da4ba3768..4fad31b6e 100644 --- a/cmd/network/verification_test.go +++ b/cmd/network/verification_test.go @@ -910,7 +910,19 @@ func TestEgressVerification_GetRestConfig(t *testing.T) { expectedResult: "explicit kubeconfig should be used", }, { - name: "priority_2_backplane_credentials", + name: "priority_2_backplane_credentials_with_elevation", + ev: &EgressVerification{ + ClusterId: "test-cluster-id", + Reason: "PD-12345", + KubeConfig: "", // No explicit kubeconfig + log: newTestLogger(t), + }, + expectedLog: "Pod mode using elevated backplane credentials (backplane-cluster-admin) for cluster: test-cluster-id", + expectError: false, + expectedResult: "backplane credentials with elevation should be used", + }, + { + name: "priority_2_backplane_credentials_without_elevation", ev: &EgressVerification{ ClusterId: "test-cluster-id", KubeConfig: "", // No explicit kubeconfig @@ -918,7 +930,7 @@ func TestEgressVerification_GetRestConfig(t *testing.T) { }, expectedLog: "Pod mode using backplane credentials for cluster: test-cluster-id", expectError: false, - expectedResult: "backplane credentials should be used", + expectedResult: "backplane credentials without elevation should be used", }, { name: "priority_4_default_kubeconfig_fallback", diff --git a/pkg/k8s/client.go b/pkg/k8s/client.go index c365347b3..f5125ee50 100644 --- 
a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -163,6 +163,21 @@ func NewRestConfig(clusterID string) (*rest.Config, error) { return cfg, nil } +// NewRestConfigAsBackplaneClusterAdmin returns a *rest.Config for the given cluster ID as the backplane-cluster-admin user; elevationReasons are passed through to bplogin.GetRestConfigAsUser. +func NewRestConfigAsBackplaneClusterAdmin(clusterID string, elevationReasons ...string) (*rest.Config, error) { + bp, err := bpconfig.GetBackplaneConfiguration() + if err != nil { + return nil, fmt.Errorf("failed to load backplane-cli config: %w", err) + } + + cfg, err := bplogin.GetRestConfigAsUser(bp, clusterID, "backplane-cluster-admin", elevationReasons...) + if err != nil { + return nil, fmt.Errorf("failed to elevate to backplane-cluster-admin: %w", err) + } + + return cfg, nil +} + // Create Backplane connection to a provided cluster, using a provided ocm sdk connection // This is intended to allow backplane connections to multiple clusters which exist in different // ocm environments by allowing the caller to provide an ocm connection to the function. From b2ba9c598d719c1a204e73f7c37b34353eb3ac3d Mon Sep 17 00:00:00 2001 From: Siu Wa Wu Date: Thu, 14 May 2026 12:57:35 +1000 Subject: [PATCH 2/2] update docs --- docs/README.md | 1 + docs/osdctl_network_verify-egress.md | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/README.md b/docs/README.md index bbaa4e80c..bc4975070 100644 --- a/docs/README.md +++ b/docs/README.md @@ -3888,6 +3888,7 @@ osdctl network verify-egress [flags] --platform string (optional) override for cloud platform/product. E.g., 'aws-classic' (OSD/ROSA Classic), 'aws-hcp' (ROSA HCP), 'aws-hcp-zeroegress', 'aws-govcloud-classic' (AWS GovCloud), or 'gcp-classic' --pod-mode (optional) run verification using Kubernetes pods instead of cloud instances --probe string (optional) select the probe to be used for egress testing. 
Either 'curl' (default) or 'legacy' (default "curl") + --reason string (required for pod mode with --cluster-id) The reason for elevation to perform write operations (usually an OHSS or PD ticket) --region string (optional) AWS region, required for --pod-mode if not passing a --cluster-id --request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0") --security-group string (optional) security group ID override for osd-network-verifier, required if not specifying --cluster-id diff --git a/docs/osdctl_network_verify-egress.md b/docs/osdctl_network_verify-egress.md index 186c0f27b..9c0da6eb5 100644 --- a/docs/osdctl_network_verify-egress.md +++ b/docs/osdctl_network_verify-egress.md @@ -52,13 +52,16 @@ osdctl network verify-egress [flags] # Override automatic selection of the list of endpoints to check osdctl network verify-egress --cluster-id my-rosa-cluster --platform hostedcluster - # Run in pod mode using Kubernetes jobs (requires cluster access) - osdctl network verify-egress --cluster-id my-rosa-cluster --pod-mode + # Run in pod mode using Kubernetes jobs (requires cluster access and elevation) + osdctl network verify-egress --cluster-id my-rosa-cluster --pod-mode --reason "PD-12345" + + # Run in pod mode with custom namespace and reason + osdctl network verify-egress --cluster-id my-rosa-cluster --pod-mode --namespace my-namespace --reason "OHSS-67890" # Run in pod mode using ServiceAccount (when running inside a Kubernetes Pod) osdctl network verify-egress --pod-mode --region us-east-1 --namespace my-namespace - # Run in pod mode with custom namespace and kubeconfig + # Run in pod mode with custom namespace and kubeconfig (no elevation needed with explicit kubeconfig) osdctl network verify-egress --pod-mode --region us-east-1 --namespace my-namespace --kubeconfig ~/.kube/config # Run network 
verification without sending service logs on failure @@ -94,6 +97,7 @@ osdctl network verify-egress [flags] --platform string (optional) override for cloud platform/product. E.g., 'aws-classic' (OSD/ROSA Classic), 'aws-hcp' (ROSA HCP), 'aws-hcp-zeroegress', 'aws-govcloud-classic' (AWS GovCloud), or 'gcp-classic' --pod-mode (optional) run verification using Kubernetes pods instead of cloud instances --probe string (optional) select the probe to be used for egress testing. Either 'curl' (default) or 'legacy' (default "curl") + --reason string (required for pod mode with --cluster-id) The reason for elevation to perform write operations (usually an OHSS or PD ticket) --region string (optional) AWS region, required for --pod-mode if not passing a --cluster-id --security-group string (optional) security group ID override for osd-network-verifier, required if not specifying --cluster-id --skip-service-log (optional) disable automatic service log sending when verification fails