From 73c5855e0136028a9845ff6fd18812901ae7fca1 Mon Sep 17 00:00:00 2001 From: Dan Prince Date: Fri, 19 Dec 2025 13:43:04 -0500 Subject: [PATCH] Force reinstall of operator resources on release version upgrade Fixes upgrade failures from 0.4 to main caused by incompatible webhook configuration changes that trigger index out of range panics during manifest merging. When OPENSTACK_RELEASE_VERSION is bumped, the controller now: - Detects the version change by comparing against status.ReleaseVersion - Deletes all owned resources (deployments, services, serviceaccounts, configmaps) - Removes managed webhooks (validating and mutating configurations) - Requeues to recreate resources with new manifests This one-time cleanup ensures a clean slate for incompatible upgrades where the structure of resources (especially webhooks) has changed between versions. Adds ReleaseVersion field to OpenStackStatus to track the deployed version. Jira: OSPRH-23865 Co-authored-by: Brendan Shephard --- .../operator.openstack.org_openstacks.yaml | 4 + api/operator/v1beta1/openstack_types.go | 3 + api/operator/v1beta1/zz_generated.deepcopy.go | 5 + .../operator.openstack.org_openstacks.yaml | 4 + .../operator/openstack_controller.go | 99 ++++++++++++++++++- 5 files changed, 114 insertions(+), 1 deletion(-) diff --git a/api/bases/operator.openstack.org_openstacks.yaml b/api/bases/operator.openstack.org_openstacks.yaml index 63f986df9..32ea32218 100644 --- a/api/bases/operator.openstack.org_openstacks.yaml +++ b/api/bases/operator.openstack.org_openstacks.yaml @@ -268,6 +268,10 @@ spec: for this object. format: int64 type: integer + releaseVersion: + description: ReleaseVersion - the OpenStack release version that has + been successfully deployed + type: string totalOperatorCount: description: TotalOperatorCount - the number all operators available type: integer diff --git a/api/operator/v1beta1/openstack_types.go b/api/operator/v1beta1/openstack_types.go index b8833a8b5..1918bd2a3 100644 --- a/api/operator/v1beta1/openstack_types.go +++ b/api/operator/v1beta1/openstack_types.go @@ -256,6 +256,9 @@ type OpenStackStatus struct { // ContainerImage - the container image that has been successfully deployed ContainerImage *string `json:"containerImage,omitempty"` + + // ReleaseVersion - the OpenStack release version that has been successfully deployed + ReleaseVersion *string `json:"releaseVersion,omitempty"` } // +kubebuilder:object:root=true diff --git a/api/operator/v1beta1/zz_generated.deepcopy.go b/api/operator/v1beta1/zz_generated.deepcopy.go index 64cc6b57c..601347ecf 100644 --- a/api/operator/v1beta1/zz_generated.deepcopy.go +++ b/api/operator/v1beta1/zz_generated.deepcopy.go @@ -158,6 +158,11 @@ func (in *OpenStackStatus) DeepCopyInto(out *OpenStackStatus) { *out = new(string) **out = **in } + if in.ReleaseVersion != nil { + in, out := &in.ReleaseVersion, &out.ReleaseVersion + *out = new(string) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OpenStackStatus. diff --git a/config/crd/bases/operator.openstack.org_openstacks.yaml b/config/crd/bases/operator.openstack.org_openstacks.yaml index 63f986df9..32ea32218 100644 --- a/config/crd/bases/operator.openstack.org_openstacks.yaml +++ b/config/crd/bases/operator.openstack.org_openstacks.yaml @@ -268,6 +268,10 @@ spec: for this object. format: int64 type: integer + releaseVersion: + description: ReleaseVersion - the OpenStack release version that has + been successfully deployed + type: string totalOperatorCount: description: TotalOperatorCount - the number all operators available type: integer diff --git a/internal/controller/operator/openstack_controller.go b/internal/controller/operator/openstack_controller.go index 52708237a..f52a62f3d 100644 --- a/internal/controller/operator/openstack_controller.go +++ b/internal/controller/operator/openstack_controller.go @@ -43,6 +43,7 @@ import ( "github.com/openstack-k8s-operators/openstack-operator/internal/operator" "github.com/openstack-k8s-operators/openstack-operator/internal/operator/bindata" "github.com/pkg/errors" + admissionv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" @@ -250,6 +251,39 @@ func (r *OpenStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, err } + // Check if OPENSTACK_RELEASE_VERSION has changed - if so, delete all owned resources + // This is a one-time fix to handle incompatible upgrades + if instance.Status.ReleaseVersion != nil && *instance.Status.ReleaseVersion != openstackReleaseVersion { + Log.Info("OpenStack release version changed, deleting all owned resources", + "old", *instance.Status.ReleaseVersion, + "new", openstackReleaseVersion) + + if err := r.deleteAllOwnedResources(ctx, instance); err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + operatorv1beta1.OpenStackOperatorReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + operatorv1beta1.OpenStackOperatorErrorMessage, + err)) + return ctrl.Result{}, err + } + + // Reset the container image status to force re-application of CRDs and RBAC + instance.Status.ContainerImage = nil + + // Update the release version in status + instance.Status.ReleaseVersion = &openstackReleaseVersion + + // Requeue to allow resources to be deleted before recreating + Log.Info("Resources deleted, requeuing to recreate with new version") + return ctrl.Result{RequeueAfter: time.Duration(5) * time.Second}, nil + } + + // Set the release version if not set + if instance.Status.ReleaseVersion == nil { + instance.Status.ReleaseVersion = &openstackReleaseVersion + } + if err := r.applyManifests(ctx, instance); err != nil { instance.Status.Conditions.Set(condition.FalseCondition( operatorv1beta1.OpenStackOperatorReadyCondition, @@ -316,6 +350,69 @@ func (r *OpenStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } +func deleteOwnedResources[L any, T any]( + ctx context.Context, + r *OpenStackReconciler, + instance client.Object, + list L, + itemsGetter func(L) []T, +) error { + log := r.GetLogger(ctx) + + err := r.List(ctx, any(list).(client.ObjectList), &client.ListOptions{Namespace: instance.GetNamespace()}) + if err != nil { + return errors.Wrap(err, "failed to list resources") + } + + for _, item := range itemsGetter(list) { + obj := any(&item).(client.Object) + if metav1.IsControlledBy(obj, instance) { + log.Info("Deleting owned resource", "kind", obj.GetObjectKind().GroupVersionKind().Kind, "name", obj.GetName()) + err := r.Delete(ctx, obj) + if err != nil && !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "failed to delete %s", obj.GetName()) + } + } + } + return nil +} + +func (r *OpenStackReconciler) deleteAllOwnedResources(ctx context.Context, instance *operatorv1beta1.OpenStack) error { + Log := r.GetLogger(ctx) + Log.Info("Deleting all owned resources for release version upgrade") + + err := deleteOwnedResources(ctx, r, instance, &appsv1.DeploymentList{}, func(l *appsv1.DeploymentList) []appsv1.Deployment { return l.Items }) + if err != nil { + return err + } + + err = deleteOwnedResources(ctx, r, instance, &corev1.ServiceAccountList{}, func(l *corev1.ServiceAccountList) []corev1.ServiceAccount { return l.Items }) + if err != nil { + return err + } + + err = deleteOwnedResources(ctx, r, instance, &corev1.ServiceList{}, func(l *corev1.ServiceList) []corev1.Service { return l.Items }) + if err != nil { + return err + } + + labelSelector, _ := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ + MatchLabels: map[string]string{"openstack.openstack.org/managed": "true"}, + }) + + deleteOpts := client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{LabelSelector: labelSelector}, + } + + err = r.DeleteAllOf(ctx, &admissionv1.ValidatingWebhookConfiguration{}, &deleteOpts) + if err != nil && !apierrors.IsNotFound(err) { + return errors.Wrap(err, "failed to delete validating webhooks") + } + + Log.Info("All owned resources deleted successfully") + return nil +} + func (r *OpenStackReconciler) reconcileDelete(ctx context.Context, instance *operatorv1beta1.OpenStack, helper *helper.Helper) (ctrl.Result, error) { Log := r.GetLogger(ctx) Log.Info("Reconciling OpenStack initialization resource delete") @@ -987,7 +1084,7 @@ func (r *OpenStackReconciler) postCleanupObsoleteResources(ctx context.Context, // The horizon-operator.openstack-operators has references to old roles/bindings // the code below will delete those references before continuing for _, ref := range refs { - refData := ref.(map[string]interface{}) + refData := ref.(map[string]any) Log.Info("Deleting operator reference", "Reference", ref) obj := uns.Unstructured{} obj.SetName(refData["name"].(string))