From cd026bdc7f2f3260bc216508318a18fdc33c3fcc Mon Sep 17 00:00:00 2001 From: Andrew Bays Date: Wed, 6 May 2026 09:39:13 -0400 Subject: [PATCH 1/3] [deploy_minio] Add MinIO deployment role Deploy MinIO as a lightweight S3-compatible object store for use as the Velero backup target in development and CI environments. Signed-off-by: Andrew Bays Signed-off-by: Martin Schuppert Co-Authored-By: Claude Opus 4.6 --- roles/deploy_minio/defaults/main.yml | 28 +++++ roles/deploy_minio/meta/main.yml | 8 ++ roles/deploy_minio/tasks/main.yml | 90 +++++++++++++++ roles/deploy_minio/templates/minio.yaml.j2 | 125 +++++++++++++++++++++ zuul.d/molecule.yaml | 9 ++ zuul.d/projects.yaml | 1 + 6 files changed, 261 insertions(+) create mode 100644 roles/deploy_minio/defaults/main.yml create mode 100644 roles/deploy_minio/meta/main.yml create mode 100644 roles/deploy_minio/tasks/main.yml create mode 100644 roles/deploy_minio/templates/minio.yaml.j2 diff --git a/roles/deploy_minio/defaults/main.yml b/roles/deploy_minio/defaults/main.yml new file mode 100644 index 000000000..4d2c48c63 --- /dev/null +++ b/roles/deploy_minio/defaults/main.yml @@ -0,0 +1,28 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# All variables intended for modification should be placed in this file. 
+# All variables within this role should have a prefix of "cifmw_deploy_minio" + +cifmw_deploy_minio_namespace: minio +cifmw_deploy_minio_storage_size: 10Gi +cifmw_deploy_minio_storage_class: "" +cifmw_deploy_minio_root_user: minio +cifmw_deploy_minio_root_password: minio123 +cifmw_deploy_minio_buckets: + - velero + - loki +cifmw_deploy_minio_image: quay.io/minio/minio:latest diff --git a/roles/deploy_minio/meta/main.yml b/roles/deploy_minio/meta/main.yml new file mode 100644 index 000000000..ea86e0532 --- /dev/null +++ b/roles/deploy_minio/meta/main.yml @@ -0,0 +1,8 @@ +--- +galaxy_info: + role_name: deploy_minio + namespace: cifmw + author: Red Hat + description: Deploy MinIO as S3-compatible storage backend + license: Apache-2.0 + min_ansible_version: "2.11" diff --git a/roles/deploy_minio/tasks/main.yml b/roles/deploy_minio/tasks/main.yml new file mode 100644 index 000000000..bb2355647 --- /dev/null +++ b/roles/deploy_minio/tasks/main.yml @@ -0,0 +1,90 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# Deploy MinIO +# +# Deploys MinIO as an S3-compatible storage backend. +# Creates namespace, PVC, Deployment, Service, Routes. +# Bucket is created via mkdir in the container command. 
+# +# Output facts: +# cifmw_deploy_minio_access_key: Root user (for OADP credentials) +# cifmw_deploy_minio_secret_key: Root password (for OADP credentials) + +- name: Create temp directory for rendered templates + ansible.builtin.tempfile: + state: directory + prefix: deploy-minio- + register: _deploy_minio_rendered_dir + +- name: Render MinIO manifests + ansible.builtin.template: + src: minio.yaml.j2 + dest: "{{ _deploy_minio_rendered_dir.path }}/minio.yaml" + mode: "0644" + +- name: Apply MinIO manifests + kubernetes.core.k8s: + src: "{{ _deploy_minio_rendered_dir.path }}/minio.yaml" + state: present + +- name: Wait for MinIO deployment to be ready + kubernetes.core.k8s_info: + api_version: apps/v1 + kind: Deployment + name: minio + namespace: "{{ cifmw_deploy_minio_namespace }}" + wait: true + wait_timeout: 300 + wait_condition: + type: Available + status: "True" + +- name: Export credentials for downstream roles + ansible.builtin.set_fact: + cifmw_deploy_minio_access_key: "{{ cifmw_deploy_minio_root_user }}" + cifmw_deploy_minio_secret_key: "{{ cifmw_deploy_minio_root_password }}" + +- name: Get MinIO console route + kubernetes.core.k8s_info: + api_version: route.openshift.io/v1 + kind: Route + name: minio-console + namespace: "{{ cifmw_deploy_minio_namespace }}" + register: _minio_console_route + +- name: Get MinIO API route + kubernetes.core.k8s_info: + api_version: route.openshift.io/v1 + kind: Route + name: minio-api + namespace: "{{ cifmw_deploy_minio_namespace }}" + register: _minio_api_route + +- name: Print setup complete + ansible.builtin.debug: + msg: + - "========================================" + - "MinIO Setup Complete" + - "========================================" + - "Console: https://{{ _minio_console_route.resources[0].spec.host }}" + - "API: https://{{ _minio_api_route.resources[0].spec.host }}" + - "Buckets: {{ cifmw_deploy_minio_buckets | join(', ') }}" + +- name: Cleanup rendered templates + ansible.builtin.file: + path: "{{ 
_deploy_minio_rendered_dir.path }}" + state: absent diff --git a/roles/deploy_minio/templates/minio.yaml.j2 b/roles/deploy_minio/templates/minio.yaml.j2 new file mode 100644 index 000000000..cfbb5f55a --- /dev/null +++ b/roles/deploy_minio/templates/minio.yaml.j2 @@ -0,0 +1,125 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: {{ cifmw_deploy_minio_namespace }} + labels: + name: {{ cifmw_deploy_minio_namespace }} +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: minio-pvc + namespace: {{ cifmw_deploy_minio_namespace }} +spec: + accessModes: + - ReadWriteOnce +{% if cifmw_deploy_minio_storage_class %} + storageClassName: {{ cifmw_deploy_minio_storage_class }} +{% endif %} + resources: + requests: + storage: {{ cifmw_deploy_minio_storage_size }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: minio + namespace: {{ cifmw_deploy_minio_namespace }} +spec: + selector: + matchLabels: + app: minio + strategy: + type: Recreate + template: + metadata: + labels: + app: minio + spec: + containers: + - name: minio + image: {{ cifmw_deploy_minio_image }} + command: + - /bin/bash + - -c + - | +{% for bucket in cifmw_deploy_minio_buckets %} + mkdir -p /data/{{ bucket }} && \ +{% endfor %} + minio server /data --console-address :9001 + env: + - name: MINIO_ROOT_USER + value: "{{ cifmw_deploy_minio_root_user }}" + - name: MINIO_ROOT_PASSWORD + value: "{{ cifmw_deploy_minio_root_password }}" + ports: + - containerPort: 9000 + name: api + - containerPort: 9001 + name: console + volumeMounts: + - name: data + mountPath: /data + livenessProbe: + httpGet: + path: /minio/health/live + port: 9000 + initialDelaySeconds: 30 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /minio/health/ready + port: 9000 + initialDelaySeconds: 30 + periodSeconds: 20 + volumes: + - name: data + persistentVolumeClaim: + claimName: minio-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: minio + namespace: {{ cifmw_deploy_minio_namespace }} +spec: + 
selector: + app: minio + ports: + - name: api + port: 9000 + targetPort: 9000 + - name: console + port: 9001 + targetPort: 9001 +--- +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: minio-console + namespace: {{ cifmw_deploy_minio_namespace }} +spec: + to: + kind: Service + name: minio + port: + targetPort: console + tls: + termination: edge + insecureEdgeTerminationPolicy: Redirect +--- +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: minio-api + namespace: {{ cifmw_deploy_minio_namespace }} +spec: + to: + kind: Service + name: minio + port: + targetPort: api + tls: + termination: edge + insecureEdgeTerminationPolicy: Redirect diff --git a/zuul.d/molecule.yaml b/zuul.d/molecule.yaml index da963e698..d6ff17cf9 100644 --- a/zuul.d/molecule.yaml +++ b/zuul.d/molecule.yaml @@ -947,6 +947,15 @@ - ^.config/molecule/.* name: cifmw-molecule-deploy_loki parent: cifmw-molecule-noop +- job: + files: + - ^common-requirements.txt + - ^test-requirements.txt + - ^roles/deploy_minio/.* + - ^ci/playbooks/molecule.* + - ^.config/molecule/.* + name: cifmw-molecule-deploy_minio + parent: cifmw-molecule-noop - job: files: - ^common-requirements.txt diff --git a/zuul.d/projects.yaml b/zuul.d/projects.yaml index fe2a87ee5..e6e3a9ad9 100644 --- a/zuul.d/projects.yaml +++ b/zuul.d/projects.yaml @@ -44,6 +44,7 @@ - cifmw-molecule-copy_container - cifmw-molecule-deploy_bmh - cifmw-molecule-deploy_loki + - cifmw-molecule-deploy_minio - cifmw-molecule-devscripts - cifmw-molecule-discover_latest_image - cifmw-molecule-dlrn_promote From 78e3f7db96e02a5b2c580e3c76dd4242316d99ad Mon Sep 17 00:00:00 2001 From: Andrew Bays Date: Wed, 6 May 2026 09:40:22 -0400 Subject: [PATCH 2/3] [openshift_adp] Add OADP operator installation role Install and configure the OADP (OpenShift API for Data Protection) operator with an S3-compatible storage backend, create the DataProtectionApplication CR, set up VolumeSnapshotClass for CSI snapshots, and verify the 
BackupStorageLocation is available.

Signed-off-by: Andrew Bays
Signed-off-by: Martin Schuppert
Co-Authored-By: Claude Opus 4.6
---
 docs/dictionary/en-custom.txt | 2 +
 roles/openshift_adp/defaults/main.yml | 33 +++
 roles/openshift_adp/meta/main.yml | 8 +
 roles/openshift_adp/tasks/main.yml | 317 ++++++++++++++++++++++++++
 zuul.d/molecule.yaml | 9 +
 zuul.d/projects.yaml | 1 +
 6 files changed, 370 insertions(+)
 create mode 100644 roles/openshift_adp/defaults/main.yml
 create mode 100644 roles/openshift_adp/meta/main.yml
 create mode 100644 roles/openshift_adp/tasks/main.yml

diff --git a/docs/dictionary/en-custom.txt b/docs/dictionary/en-custom.txt
index 016199c88..eb3e04376 100644
--- a/docs/dictionary/en-custom.txt
+++ b/docs/dictionary/en-custom.txt
@@ -418,6 +418,7 @@ num
 nvme
 nwy
 nzgdh
+OADP
 oauth
 observability
 oc
@@ -635,6 +636,7 @@ vcpus
 vda
+Velero
 venv
 vexxhost
 virbr
 virsh
 virt
diff --git a/roles/openshift_adp/defaults/main.yml b/roles/openshift_adp/defaults/main.yml
new file mode 100644
index 000000000..088c27a90
--- /dev/null
+++ b/roles/openshift_adp/defaults/main.yml
@@ -0,0 +1,33 @@
+---
+# Copyright Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# All variables intended for modification should be placed in this file.
+# All variables within this role should have a prefix of "cifmw_openshift_adp" + +# OADP operator +cifmw_openshift_adp_namespace: openshift-adp +cifmw_openshift_adp_channel: stable +cifmw_openshift_adp_enable_node_agent: true + +# S3 backend (MinIO or other S3-compatible) +cifmw_openshift_adp_s3_namespace: minio +cifmw_openshift_adp_s3_bucket: velero +cifmw_openshift_adp_s3_region: minio +cifmw_openshift_adp_s3_prefix: rhoso +cifmw_openshift_adp_s3_force_path_style: true +cifmw_openshift_adp_s3_insecure_skip_tls: true +# cifmw_openshift_adp_s3_access_key: REQUIRED +# cifmw_openshift_adp_s3_secret_key: REQUIRED diff --git a/roles/openshift_adp/meta/main.yml b/roles/openshift_adp/meta/main.yml new file mode 100644 index 000000000..551cdb974 --- /dev/null +++ b/roles/openshift_adp/meta/main.yml @@ -0,0 +1,8 @@ +--- +galaxy_info: + role_name: openshift_adp + namespace: cifmw + author: Red Hat + description: Install and configure OADP (OpenShift API for Data Protection) + license: Apache-2.0 + min_ansible_version: "2.11" diff --git a/roles/openshift_adp/tasks/main.yml b/roles/openshift_adp/tasks/main.yml new file mode 100644 index 000000000..b6c1e3d77 --- /dev/null +++ b/roles/openshift_adp/tasks/main.yml @@ -0,0 +1,317 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# OADP Setup +# +# Installs and configures OADP (OpenShift API for Data Protection) +# with an S3-compatible storage backend. 
+ +- name: Verify S3 credentials are provided + ansible.builtin.fail: + msg: >- + cifmw_openshift_adp_s3_access_key and cifmw_openshift_adp_s3_secret_key + are required. Deploy an S3 backend first (e.g. deploy_minio role). + when: >- + cifmw_openshift_adp_s3_access_key is not defined or + cifmw_openshift_adp_s3_secret_key is not defined + +- name: Print setup header + ansible.builtin.debug: + msg: + - "========================================" + - "OADP Setup" + - "========================================" + - "OADP Namespace: {{ cifmw_openshift_adp_namespace }}" + - "OADP Channel: {{ cifmw_openshift_adp_channel }}" + - "S3 Namespace: {{ cifmw_openshift_adp_s3_namespace }}" + - "S3 Bucket: {{ cifmw_openshift_adp_s3_bucket }}" + - "Node Agent (Kopia): {{ cifmw_openshift_adp_enable_node_agent }}" + +- name: Create OADP namespace + kubernetes.core.k8s: + api_version: v1 + kind: Namespace + name: "{{ cifmw_openshift_adp_namespace }}" + state: present + +- name: Create OperatorGroup for OADP + kubernetes.core.k8s: + state: present + definition: + apiVersion: operators.coreos.com/v1 + kind: OperatorGroup + metadata: + name: openshift-adp-operator-group + namespace: "{{ cifmw_openshift_adp_namespace }}" + spec: + targetNamespaces: + - "{{ cifmw_openshift_adp_namespace }}" + +- name: Create Subscription for OADP operator + kubernetes.core.k8s: + state: present + definition: + apiVersion: operators.coreos.com/v1alpha1 + kind: Subscription + metadata: + name: redhat-oadp-operator + namespace: "{{ cifmw_openshift_adp_namespace }}" + spec: + channel: "{{ cifmw_openshift_adp_channel }}" + installPlanApproval: Automatic + name: redhat-oadp-operator + source: redhat-operators + sourceNamespace: openshift-marketplace + +- name: Wait for OADP operator to be ready + kubernetes.core.k8s_info: + api_version: v1 + kind: Pod + namespace: "{{ cifmw_openshift_adp_namespace }}" + label_selectors: + - control-plane=controller-manager + wait: true + wait_timeout: 300 + wait_condition: + 
type: Ready + status: "True" + register: _operator_wait + retries: 30 + delay: 10 + until: _operator_wait.resources | length > 0 + +- name: Create cloud credentials secret + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Secret + metadata: + name: cloud-credentials + namespace: "{{ cifmw_openshift_adp_namespace }}" + type: Opaque + stringData: + cloud: | + [default] + aws_access_key_id={{ cifmw_openshift_adp_s3_access_key }} + aws_secret_access_key={{ cifmw_openshift_adp_s3_secret_key }} + no_log: true + +- name: Get S3 API route + kubernetes.core.k8s_info: + api_version: route.openshift.io/v1 + kind: Route + name: minio-api + namespace: "{{ cifmw_openshift_adp_s3_namespace }}" + register: _s3_api_route + +- name: Create DataProtectionApplication + kubernetes.core.k8s: + state: present + definition: + apiVersion: oadp.openshift.io/v1alpha1 + kind: DataProtectionApplication + metadata: + name: velero + namespace: "{{ cifmw_openshift_adp_namespace }}" + spec: + configuration: + velero: + defaultPlugins: + - openshift + - aws + - csi + nodeAgent: + enable: "{{ cifmw_openshift_adp_enable_node_agent | bool }}" + uploaderType: kopia + backupLocations: + - velero: + provider: aws + default: true + objectStorage: + bucket: "{{ cifmw_openshift_adp_s3_bucket }}" + prefix: "{{ cifmw_openshift_adp_s3_prefix }}" + config: + region: "{{ cifmw_openshift_adp_s3_region }}" + s3ForcePathStyle: "{{ cifmw_openshift_adp_s3_force_path_style | lower }}" + s3Url: "https://{{ _s3_api_route.resources[0].spec.host }}" + insecureSkipTLSVerify: "{{ cifmw_openshift_adp_s3_insecure_skip_tls | lower }}" + credential: + name: cloud-credentials + key: cloud + +- name: Wait for Velero pod to be ready + kubernetes.core.k8s_info: + api_version: v1 + kind: Pod + namespace: "{{ cifmw_openshift_adp_namespace }}" + label_selectors: + - app.kubernetes.io/name=velero + wait: true + wait_timeout: 300 + wait_condition: + type: Ready + status: "True" + register: _velero_wait + 
retries: 30 + delay: 10 + until: _velero_wait.resources | length > 0 + +- name: Wait for node-agent pods to be ready + kubernetes.core.k8s_info: + api_version: v1 + kind: Pod + namespace: "{{ cifmw_openshift_adp_namespace }}" + label_selectors: + - app.kubernetes.io/name=node-agent + wait: true + wait_timeout: 300 + wait_condition: + type: Ready + status: "True" + when: cifmw_openshift_adp_enable_node_agent | bool + +- name: Get OADP pods + kubernetes.core.k8s_info: + api_version: v1 + kind: Pod + namespace: "{{ cifmw_openshift_adp_namespace }}" + register: _oadp_pods + +- name: Display OADP pods + ansible.builtin.debug: + msg: "{{ item.metadata.name }} - {{ item.status.phase }}" + loop: "{{ _oadp_pods.resources }}" + loop_control: + label: "{{ item.metadata.name }}" + +# ======================================== +# VolumeSnapshotClass for CSI snapshots +# ======================================== +- name: Check for existing VolumeSnapshotClass + kubernetes.core.k8s_info: + api_version: snapshot.storage.k8s.io/v1 + kind: VolumeSnapshotClass + register: _vsc_list + +- name: Create VolumeSnapshotClass for OADP (TopoLVM/LVMS) + kubernetes.core.k8s: + state: present + definition: + apiVersion: snapshot.storage.k8s.io/v1 + kind: VolumeSnapshotClass + metadata: + name: lvms-velero + labels: + velero.io/csi-volumesnapshot-class: "true" + annotations: + snapshot.storage.kubernetes.io/is-default-class: "false" + driver: topolvm.io + deletionPolicy: Retain + when: >- + _vsc_list.resources | + selectattr('driver', 'equalto', 'topolvm.io') | + list | length > 0 + +- name: Print VolumeSnapshotClass status (TopoLVM found) + ansible.builtin.debug: + msg: "VolumeSnapshotClass 'lvms-velero' created for OADP CSI snapshots" + when: >- + _vsc_list.resources | + selectattr('driver', 'equalto', 'topolvm.io') | + list | length > 0 + +- name: Print VolumeSnapshotClass status (no TopoLVM) + ansible.builtin.debug: + msg: "No TopoLVM driver found. 
If using a different CSI driver, manually create a VolumeSnapshotClass with velero.io/csi-volumesnapshot-class=true label." + when: >- + _vsc_list.resources | + selectattr('driver', 'equalto', 'topolvm.io') | + list | length == 0 + +# ======================================== +# Verify BackupStorageLocation +# ======================================== +- name: Wait for BackupStorageLocation to be available + kubernetes.core.k8s_info: + api_version: velero.io/v1 + kind: BackupStorageLocation + namespace: "{{ cifmw_openshift_adp_namespace }}" + register: _bsl_status + retries: 30 + delay: 10 + until: + - _bsl_status.resources | length > 0 + - (_bsl_status.resources[0].status.phase | default('')) == 'Available' + ignore_errors: true + +- name: Get BackupStorageLocation details + kubernetes.core.k8s_info: + api_version: velero.io/v1 + kind: BackupStorageLocation + namespace: "{{ cifmw_openshift_adp_namespace }}" + register: _bsl_output + +- name: Display BackupStorageLocation status + ansible.builtin.debug: + msg: >- + {{ item.metadata.name }} - Phase: {{ item.status.phase | default('Unknown') }} + loop: "{{ _bsl_output.resources }}" + loop_control: + label: "{{ item.metadata.name }}" + +- name: Get troubleshooting info if BSL not available + ansible.builtin.shell: | + echo "=== BackupStorageLocation ===" + oc get backupstoragelocation -n {{ cifmw_openshift_adp_namespace }} -o yaml + echo "" + echo "=== Velero Logs (last 50 lines) ===" + oc logs -n {{ cifmw_openshift_adp_namespace }} deployment/velero --tail=50 + register: _bsl_debug + changed_when: false + when: _bsl_status is failed + +- name: Display troubleshooting info + ansible.builtin.debug: + msg: "{{ _bsl_debug.stdout_lines }}" + when: _bsl_status is failed + +- name: Print success summary + ansible.builtin.debug: + msg: + - "========================================" + - "OADP Setup Complete" + - "========================================" + - "" + - "OADP Namespace: {{ cifmw_openshift_adp_namespace }}" + - "S3 
API: https://{{ _s3_api_route.resources[0].spec.host }}" + - "Bucket: {{ cifmw_openshift_adp_s3_bucket }}" + - "BackupStorageLocation: Available" + when: _bsl_status is not failed + +- name: Print warning summary + ansible.builtin.debug: + msg: + - "========================================" + - "OADP Setup Complete with Warnings" + - "========================================" + - "" + - "BackupStorageLocation is not yet available." + - "" + - "Troubleshoot:" + - " oc get backupstoragelocation -n {{ cifmw_openshift_adp_namespace }} -o yaml" + - " oc logs -n {{ cifmw_openshift_adp_namespace }} deployment/velero" + when: _bsl_status is failed diff --git a/zuul.d/molecule.yaml b/zuul.d/molecule.yaml index d6ff17cf9..9f44f4685 100644 --- a/zuul.d/molecule.yaml +++ b/zuul.d/molecule.yaml @@ -1028,6 +1028,15 @@ - ^.config/molecule/.* name: cifmw-molecule-openshift_adm parent: cifmw-molecule-noop +- job: + files: + - ^common-requirements.txt + - ^test-requirements.txt + - ^roles/openshift_adp/.* + - ^ci/playbooks/molecule.* + - ^.config/molecule/.* + name: cifmw-molecule-openshift_adp + parent: cifmw-molecule-noop - job: files: - ^common-requirements.txt diff --git a/zuul.d/projects.yaml b/zuul.d/projects.yaml index e6e3a9ad9..6ea6ca035 100644 --- a/zuul.d/projects.yaml +++ b/zuul.d/projects.yaml @@ -76,6 +76,7 @@ - cifmw-molecule-nat64_appliance - cifmw-molecule-networking_mapper - cifmw-molecule-openshift_adm + - cifmw-molecule-openshift_adp - cifmw-molecule-openshift_login - cifmw-molecule-openshift_obs - cifmw-molecule-openshift_provisioner_node From 8988fd7dff17cae4b8c151dae7917b1d3e38d89d Mon Sep 17 00:00:00 2001 From: Andrew Bays Date: Wed, 6 May 2026 09:40:40 -0400 Subject: [PATCH 3/3] [cifmw_backup_restore] Add backup/restore orchestration role Orchestrate backup, restore, and cleanup of OpenStack control plane and data plane resources, including Galera database dumps, Velero CSI volume snapshots, and ordered multi-phase restore sequences. 
Also adds playbooks (backup_restore.yaml) and integrates backup and restore into the post-deployment pipeline.

Signed-off-by: Andrew Bays
Signed-off-by: Martin Schuppert
Co-Authored-By: Claude Opus 4.6
---
 docs/dictionary/en-custom.txt | 6 +
 playbooks/backup_restore.yaml | 68 +++
 post-deployment.yml | 12 +
 roles/cifmw_backup_restore/README.md | 104 ++++
 roles/cifmw_backup_restore/defaults/main.yml | 76 +++
 roles/cifmw_backup_restore/meta/main.yml | 31 +
 roles/cifmw_backup_restore/tasks/backup.yml | 320 +++++++++++
 roles/cifmw_backup_restore/tasks/cleanup.yml | 309 ++++++++++
 roles/cifmw_backup_restore/tasks/e2e.yml | 254 +++++++++
 roles/cifmw_backup_restore/tasks/main.yml | 25 +
 .../tasks/ovn_db_backup.yml | 83 +++
 .../tasks/ovn_db_restore.yml | 124 ++++
 roles/cifmw_backup_restore/tasks/restore.yml | 535 ++++++++++++++++++
 .../tasks/restore_pin_pvcs.yml | 114 ++++
 .../tasks/setup_galerabackup.yml | 88 +++
 .../tasks/wait_for_restore.yml | 79 +++
 .../00-resource-modifiers-configmap.yaml.j2 | 39 ++
 .../01-restore-order-00-pvcs.yaml.j2 | 17 +
 .../02-restore-order-10-foundation.yaml.j2 | 19 +
 ...03-restore-order-20-infrastructure.yaml.j2 | 20 +
 .../04-restore-order-30-controlplane.yaml.j2 | 19 +
 .../05-restore-order-40-backup-config.yaml.j2 | 18 +
 .../templates/06a-galerarestore.yaml.j2 | 14 +
 .../07-restore-order-60-dataplane.yaml.j2 | 18 +
 .../templates/08-edpm-deployment.yaml.j2 | 12 +
 .../templates/backup-pvcs.yaml.j2 | 48 ++
 .../templates/backup-resources.yaml.j2 | 20 +
 .../templates/galerabackup.yaml.j2 | 13 +
 zuul.d/molecule.yaml | 9 +
 zuul.d/projects.yaml | 1 +
 30 files changed, 2495 insertions(+)
 create mode 100644 playbooks/backup_restore.yaml
 create mode 100644 roles/cifmw_backup_restore/README.md
 create mode 100644 roles/cifmw_backup_restore/defaults/main.yml
 create mode 100644 roles/cifmw_backup_restore/meta/main.yml
 create mode 100644 roles/cifmw_backup_restore/tasks/backup.yml
 create mode 100644 
roles/cifmw_backup_restore/tasks/cleanup.yml create mode 100644 roles/cifmw_backup_restore/tasks/e2e.yml create mode 100644 roles/cifmw_backup_restore/tasks/main.yml create mode 100644 roles/cifmw_backup_restore/tasks/ovn_db_backup.yml create mode 100644 roles/cifmw_backup_restore/tasks/ovn_db_restore.yml create mode 100644 roles/cifmw_backup_restore/tasks/restore.yml create mode 100644 roles/cifmw_backup_restore/tasks/restore_pin_pvcs.yml create mode 100644 roles/cifmw_backup_restore/tasks/setup_galerabackup.yml create mode 100644 roles/cifmw_backup_restore/tasks/wait_for_restore.yml create mode 100644 roles/cifmw_backup_restore/templates/00-resource-modifiers-configmap.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/01-restore-order-00-pvcs.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/02-restore-order-10-foundation.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/03-restore-order-20-infrastructure.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/04-restore-order-30-controlplane.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/05-restore-order-40-backup-config.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/06a-galerarestore.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/07-restore-order-60-dataplane.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/08-edpm-deployment.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/backup-pvcs.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/backup-resources.yaml.j2 create mode 100644 roles/cifmw_backup_restore/templates/galerabackup.yaml.j2 diff --git a/docs/dictionary/en-custom.txt b/docs/dictionary/en-custom.txt index eb3e04376..c6fbec946 100644 --- a/docs/dictionary/en-custom.txt +++ b/docs/dictionary/en-custom.txt @@ -1,3 +1,4 @@ +AES APIs Amartya AssignedTeam @@ -227,6 +228,7 @@ fsid fultonj fusco fwcybtb +Galera gapped genericcloud genindex @@ -498,6 +500,8 @@ psathyan 
pubkey publicdomain pullsecret +PVC +PVCs pvs pwd pxe @@ -574,6 +578,7 @@ sso stateful stderr stdout +StorageClass stp str stricthostkeychecking @@ -661,6 +666,7 @@ vvvv vxlan vynxgdagahaac vzcg +WaitForFirstConsumer websso wget whitebox diff --git a/playbooks/backup_restore.yaml b/playbooks/backup_restore.yaml new file mode 100644 index 000000000..1214ab1a4 --- /dev/null +++ b/playbooks/backup_restore.yaml @@ -0,0 +1,68 @@ +--- +# End-to-end backup/restore test playbook +# +# Aligns with the openstack-k8s-operators backup-restore user guide (Galera, +# optional OVN NB/SB on PVC, OADP, ordered restore, Neutron–OVN sync post-EDPM). +# +# Used standalone or from post-deployment.yml (gated by +# cifmw_run_backup_restore_test). Logic lives in +# roles/cifmw_backup_restore/tasks/e2e.yml; variables are in the role defaults. +# +# Each step can be enabled/disabled independently for iterative testing. +# +# Prerequisites: +# - OpenStack control plane deployed and healthy +# - OpenStackBackupConfig CR created (for backup labeling) +# - For manual testing on a reproducer, run post_deployment.sh first: +# ./post_deployment.sh -e zuul_log_collection=true \ +# -e cifmw_nolog=false -e cifmw_run_tests=false +# +# Manual usage (reproducer): +# COMMON_ARGS="-i ~/ci-framework-data/artifacts/zuul_inventory.yml \ +# -e @~/ci-framework-data/parameters/reproducer-variables.yml \ +# -e @~/ci-framework-data/parameters/openshift-environment.yml" +# +# # Full run (with test workload): +# ansible-playbook $COMMON_ARGS playbooks/backup_restore.yaml \ +# -e cifmw_backup_restore_create_workload=true +# +# # Full run (without workload): +# ansible-playbook $COMMON_ARGS playbooks/backup_restore.yaml +# +# # Install deps only: +# ansible-playbook $COMMON_ARGS playbooks/backup_restore.yaml \ +# -e cifmw_backup_restore_run_backup=false \ +# -e cifmw_backup_restore_run_cleanup=false \ +# -e cifmw_backup_restore_run_restore=false +# +# # Backup only (deps already installed): +# ansible-playbook 
$COMMON_ARGS playbooks/backup_restore.yaml \ +# -e cifmw_backup_restore_install_deps=false \ +# -e cifmw_backup_restore_run_cleanup=false \ +# -e cifmw_backup_restore_run_restore=false +# +# # Cleanup + restore (backup already done): +# ansible-playbook $COMMON_ARGS playbooks/backup_restore.yaml \ +# -e cifmw_backup_restore_install_deps=false \ +# -e cifmw_backup_restore_run_backup=false \ +# -e cifmw_backup_restore_backup_timestamp=20260323-144546 +# +# # Restore only (cleanup already done): +# ansible-playbook $COMMON_ARGS playbooks/backup_restore.yaml \ +# -e cifmw_backup_restore_install_deps=false \ +# -e cifmw_backup_restore_run_backup=false \ +# -e cifmw_backup_restore_run_cleanup=false \ +# -e cifmw_backup_restore_backup_timestamp=20260323-144546 +# +# # With PVC pinning (WaitForFirstConsumer storage): +# ansible-playbook $COMMON_ARGS playbooks/backup_restore.yaml \ +# -e cifmw_backup_restore_pin_pvcs=true + +- name: Backup and Restore end-to-end test + hosts: "{{ cifmw_target_host | default('localhost') }}" + gather_facts: true + tasks: + - name: Run backup/restore end-to-end orchestration + ansible.builtin.import_role: + name: cifmw_backup_restore + tasks_from: e2e.yml diff --git a/post-deployment.yml b/post-deployment.yml index 746d3eaa7..b542532b2 100644 --- a/post-deployment.yml +++ b/post-deployment.yml @@ -73,6 +73,18 @@ tags: - compliance +- name: Run backup and restore test + hosts: "{{ cifmw_target_host | default('localhost') }}" + gather_facts: true + tasks: + - name: Run backup/restore end-to-end orchestration + ansible.builtin.import_role: + name: cifmw_backup_restore + tasks_from: e2e.yml + when: cifmw_run_backup_restore_test | default(false) | bool + tags: + - backup-restore + - name: Run hooks and inject status flag hosts: "{{ cifmw_target_host | default('localhost') }}" gather_facts: true diff --git a/roles/cifmw_backup_restore/README.md b/roles/cifmw_backup_restore/README.md new file mode 100644 index 000000000..a7ccce392 --- /dev/null +++ 
b/roles/cifmw_backup_restore/README.md @@ -0,0 +1,104 @@ +# cifmw_backup_restore + +Automate OpenStack on OpenShift backup and restore operations using OADP +(OpenShift API for Data Protection) and Velero. The role supports three +actions: **backup**, **restore**, and **cleanup**. + +- **backup** — creates Galera database dumps, optionally backs up OVN NB/SB + databases onto their PVCs, then creates Velero backups of labeled PVCs + (via CSI snapshots) and cluster resources. +- **restore** — performs an ordered Velero restore sequence (PVCs, + foundation, infrastructure, control plane, Galera, optional OVN file restore, + full control plane resume, dataplane, EDPM), then Neutron–OVN verification and + sync (**log** mode, then **repair**, matching the backup-restore user guide Step 12). +- **cleanup** — tears down dataplane and control-plane resources so the + namespace is ready for a fresh restore. + +## Privilege escalation + +None. All cluster operations are performed through `oc` against the target +OpenShift cluster. + +## Parameters + +### Common + +* `cifmw_backup_restore_action`: (String) Action to perform. Must be one of `backup`, `restore`, or `cleanup`. Defaults to `""` (role will fail if unset). +* `cifmw_backup_restore_namespace`: (String) Target OpenStack namespace. Defaults to `openstack`. +* `cifmw_backup_restore_oadp_namespace`: (String) Namespace where Velero/OADP is running. Defaults to `openshift-adp`. +* `cifmw_backup_restore_auto_ack`: (Boolean) Skip interactive pause prompts when `true`. Defaults to `false`. +* `cifmw_backup_restore_ovn_db`: (Boolean) When `true` (default), the **backup** path labels OVN NB/SB PVCs and runs `ovsdb-client` backup before the OADP PVC backup, and the **restore** path runs OVN NB/SB file restore after Galera (when timestamped files exist on the PVC) before resuming the full control plane. Set to `false` to skip both; post-EDPM `neutron-ovn-db-sync` still runs when OVN files were not backed up. 
+* `cifmw_backup_restore_ovn_db_ready_timeout`: (String) Timeout for `oc wait` on OVN database pods during OVN backup/restore. Defaults to `5m`. + +### Backup + +* `cifmw_backup_restore_galera_backup_timeout`: (String) Timeout for `oc wait` on Galera backup jobs. Defaults to `10m`. +* `cifmw_backup_restore_galera_storage_class`: (String) StorageClass for Galera backup PVCs. Empty string uses the cluster default. Defaults to `""`. +* `cifmw_backup_restore_galera_storage_request`: (String) Size of the Galera backup PVC. Defaults to `5Gi`. +* `cifmw_backup_restore_galera_transfer_storage_request`: (String) Size of the Galera transfer storage PVC. Defaults to `5Gi`. +* `cifmw_backup_restore_oadp_backup_timeout`: (String) Timeout for OADP PVC and resource backup completion. Defaults to `30m`. +* `cifmw_backup_restore_storage_location`: (String) Velero `BackupStorageLocation` name. Defaults to `velero-1`. +* `cifmw_backup_restore_backup_ttl`: (String) TTL for Velero backups. Defaults to `720h`. +* `cifmw_backup_restore_snapshot_move_data`: (Boolean) Enable the Velero snapshot data mover for PVC backups. Defaults to `true`. (Note: cleanup deletes all PVCs in the namespace whenever control-plane cleanup is enabled, regardless of this flag.) + +### Restore + +* `cifmw_backup_restore_backup_timestamp`: (String) Timestamp suffix that identifies the backup to restore (e.g. `20260311-081234`). **Required** when `cifmw_backup_restore_action` is `restore`. +* `cifmw_backup_restore_restore_timeout`: (Integer) Seconds to wait for each Velero Restore to reach a terminal phase. Defaults to `900`. +* `cifmw_backup_restore_infra_ready_timeout`: (String) Timeout for `oc wait` on `OpenStackControlPlaneInfrastructureReady`. Defaults to `20m`. +* `cifmw_backup_restore_ctlplane_ready_timeout`: (String) Timeout for `oc wait` on control plane `Ready` after removing the deployment-stage annotation. Defaults to `10m`. +* `cifmw_backup_restore_strict_restore`: (Boolean) Fail on Velero `PartiallyFailed` status when `true`; only warn when `false`. Defaults to `true`. 
+* `cifmw_backup_restore_restore_content`: (String) Content flag passed to `restore_galera` (`--content`). Defaults to `data`. +* `cifmw_backup_restore_edpm_deploy_timeout`: (String) Timeout for `oc wait` on the post-restore EDPM deployment. Defaults to `40m`. +* `cifmw_backup_restore_pin_pvcs`: (Boolean) Enable PVC-to-node pinning during restore for WaitForFirstConsumer storage classes. Defaults to `false`. +* Post-EDPM **Neutron–OVN** steps follow [user guide Step 12](https://github.com/openstack-k8s-operators/dev-docs/blob/main/backup-restore/user-guide.md#step-12-verify-and-sync-neutron-to-ovn): run `neutron-ovn-db-sync-util` in `log` mode first (`neutron-dist.conf`, `neutron.conf`, `neutron.conf.d`). **Repair** runs if `cifmw_backup_restore_ovn_db` is `false` (no OVN NB/SB file backup was taken), or if log-mode stdout/stderr contains a `WARNING` line—Neutron reports drift that way while still exiting 0. If OVN file backup/restore was enabled and log output has no `WARNING` lines, repair is skipped as redundant. + +### Cleanup + +* `cifmw_backup_restore_cleanup_ctlplane`: (Boolean) Delete control-plane resources during cleanup. Defaults to `true`. +* `cifmw_backup_restore_cleanup_dataplane`: (Boolean) Delete dataplane resources during cleanup. Defaults to `true`. 
+ +## Examples + +### Running a backup + +```YAML +- hosts: localhost + tasks: + - name: Backup OpenStack + ansible.builtin.include_role: + name: cifmw_backup_restore + vars: + cifmw_backup_restore_action: backup + cifmw_backup_restore_namespace: openstack + cifmw_backup_restore_auto_ack: true +``` + +### Restoring from a backup + +```YAML +- hosts: localhost + tasks: + - name: Restore OpenStack + ansible.builtin.include_role: + name: cifmw_backup_restore + vars: + cifmw_backup_restore_action: restore + cifmw_backup_restore_backup_timestamp: "20260311-081234" + cifmw_backup_restore_auto_ack: true +``` + +### Cleaning up before a restore + +```YAML +- hosts: localhost + tasks: + - name: Cleanup namespace + ansible.builtin.include_role: + name: cifmw_backup_restore + vars: + cifmw_backup_restore_action: cleanup + cifmw_backup_restore_auto_ack: true + cifmw_backup_restore_cleanup_ctlplane: true + cifmw_backup_restore_cleanup_dataplane: true +``` diff --git a/roles/cifmw_backup_restore/defaults/main.yml b/roles/cifmw_backup_restore/defaults/main.yml new file mode 100644 index 000000000..91493fb82 --- /dev/null +++ b/roles/cifmw_backup_restore/defaults/main.yml @@ -0,0 +1,76 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +# All variables intended for modification should be placed in this file. 
+# All variables within this role should have a prefix of "cifmw_backup_restore" + +# Action to perform: backup, restore, or cleanup +cifmw_backup_restore_action: "" + +# Common +cifmw_backup_restore_namespace: "{{ cifmw_openstack_namespace | default('openstack') }}" +cifmw_backup_restore_oadp_namespace: openshift-adp +cifmw_backup_restore_auto_ack: false + +# End-to-end orchestration (tasks/e2e.yml; invoked from post-deployment or playbooks/backup_restore.yaml) +cifmw_backup_restore_install_deps: true +cifmw_backup_restore_create_workload: true +cifmw_backup_restore_run_backup: true +cifmw_backup_restore_run_cleanup: true +cifmw_backup_restore_run_restore: true +cifmw_backup_restore_run_post_tempest: false + +# Passthrough to update role when creating the test workload (prefix matches update role, not this role) +cifmw_update_ping_test: true +cifmw_update_control_plane_check: false +cifmw_update_artifacts_basedir_suffix: "tests/update" +cifmw_update_artifacts_basedir: "{{ ansible_user_dir }}/ci-framework-data/{{ cifmw_update_artifacts_basedir_suffix }}" +cifmw_update_workload_launch_script: "{{ cifmw_update_artifacts_basedir }}/workload_launch.sh" +cifmw_update_timestamper_cmd: >- + | awk '{ print strftime("%Y-%m-%d %H:%M:%S |"), $0; fflush(); }' +cifmw_update_ping_start_script: "{{ cifmw_update_artifacts_basedir }}/l3_agent_start_ping.sh" +cifmw_update_ping_stop_script: "{{ cifmw_update_artifacts_basedir }}/l3_agent_stop_ping.sh" +cifmw_update_namespace: "{{ cifmw_backup_restore_namespace }}" + +# Backup +cifmw_backup_restore_galera_backup_timeout: 10m +cifmw_backup_restore_galera_storage_class: "" +cifmw_backup_restore_galera_storage_request: 5Gi +cifmw_backup_restore_galera_transfer_storage_request: 5Gi +cifmw_backup_restore_oadp_backup_timeout: 30m +cifmw_backup_restore_storage_location: velero-1 +cifmw_backup_restore_backup_ttl: 720h +cifmw_backup_restore_snapshot_move_data: true +cifmw_backup_restore_swift_xattr_timeout: 600s + +# OVN NB/SB database files on 
PVCs (user-guide backup Step 3 / restore Step 8) +cifmw_backup_restore_ovn_db: true +cifmw_backup_restore_ovn_db_ready_timeout: 5m + +# Restore +# cifmw_backup_restore_backup_timestamp: REQUIRED for restore (e.g., 20260311-081234) +cifmw_backup_restore_restore_timeout: 900 +cifmw_backup_restore_edpm_deploy_timeout: 40m +cifmw_backup_restore_infra_ready_timeout: 20m +cifmw_backup_restore_ctlplane_ready_timeout: 10m +cifmw_backup_restore_strict_restore: true +cifmw_backup_restore_restore_content: data +cifmw_backup_restore_pin_pvcs: false + +# Cleanup +cifmw_backup_restore_cleanup_ctlplane: true +cifmw_backup_restore_cleanup_dataplane: true diff --git a/roles/cifmw_backup_restore/meta/main.yml b/roles/cifmw_backup_restore/meta/main.yml new file mode 100644 index 000000000..3a28894d9 --- /dev/null +++ b/roles/cifmw_backup_restore/meta/main.yml @@ -0,0 +1,31 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ + +galaxy_info: + author: CI Framework + description: CI Framework Role -- OpenStack Backup and Restore + company: Red Hat + license: Apache-2.0 + min_ansible_version: "2.14" + namespace: cifmw + galaxy_tags: + - cifmw + - openstack + - backup + - restore + +dependencies: [] diff --git a/roles/cifmw_backup_restore/tasks/backup.yml b/roles/cifmw_backup_restore/tasks/backup.yml new file mode 100644 index 000000000..333a2d66c --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/backup.yml @@ -0,0 +1,320 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# OpenStack Full Backup +# +# 1. Trigger Galera database dumps (creates fresh DB dumps on PVCs) +# 2. Optional: label OVN PVCs and ovsdb-client backup (NB/SB) onto PVCs +# 3. OADP PVC backup (CSI snapshots of labeled PVCs) +# 4. OADP resources backup (CRs, Secrets, ConfigMaps, NADs, etc.) + +# ======================================== +# Pre-flight checks +# ======================================== +- name: Verify OADP operator is installed + ansible.builtin.shell: | + oc get crd backups.velero.io -o name + register: oadp_crd_check + changed_when: false + failed_when: false + +- name: Fail if OADP is not installed + ansible.builtin.fail: + msg: | + OADP operator is not installed (Velero Backup CRD not found). + Install OADP first. 
+ when: oadp_crd_check.rc != 0 + +- name: Verify VolumeSnapshotClass with Velero label exists + ansible.builtin.shell: | + oc get volumesnapshotclass -l velero.io/csi-volumesnapshot-class=true -o name + register: vsc_check + changed_when: false + failed_when: false + +- name: Fail if no VolumeSnapshotClass with Velero label + ansible.builtin.fail: + msg: | + No VolumeSnapshotClass found with label velero.io/csi-volumesnapshot-class=true. + Label your VolumeSnapshotClass for Velero CSI snapshots. + when: vsc_check.rc != 0 or vsc_check.stdout == "" + +- name: Verify OpenStackBackupConfig exists + ansible.builtin.shell: | + oc get openstackbackupconfig -n {{ cifmw_backup_restore_namespace }} -o jsonpath='{.items[0].metadata.name}' + register: _backupconfig_check + changed_when: false + failed_when: false + +- name: Fail if OpenStackBackupConfig is not deployed + ansible.builtin.fail: + msg: | + No OpenStackBackupConfig CR found in namespace {{ cifmw_backup_restore_namespace }}. + The BackupConfig controller labels secrets, configmaps, and other resources + for backup/restore. Without it, user-provided resources (e.g. osp-secret) + will not be restored. + Create an OpenStackBackupConfig CR before running backup. 
+ when: _backupconfig_check.rc != 0 or _backupconfig_check.stdout == "" + +- name: Verify resources are labeled for restore + ansible.builtin.shell: | + set -o pipefail + SECRET_COUNT=$(oc get secret -n {{ cifmw_backup_restore_namespace }} -l backup.openstack.org/restore=true --no-headers 2>/dev/null | wc -l) + CM_COUNT=$(oc get configmap -n {{ cifmw_backup_restore_namespace }} -l backup.openstack.org/restore=true --no-headers 2>/dev/null | wc -l) + echo "Labeled secrets: ${SECRET_COUNT}" + echo "Labeled configmaps: ${CM_COUNT}" + if [ "${SECRET_COUNT}" -eq 0 ]; then + echo "WARNING: No secrets labeled for restore" >&2 + exit 1 + fi + register: _labeled_resources_check + changed_when: false + failed_when: false + +- name: Display labeled resource counts + ansible.builtin.debug: + msg: "{{ _labeled_resources_check.stdout_lines }}" + +- name: Fail if no resources are labeled + ansible.builtin.fail: + msg: | + No secrets are labeled with backup.openstack.org/restore=true. + The OpenStackBackupConfig controller may not have reconciled yet. + Check: oc get openstackbackupconfig -n {{ cifmw_backup_restore_namespace }} -o yaml + when: _labeled_resources_check.rc != 0 + +- name: Set backup timestamp + ansible.builtin.set_fact: + cifmw_backup_restore_backup_name_suffix: "{{ lookup('pipe', 'date +%Y%m%d-%H%M%S') }}" + +- name: Collect operator version information + ansible.builtin.shell: | + oc get csv -n openstack-operators \ + -l operators.coreos.com/openstack-operator.openstack-operators \ + -o jsonpath='{.items[0].metadata.name}' + register: _operator_csv_version + changed_when: false + failed_when: false + +- name: Fail if OpenStack operator CSV could not be determined + ansible.builtin.fail: + msg: | + Could not read OpenStack operator ClusterServiceVersion in namespace openstack-operators + (oc get csv -l operators.coreos.com/openstack-operator.openstack-operators). 
+ when: > + (_operator_csv_version.rc | default(-1)) != 0 or + ((_operator_csv_version.stdout | default('')) | trim | length) == 0 + +- name: Collect catalog source image + ansible.builtin.shell: | + oc get catalogsource -n openstack-operators \ + -o jsonpath='{.items[0].spec.image}' + register: _operator_catalog_image + changed_when: false + failed_when: false + +- name: Collect operator image + ansible.builtin.shell: | + oc get deployment openstack-operator-controller-manager \ + -n openstack-operators -o jsonpath='{.spec.template.spec.containers[0].image}' + register: _operator_image + changed_when: false + failed_when: false + +- name: Set operator version facts + ansible.builtin.set_fact: + _backup_csv_version: "{{ _operator_csv_version.stdout }}" + _backup_catalog_image: "{{ _operator_catalog_image.stdout | default('unknown') }}" + _backup_operator_image: "{{ _operator_image.stdout | default('unknown') }}" + +- name: Display operator version information + ansible.builtin.debug: + msg: + - "CSV version: {{ _backup_csv_version }}" + - "Catalog image: {{ _backup_catalog_image }}" + - "Operator image: {{ _backup_operator_image }}" + +- name: Create temp directory for rendered templates + ansible.builtin.tempfile: + state: directory + prefix: openstack-backup- + register: _cifmw_backup_restore_rendered_dir + +# ======================================== +# Step 1: Trigger Galera Database Dumps +# ======================================== +# GaleraBackup CRs and cronjobs are created by setup_galerabackup.yml +# (run as part of install_deps). This step only triggers the dump jobs. 
+- name: "Step 1: Trigger Galera database dumps" + ansible.builtin.debug: + msg: + - "========================================" + - "Step 1: Trigger Galera Database Dumps" + - "========================================" + +- name: Get Galera backup cronjobs + ansible.builtin.shell: | + oc get cronjob -n {{ cifmw_backup_restore_namespace }} -l app=galera -o jsonpath='{.items[*].metadata.name}' + register: _galera_backup_cronjobs + changed_when: false + +- name: Fail if no GaleraBackup cronjobs found + ansible.builtin.fail: + msg: | + No GaleraBackup cronjobs found. Run with cifmw_backup_restore_install_deps=true + first to create GaleraBackup CRs. + when: _galera_backup_cronjobs.stdout == "" + +- name: Trigger Galera backup jobs + ansible.builtin.shell: | + set -o pipefail + BACKUP_JOB_NAME="{{ item }}-{{ cifmw_backup_restore_backup_name_suffix }}" + oc -n {{ cifmw_backup_restore_namespace }} create job --from=cronjob/{{ item }} ${BACKUP_JOB_NAME} \ + --dry-run=client -o json | \ + jq '.spec.template.spec.containers[0].env += [{"name":"BACKUP_TIMESTAMP","value":"{{ cifmw_backup_restore_backup_name_suffix }}"}]' | \ + oc -n {{ cifmw_backup_restore_namespace }} create -f - + echo ${BACKUP_JOB_NAME} + loop: "{{ _galera_backup_cronjobs.stdout.split() }}" + register: _galera_backup_jobs + changed_when: true + when: _galera_backup_cronjobs.stdout != "" + +- name: Wait for Galera backup jobs to complete + ansible.builtin.shell: | + oc -n {{ cifmw_backup_restore_namespace }} wait --for=condition=complete job/{{ item.stdout_lines[-1] }} --timeout={{ cifmw_backup_restore_galera_backup_timeout }} + loop: "{{ _galera_backup_jobs.results }}" + changed_when: false + when: _galera_backup_cronjobs.stdout != "" + +# ======================================== +# Step 2: OVN database backup (label PVCs; ovsdb-client backup) +# ======================================== +- name: Include OVN database backup tasks + ansible.builtin.include_tasks: ovn_db_backup.yml + when: 
cifmw_backup_restore_ovn_db | bool + +# ======================================== +# Step 3: OADP PVC Backup (CSI Snapshots) +# ======================================== +- name: List PVCs marked for backup + ansible.builtin.shell: | + oc get pvc -n {{ cifmw_backup_restore_namespace }} -l backup.openstack.org/backup=true \ + -o custom-columns=NAME:.metadata.name,SIZE:.spec.resources.requests.storage --no-headers + register: _labeled_pvcs + changed_when: false + failed_when: false + +- name: Print labeled PVCs + ansible.builtin.debug: + msg: "{{ _labeled_pvcs.stdout_lines }}" + when: _labeled_pvcs.stdout_lines | default([]) | length > 0 + +- name: Render PVC backup CR + ansible.builtin.template: + src: backup-pvcs.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/backup-pvcs.yaml" + mode: "0644" + +- name: Pause before PVC backup + ansible.builtin.pause: + prompt: >- + Step 1 complete: Galera DB dumps{% if cifmw_backup_restore_ovn_db | bool %} + ; Step 2 complete: OVN DB backups on PVCs{% endif %}. 
+ Press Enter to create PVC backup, or Ctrl+C then 'A' to abort + when: not (cifmw_backup_restore_auto_ack | bool) + +- name: Create OADP PVC backup + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/backup-pvcs.yaml" + state: present + +- name: Wait for PVC backup to complete + ansible.builtin.command: + cmd: >- + oc get backup openstack-backup-pvcs-{{ cifmw_backup_restore_backup_name_suffix }} + -n {{ cifmw_backup_restore_oadp_namespace }} + -o jsonpath='{.status.phase}' + register: _pvc_backup_phase + changed_when: false + until: _pvc_backup_phase.stdout in ["Completed", "Failed", "PartiallyFailed"] + retries: "{{ (cifmw_backup_restore_oadp_backup_timeout | regex_replace('[^0-9]', '') | int * 60 / 10) | int }}" + delay: 10 + +- name: Fail if PVC backup did not complete + ansible.builtin.fail: + msg: "PVC backup ended with phase: {{ _pvc_backup_phase.stdout }}" + when: _pvc_backup_phase.stdout != "Completed" + +# ======================================== +# Step 4: OADP Resources Backup +# ======================================== +- name: Render resources backup CR + ansible.builtin.template: + src: backup-resources.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/backup-resources.yaml" + mode: "0644" + +- name: Pause before resources backup + ansible.builtin.pause: + prompt: >- + Step 3 complete: PVC backup. 
Press Enter to create resources backup, or Ctrl+C then 'A' to abort + when: not (cifmw_backup_restore_auto_ack | bool) + +- name: Create OADP resources backup + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/backup-resources.yaml" + state: present + +- name: Wait for resources backup to complete + ansible.builtin.command: + cmd: >- + oc get backup openstack-backup-resources-{{ cifmw_backup_restore_backup_name_suffix }} + -n {{ cifmw_backup_restore_oadp_namespace }} + -o jsonpath='{.status.phase}' + register: _resources_backup_phase + changed_when: false + until: _resources_backup_phase.stdout in ["Completed", "Failed", "PartiallyFailed"] + retries: "{{ (cifmw_backup_restore_oadp_backup_timeout | regex_replace('[^0-9]', '') | int * 60 / 10) | int }}" + delay: 10 + +- name: Fail if resources backup did not complete + ansible.builtin.fail: + msg: "Resources backup ended with phase: {{ _resources_backup_phase.stdout }}" + when: _resources_backup_phase.stdout != "Completed" + +# ======================================== +# Summary +# ======================================== +- name: Print backup summary + ansible.builtin.debug: + msg: + - "========================================" + - "Backup Complete" + - "========================================" + - "" + - "Backup name suffix: {{ cifmw_backup_restore_backup_name_suffix }}" + - "PVC backup: openstack-backup-pvcs-{{ cifmw_backup_restore_backup_name_suffix }}" + - "Resources backup: openstack-backup-resources-{{ cifmw_backup_restore_backup_name_suffix }}" + - "" + - "Operator version recorded on Backup CRs:" + - " CSV: {{ _backup_csv_version }}" + - " Catalog: {{ _backup_catalog_image }}" + - " Image: {{ _backup_operator_image }}" + +- name: Cleanup rendered templates + ansible.builtin.file: + path: "{{ _cifmw_backup_restore_rendered_dir.path }}" + state: absent diff --git a/roles/cifmw_backup_restore/tasks/cleanup.yml b/roles/cifmw_backup_restore/tasks/cleanup.yml new file mode 100644 index 
000000000..ce1e1b178 --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/cleanup.yml @@ -0,0 +1,309 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# OpenStack Cleanup for Restore +# +# Cleans up OpenStack control plane and data plane resources to prepare +# for a restore from OADP backup. + +- name: Print cleanup header + ansible.builtin.debug: + msg: + - "========================================" + - "OpenStack Cleanup for Restore" + - "========================================" + - "Namespace: {{ cifmw_backup_restore_namespace }}" + - "Cleanup ControlPlane: {{ cifmw_backup_restore_cleanup_ctlplane }}" + - "Cleanup DataPlane: {{ cifmw_backup_restore_cleanup_dataplane }}" + - "Delete labeled PVCs (Data Mover): {{ cifmw_backup_restore_snapshot_move_data }}" + +- name: Check if namespace exists + ansible.builtin.shell: | + oc get namespace {{ cifmw_backup_restore_namespace }} + register: _namespace_check + changed_when: false + failed_when: false + +- name: Exit if namespace doesn't exist + ansible.builtin.debug: + msg: "Namespace {{ cifmw_backup_restore_namespace }} does not exist. Nothing to clean up." 
+ when: _namespace_check.rc != 0 + +- name: End cleanup if namespace doesn't exist + ansible.builtin.meta: end_host + when: _namespace_check.rc != 0 + +- name: Gather resource counts + ansible.builtin.shell: | + set -o pipefail + echo "OpenStackControlPlane: $(oc get openstackcontrolplane -n {{ cifmw_backup_restore_namespace }} --no-headers 2>/dev/null | wc -l)" + echo "DataPlaneNodeSets: $(oc get openstackdataplanenodeset -n {{ cifmw_backup_restore_namespace }} --no-headers 2>/dev/null | wc -l)" + echo "GaleraBackup: $(oc get galerabackup -n {{ cifmw_backup_restore_namespace }} --no-headers 2>/dev/null | wc -l)" + echo "Labeled PVCs: $(oc get pvc -n {{ cifmw_backup_restore_namespace }} -l backup.openstack.org/backup=true --no-headers 2>/dev/null | wc -l)" + register: _resource_counts + changed_when: false + +- name: Display found resources + ansible.builtin.debug: + msg: "{{ _resource_counts.stdout_lines }}" + +- name: Confirm deletion + ansible.builtin.pause: + prompt: "WARNING: This will delete OpenStack resources in {{ cifmw_backup_restore_namespace }}. Continue? 
(yes/no)" + register: _delete_confirm + when: not (cifmw_backup_restore_auto_ack | bool) + +- name: Fail if not confirmed + ansible.builtin.fail: + msg: "Cleanup cancelled by user" + when: not (cifmw_backup_restore_auto_ack | bool) and _delete_confirm.user_input != "yes" + +# ======================================== +# DataPlane Cleanup +# ======================================== +- name: Delete DataPlaneDeployment CRs + ansible.builtin.shell: | + oc delete openstackdataplanedeployment --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_dataplane | bool + +- name: Delete DataPlaneNodeSet CRs + ansible.builtin.shell: | + oc delete openstackdataplanenodeset --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_dataplane | bool + +- name: Delete DataPlaneService CRs + ansible.builtin.shell: | + oc delete openstackdataplaneservice --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_dataplane | bool + +- name: Delete NetConfig CRs + ansible.builtin.shell: | + oc delete netconfig --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_dataplane | bool + +- name: Wait for dataplane pods to terminate + ansible.builtin.shell: | + set -o pipefail + oc get pods -n {{ cifmw_backup_restore_namespace }} -l app=openstackansibleee --no-headers 2>/dev/null | wc -l + register: _dp_pod_count + until: _dp_pod_count.stdout|int == 0 + retries: 30 + delay: 10 + changed_when: false + failed_when: false + when: cifmw_backup_restore_cleanup_dataplane | bool + +# ======================================== +# ControlPlane Cleanup +# ======================================== +- name: Get PVC-pin dummy Deployments + kubernetes.core.k8s_info: + api_version: apps/v1 + kind: Deployment + namespace: "{{ 
cifmw_backup_restore_namespace }}" + label_selectors: + - app=pvc-pin + register: _pvc_pin_deployments + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete PVC-pin dummy Deployments + kubernetes.core.k8s: + api_version: apps/v1 + kind: Deployment + namespace: "{{ cifmw_backup_restore_namespace }}" + name: "{{ item.metadata.name }}" + state: absent + loop: "{{ _pvc_pin_deployments.resources | default([]) }}" + loop_control: + label: "{{ item.metadata.name }}" + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete openstack-restore-tmp namespace + ansible.builtin.shell: | + if oc get namespace openstack-restore-tmp &>/dev/null; then + for SECRET in $(oc get secret -n openstack-restore-tmp -o name 2>/dev/null); do + oc patch ${SECRET} -n openstack-restore-tmp --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true + done + oc delete namespace openstack-restore-tmp --wait=true --timeout=60s + fi + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete GaleraRestore CRs + ansible.builtin.shell: | + oc delete galerarestore --all -n {{ cifmw_backup_restore_namespace }} --wait=true + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete GaleraBackup CRs + ansible.builtin.shell: | + oc delete galerabackup --all -n {{ cifmw_backup_restore_namespace }} --wait=true + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete OpenStackControlPlane CR + ansible.builtin.shell: | + oc delete openstackcontrolplane --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Wait for pods to terminate + ansible.builtin.command: + cmd: >- + oc get pods -n {{ cifmw_backup_restore_namespace }} + --field-selector=status.phase!=Succeeded --no-headers + register: _remaining_pods + 
changed_when: false + failed_when: false + until: _remaining_pods.stdout_lines | length == 0 + retries: 12 + delay: 5 + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Force delete stuck pods if still remaining + when: + - cifmw_backup_restore_cleanup_ctlplane | bool + - _remaining_pods.stdout_lines | default([]) | length > 0 + block: + - name: Force delete remaining pods + ansible.builtin.command: + cmd: >- + oc delete pods --all -n {{ cifmw_backup_restore_namespace }} + --force --grace-period=0 + changed_when: true + failed_when: false + + - name: Wait for pods after force delete + ansible.builtin.command: + cmd: >- + oc get pods -n {{ cifmw_backup_restore_namespace }} + --field-selector=status.phase!=Succeeded --no-headers + register: _remaining_pods_final + changed_when: false + failed_when: false + until: _remaining_pods_final.stdout_lines | length == 0 + retries: 48 + delay: 5 + +- name: Delete OpenStackBackupConfig CRs + ansible.builtin.shell: | + oc delete openstackbackupconfig --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete OpenStackVersion CRs + ansible.builtin.shell: | + oc delete openstackversion --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete RabbitMQUser CRs + ansible.builtin.shell: | + oc delete rabbitmquser --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete Certificate CRs (cert-manager) + ansible.builtin.shell: | + oc delete certificate --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete cert secrets + ansible.builtin.shell: | + set -o pipefail + for i in $(oc get secret -n {{ 
cifmw_backup_restore_namespace }} -o name | grep cert | grep -v edpm | grep -v ceph-conf); do + oc delete -n {{ cifmw_backup_restore_namespace }} $i + done + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete CA bundle secrets + ansible.builtin.shell: | + oc delete secret -n {{ cifmw_backup_restore_namespace }} rootca-internal rootca-libvirt rootca-ovn rootca-public combined-ca-bundle 2>/dev/null || true + changed_when: true + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete remaining user-provided secrets + ansible.builtin.shell: | + set -o pipefail + for i in $(oc get secret -n {{ cifmw_backup_restore_namespace }} -o name \ + | grep -v dockercfg \ + | grep -v service-account-token); do + oc delete -n {{ cifmw_backup_restore_namespace }} $i + done + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete ConfigMaps + ansible.builtin.shell: | + oc delete configmap --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +- name: Delete DNSData CRs + ansible.builtin.shell: | + oc delete dnsdata --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +# ======================================== +# Delete all PVCs +# ======================================== +- name: Delete all PVCs in namespace + ansible.builtin.shell: | + oc delete pvc --all -n {{ cifmw_backup_restore_namespace }} + changed_when: true + failed_when: false + when: cifmw_backup_restore_cleanup_ctlplane | bool + +# ======================================== +# Summary +# ======================================== +- name: Verify cleanup + ansible.builtin.shell: | + oc get all,pvc -n {{ cifmw_backup_restore_namespace }} + register: _remaining_resources + changed_when: false + failed_when: false + +- name: 
Display remaining resources + ansible.builtin.debug: + msg: "{{ _remaining_resources.stdout_lines }}" + +- name: Print cleanup complete + ansible.builtin.debug: + msg: "Namespace {{ cifmw_backup_restore_namespace }} is ready for restore." diff --git a/roles/cifmw_backup_restore/tasks/e2e.yml b/roles/cifmw_backup_restore/tasks/e2e.yml new file mode 100644 index 000000000..b7ae118ae --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/e2e.yml @@ -0,0 +1,254 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# End-to-end backup/restore test orchestration (hooks, deps, workload, +# backup/cleanup/restore, validation, optional tempest). +# Variables: defaults/main.yml (cifmw_backup_restore_install_deps, …). 
+ +- name: Run pre_backup_restore hooks + vars: + step: pre_backup_restore + ansible.builtin.import_role: + name: run_hook + +# ======================================== +# Step 1: Install dependencies +# ======================================== +- name: Setup MinIO + ansible.builtin.include_role: + name: deploy_minio + when: cifmw_backup_restore_install_deps | bool + +- name: Setup OADP + ansible.builtin.include_role: + name: openshift_adp + vars: + cifmw_openshift_adp_s3_access_key: "{{ cifmw_deploy_minio_access_key }}" + cifmw_openshift_adp_s3_secret_key: "{{ cifmw_deploy_minio_secret_key }}" + when: cifmw_backup_restore_install_deps | bool + +- name: Setup GaleraBackup CRs + ansible.builtin.include_role: + name: cifmw_backup_restore + tasks_from: setup_galerabackup.yml + when: cifmw_backup_restore_install_deps | bool + +# ======================================== +# Step 2: Create test workload +# ======================================== +- name: Create test workload + when: cifmw_backup_restore_create_workload | bool + block: + - name: Ensure update artifacts directory exists + ansible.builtin.file: + path: "{{ cifmw_update_artifacts_basedir }}" + state: directory + mode: "0755" + + - name: Generate workload launch script + ansible.builtin.include_role: + name: update + tasks_from: create_test_files.yml + vars: + cifmw_update_ping_test: "{{ cifmw_backup_restore_create_workload | bool }}" + + - name: Create local openstackclient container + ansible.builtin.include_role: + name: update + tasks_from: create_local_openstackclient.yml + + - name: Create the workload instance + ansible.builtin.include_role: + name: update + tasks_from: create_instance.yml + +# ======================================== +# Step 3: Create backup +# ======================================== +- name: Create backup + ansible.builtin.include_role: + name: cifmw_backup_restore + vars: + cifmw_backup_restore_action: backup + cifmw_backup_restore_auto_ack: true + when: 
cifmw_backup_restore_run_backup | bool + +- name: Print backup timestamp + ansible.builtin.debug: + msg: "Backup completed with timestamp: {{ cifmw_backup_restore_backup_name_suffix }}" + when: cifmw_backup_restore_run_backup | bool + +# ======================================== +# Step 4: Cleanup namespace +# ======================================== +- name: Cleanup + ansible.builtin.include_role: + name: cifmw_backup_restore + vars: + cifmw_backup_restore_action: cleanup + cifmw_backup_restore_auto_ack: true + when: cifmw_backup_restore_run_cleanup | bool + +# ======================================== +# Step 5: Restore from backup +# ======================================== +- name: Resolve backup timestamp for restore (from backup run or extra-var) + ansible.builtin.set_fact: + _cifmw_backup_restore_e2e_restore_timestamp: >- + {{ cifmw_backup_restore_backup_name_suffix | default(cifmw_backup_restore_backup_timestamp, true) }} + when: cifmw_backup_restore_run_restore | bool + +- name: Restore + ansible.builtin.include_role: + name: cifmw_backup_restore + vars: + cifmw_backup_restore_action: restore + cifmw_backup_restore_backup_timestamp: "{{ _cifmw_backup_restore_e2e_restore_timestamp }}" + cifmw_backup_restore_auto_ack: true + when: cifmw_backup_restore_run_restore | bool + +# ======================================== +# Step 6: Post-restore workload validation +# ======================================== +- name: Validate test workload after restore + when: + - cifmw_backup_restore_create_workload | bool + - cifmw_backup_restore_run_restore | bool + vars: + _os_exec: >- + oc exec -t openstackclient -n {{ cifmw_backup_restore_namespace }} -- + block: + - name: Verify compute services are up + ansible.builtin.shell: | + set -o pipefail + {{ _os_exec }} openstack compute service list -f json | \ + jq -r '.[] | "\(.Binary) \(.Host) \(.State)"' + register: _compute_services + changed_when: false + + - name: Display compute services + ansible.builtin.debug: + msg: "{{ 
_compute_services.stdout_lines }}" + + - name: Verify network agents are up + ansible.builtin.shell: | + set -o pipefail + {{ _os_exec }} openstack network agent list -f json | \ + jq -r '.[] | "\(.["Agent Type"]) \(.Host) \(.Alive)"' + register: _network_agents + changed_when: false + + - name: Display network agents + ansible.builtin.debug: + msg: "{{ _network_agents.stdout_lines }}" + + - name: Get instance info + ansible.builtin.shell: | + set -o pipefail + {{ _os_exec }} openstack server list -f json | \ + jq -r '.[0] | "\(.Name) \(.Status) \(.Networks)"' + register: _instance_info + changed_when: false + + - name: Display instance info + ansible.builtin.debug: + msg: "Instance: {{ _instance_info.stdout }}" + + - name: Get floating IP of test instance + ansible.builtin.shell: | + set -o pipefail + {{ _os_exec }} openstack server list -f json | \ + jq -r '.[0].Networks' | grep -oP '[\d.]+' | tail -1 + register: _instance_fip + changed_when: false + + - name: Ping floating IP + ansible.builtin.shell: | + ping -c 3 -W 5 {{ _instance_fip.stdout }} + register: _ping_result + changed_when: false + retries: 6 + delay: 10 + until: _ping_result.rc == 0 + + - name: Display ping result + ansible.builtin.debug: + msg: "Ping to {{ _instance_fip.stdout }}: SUCCESS" + + - name: Stop test instance + ansible.builtin.shell: | + set -o pipefail + INSTANCE=$({{ _os_exec }} openstack server list -f json | jq -r '.[0].Name') + {{ _os_exec }} openstack server stop ${INSTANCE} + changed_when: true + + - name: Wait for instance to stop + ansible.builtin.shell: | + set -o pipefail + INSTANCE=$({{ _os_exec }} openstack server list -f json | jq -r '.[0].Name') + {{ _os_exec }} openstack server show ${INSTANCE} -f json | jq -r '.status' + register: _stop_status + changed_when: false + retries: 12 + delay: 5 + until: _stop_status.stdout == 'SHUTOFF' + + - name: Start test instance + ansible.builtin.shell: | + set -o pipefail + INSTANCE=$({{ _os_exec }} openstack server list -f json | jq -r 
'.[0].Name') + {{ _os_exec }} openstack server start ${INSTANCE} + changed_when: true + + - name: Wait for instance to become active + ansible.builtin.shell: | + set -o pipefail + INSTANCE=$({{ _os_exec }} openstack server list -f json | jq -r '.[0].Name') + {{ _os_exec }} openstack server show ${INSTANCE} -f json | jq -r '.status' + register: _start_status + changed_when: false + retries: 12 + delay: 5 + until: _start_status.stdout == 'ACTIVE' + + - name: Ping floating IP after stop/start + ansible.builtin.shell: | + ping -c 3 -W 5 {{ _instance_fip.stdout }} + register: _ping_after_restart + changed_when: false + retries: 6 + delay: 10 + until: _ping_after_restart.rc == 0 + + - name: Post-restore workload validation passed + ansible.builtin.debug: + msg: >- + Workload validation passed: instance reachable via FIP {{ _instance_fip.stdout }}, + stop/start successful, ping after restart OK + +# ======================================== +# Step 7: Post-restore tempest validation +# ======================================== +- name: Run post-restore tempest validation + ansible.builtin.include_role: + name: "{{ cifmw_run_test_role | default('test_operator') }}" + when: cifmw_backup_restore_run_post_tempest | bool + +- name: Run post_backup_restore hooks + vars: + step: post_backup_restore + ansible.builtin.import_role: + name: run_hook diff --git a/roles/cifmw_backup_restore/tasks/main.yml b/roles/cifmw_backup_restore/tasks/main.yml new file mode 100644 index 000000000..0bab11749 --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/main.yml @@ -0,0 +1,25 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +- name: Validate action parameter + ansible.builtin.fail: + msg: >- + cifmw_backup_restore_action must be set to one of: backup, restore, cleanup. + Example: -e cifmw_backup_restore_action=backup + when: cifmw_backup_restore_action not in ['backup', 'restore', 'cleanup'] + +- name: Run {{ cifmw_backup_restore_action }} + ansible.builtin.include_tasks: "{{ cifmw_backup_restore_action }}.yml" diff --git a/roles/cifmw_backup_restore/tasks/ovn_db_backup.yml b/roles/cifmw_backup_restore/tasks/ovn_db_backup.yml new file mode 100644 index 000000000..16a75c408 --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/ovn_db_backup.yml @@ -0,0 +1,83 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# OVN Northbound and Southbound DB backups (see dev-docs backup-restore user-guide). +# Backup files are written to OVN PVCs and included in the subsequent OADP PVC backup. 
+ +- name: "Step 2: OVN database backup (label PVCs and ovsdb-client backup)" + ansible.builtin.debug: + msg: + - "========================================" + - "Step 2: OVN Database Backup" + - "========================================" + +- name: Wait for OVN NB database pod + ansible.builtin.command: + cmd: >- + oc wait pod -n {{ cifmw_backup_restore_namespace }} + -l service=ovsdbserver-nb + --for=condition=Ready + --timeout={{ cifmw_backup_restore_ovn_db_ready_timeout }} + changed_when: false + +- name: Wait for OVN SB database pod + ansible.builtin.command: + cmd: >- + oc wait pod -n {{ cifmw_backup_restore_namespace }} + -l service=ovsdbserver-sb + --for=condition=Ready + --timeout={{ cifmw_backup_restore_ovn_db_ready_timeout }} + changed_when: false + +- name: Label OVN NB PVCs for backup and restore + ansible.builtin.command: + cmd: >- + oc label pvc -n {{ cifmw_backup_restore_namespace }} + -l service=ovsdbserver-nb + backup.openstack.org/backup=true + backup.openstack.org/restore=true + backup.openstack.org/restore-order=00 + backup.openstack.org/category=controlplane + --overwrite + changed_when: true + +- name: Label OVN SB PVCs for backup and restore + ansible.builtin.command: + cmd: >- + oc label pvc -n {{ cifmw_backup_restore_namespace }} + -l service=ovsdbserver-sb + backup.openstack.org/backup=true + backup.openstack.org/restore=true + backup.openstack.org/restore-order=00 + backup.openstack.org/category=controlplane + --overwrite + changed_when: true + +- name: Backup OVN Northbound database to PVC + ansible.builtin.shell: | + set -o pipefail + oc exec ovsdbserver-nb-0 -n {{ cifmw_backup_restore_namespace }} -c ovsdbserver-nb -- \ + bash -c "ovsdb-client backup unix:/etc/ovn/ovnnb_db.sock OVN_Northbound \ + > /etc/ovn/ovnnb_db.db.{{ cifmw_backup_restore_backup_name_suffix }}" + changed_when: true + +- name: Backup OVN Southbound database to PVC + ansible.builtin.shell: | + set -o pipefail + oc exec ovsdbserver-sb-0 -n {{ 
cifmw_backup_restore_namespace }} -c ovsdbserver-sb -- \ + bash -c "ovsdb-client backup unix:/etc/ovn/ovnsb_db.sock OVN_Southbound \ + > /etc/ovn/ovnsb_db.db.{{ cifmw_backup_restore_backup_name_suffix }}" + changed_when: true diff --git a/roles/cifmw_backup_restore/tasks/ovn_db_restore.yml b/roles/cifmw_backup_restore/tasks/ovn_db_restore.yml new file mode 100644 index 000000000..c6ba801f4 --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/ovn_db_restore.yml @@ -0,0 +1,124 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# Restore OVN NB/SB databases from timestamped files on the PVCs (user-guide Step 8). +# Must run after Galera restore and before removing deployment-stage. If no backup +# files exist for this timestamp, skip (Neutron–OVN sync at the end repopulates OVN). 
+ +- name: Check for OVN NB backup file from this backup run + ansible.builtin.shell: | + set -o pipefail + oc exec ovsdbserver-nb-0 -n {{ cifmw_backup_restore_namespace }} -c ovsdbserver-nb -- \ + test -f /etc/ovn/ovnnb_db.db.{{ cifmw_backup_restore_backup_timestamp }} + register: _ovn_nb_backup_file + changed_when: false + failed_when: false + when: cifmw_backup_restore_ovn_db | bool + +- name: Set fact — OVN backup files not used (OVN DB steps disabled) + ansible.builtin.set_fact: + _cifmw_backup_restore_ovn_files_present: false + when: not (cifmw_backup_restore_ovn_db | bool) + +- name: Set fact — OVN backup files present on PVC + ansible.builtin.set_fact: + _cifmw_backup_restore_ovn_files_present: "{{ _ovn_nb_backup_file.rc == 0 }}" + when: cifmw_backup_restore_ovn_db | bool + +- name: Skip OVN database file restore (no per-timestamp backup on PVC) + ansible.builtin.debug: + msg: >- + No OVN backup file ovnnb_db.db.{{ cifmw_backup_restore_backup_timestamp }} on ovsdbserver-nb-0; + skipping file restore. OVN will be repopulated via neutron-ovn-db-sync after EDPM. 
+  when:
+    - cifmw_backup_restore_ovn_db | bool
+    - not _cifmw_backup_restore_ovn_files_present | bool
+
+- name: "Step 8: Restore OVN databases from PVC backup files"
+  when:
+    - cifmw_backup_restore_ovn_db | bool
+    - _cifmw_backup_restore_ovn_files_present | bool
+  block:
+    - name: Announce OVN database file restore
+      ansible.builtin.debug:
+        msg:
+          - "========================================"
+          - "Step 8: OVN Database Restore (NB/SB)"
+          - "========================================"
+
+    - name: Replace OVN DB files and clear follower replicas
+      ansible.builtin.shell: |
+        set -euo pipefail
+        NS="{{ cifmw_backup_restore_namespace }}"
+        TS="{{ cifmw_backup_restore_backup_timestamp }}"
+        for db in nb sb; do
+          oc exec ovsdbserver-${db}-0 -n "${NS}" -c ovsdbserver-${db} -- bash -c \
+            "rm -f /etc/ovn/ovn${db}_db.db && \
+             cp /etc/ovn/ovn${db}_db.db.${TS} /etc/ovn/ovn${db}_db.db"
+          COUNT=$(oc get pods -n "${NS}" -l service=ovsdbserver-${db} --no-headers 2>/dev/null | wc -l | awk '{print $1}')
+          for ((i=1; i<COUNT; i++)); do
+            if oc get pod "ovsdbserver-${db}-${i}" -n "${NS}" &>/dev/null; then
+              oc exec "ovsdbserver-${db}-${i}" -n "${NS}" -c ovsdbserver-${db} -- \
+                rm -f "/etc/ovn/ovn${db}_db.db"
+            fi
+          done
+        done
+      changed_when: true
+
+    - name: Force delete OVN NB database pods
+      ansible.builtin.command:
+        cmd: >-
+          oc delete pod -n {{ cifmw_backup_restore_namespace }}
+          -l service=ovsdbserver-nb
+          --force
+          --grace-period=0
+      changed_when: true
+
+    - name: Force delete OVN SB database pods
+      ansible.builtin.command:
+        cmd: >-
+          oc delete pod -n {{ cifmw_backup_restore_namespace }}
+          -l service=ovsdbserver-sb
+          --force
+          --grace-period=0
+      changed_when: true
+
+    - name: Wait for OVN NB database pods ready
+      ansible.builtin.command:
+        cmd: >-
+          oc wait pod -n {{ cifmw_backup_restore_namespace }}
+          -l service=ovsdbserver-nb
+          --for=condition=Ready
+          --timeout={{ cifmw_backup_restore_ovn_db_ready_timeout }}
+      changed_when: false
+
+    - name: Wait for OVN SB database pods ready
+      ansible.builtin.command:
+        cmd: >-
+          oc wait pod -n {{ 
cifmw_backup_restore_namespace }} + -l service=ovsdbserver-sb + --for=condition=Ready + --timeout={{ cifmw_backup_restore_ovn_db_ready_timeout }} + changed_when: false + + - name: Restart OVN control plane pods to reconnect to restored databases + ansible.builtin.shell: | + set -o pipefail + oc delete pod -n {{ cifmw_backup_restore_namespace }} -l service=ovn-northd --ignore-not-found=true + oc delete pod -n {{ cifmw_backup_restore_namespace }} -l service=ovn-controller --ignore-not-found=true + oc delete pod -n {{ cifmw_backup_restore_namespace }} -l service=ovn-controller-ovs --ignore-not-found=true + oc delete pod -n {{ cifmw_backup_restore_namespace }} -l service=ovn-controller-metrics --ignore-not-found=true + changed_when: true diff --git a/roles/cifmw_backup_restore/tasks/restore.yml b/roles/cifmw_backup_restore/tasks/restore.yml new file mode 100644 index 000000000..eae150397 --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/restore.yml @@ -0,0 +1,535 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# OpenStack Full Restore +# +# Restores an OpenStack control plane from OADP backups using ordered +# Velero Restore CRs, Galera restore, optional OVN NB/SB file restore, +# then staged ControlPlane resume, dataplane, EDPM, and Neutron–OVN sync. + +- name: Validate backup_timestamp parameter + ansible.builtin.fail: + msg: "cifmw_backup_restore_backup_timestamp is required for restore." 
+ when: cifmw_backup_restore_backup_timestamp is not defined + +- name: Derive backup names from timestamp + ansible.builtin.set_fact: + _pvc_backup_name: "openstack-backup-pvcs-{{ cifmw_backup_restore_backup_timestamp }}" + _resources_backup_name: "openstack-backup-resources-{{ cifmw_backup_restore_backup_timestamp }}" + _restore_suffix: "{{ lookup('pipe', 'date +%Y%m%d-%H%M%S') }}" + +- name: Create temp directory for rendered templates + ansible.builtin.tempfile: + state: directory + prefix: openstack-restore- + register: _cifmw_backup_restore_rendered_dir + +# ======================================== +# Pre-flight checks +# ======================================== +- name: Verify OADP operator is installed + ansible.builtin.shell: | + oc get crd backups.velero.io -o name + register: _oadp_crd_check + changed_when: false + failed_when: false + +- name: Fail if OADP is not installed + ansible.builtin.fail: + msg: "OADP operator is not installed (Velero Backup CRD not found)." + when: _oadp_crd_check.rc != 0 + +- name: Ensure target namespace exists + kubernetes.core.k8s: + api_version: v1 + kind: Namespace + name: "{{ cifmw_backup_restore_namespace }}" + state: present + +- name: Verify OADP backups exist + ansible.builtin.shell: | + oc get backup {{ _pvc_backup_name }} -n {{ cifmw_backup_restore_oadp_namespace }} -o jsonpath='{.status.phase}' + register: _pvc_backup_phase + changed_when: false + +- name: Verify resources backup exists + ansible.builtin.shell: | + oc get backup {{ _resources_backup_name }} -n {{ cifmw_backup_restore_oadp_namespace }} -o jsonpath='{.status.phase}' + register: _resources_backup_phase + changed_when: false + +- name: Fail if backups are not completed + ansible.builtin.fail: + msg: "Backups must be Completed. 
PVC: {{ _pvc_backup_phase.stdout }}, Resources: {{ _resources_backup_phase.stdout }}" + when: _pvc_backup_phase.stdout != "Completed" or _resources_backup_phase.stdout != "Completed" + +# ======================================== +# Operator version validation +# ======================================== +- name: Get required operator version from backup + ansible.builtin.shell: | + oc get backup {{ _resources_backup_name }} -n {{ cifmw_backup_restore_oadp_namespace }} \ + -o jsonpath='{.metadata.annotations.openstack\.org/csv-version}' + register: _backup_csv_version + changed_when: false + failed_when: false + +- name: Get installed operator version + ansible.builtin.shell: | + oc get csv -n openstack-operators \ + -l operators.coreos.com/openstack-operator.openstack-operators \ + -o jsonpath='{.items[0].metadata.name}' + register: _installed_csv_version + changed_when: false + failed_when: false + +- name: Display operator version comparison + ansible.builtin.debug: + msg: + - "Backup operator version: {{ _backup_csv_version.stdout | default('not recorded') }}" + - "Installed operator version: {{ _installed_csv_version.stdout | default('not installed') }}" + +- name: Warn if operator version does not match backup + ansible.builtin.fail: + msg: | + Operator version mismatch! + Backup requires: {{ _backup_csv_version.stdout }} + Installed version: {{ _installed_csv_version.stdout | default('not installed') }} + Install the correct operator version before restoring. 
+ Catalog source image from backup: + oc get backup {{ _resources_backup_name }} -n {{ cifmw_backup_restore_oadp_namespace }} \ + -o jsonpath='{.metadata.annotations.openstack\.org/catalog-source-image}' + when: + - _backup_csv_version.stdout | default('') != '' + - _installed_csv_version.stdout | default('') != _backup_csv_version.stdout + +# ======================================== +# Resource modifier ConfigMap +# ======================================== +- name: Render resource modifier ConfigMap + ansible.builtin.template: + src: 00-resource-modifiers-configmap.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/00-resource-modifiers-configmap.yaml" + mode: "0644" + +- name: Create resource modifier ConfigMap + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/00-resource-modifiers-configmap.yaml" + state: present + +# ======================================== +# Step 0.5: Pin PVCs to nodes (optional) +# ======================================== +- name: Pin PVCs to original nodes + ansible.builtin.include_tasks: restore_pin_pvcs.yml + when: cifmw_backup_restore_pin_pvcs | bool + +# ======================================== +# Step 1: Restore PVCs (Order 00) +# ======================================== +- name: Render PVC restore + ansible.builtin.template: + src: 01-restore-order-00-pvcs.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/01-restore-order-00-pvcs.yaml" + mode: "0644" + vars: + pvc_backup_name: "{{ _pvc_backup_name }}" + restore_suffix: "{{ _restore_suffix }}" + +- name: Pause before PVC restore + ansible.builtin.pause: + prompt: "Press Enter to restore PVCs, or Ctrl+C then 'A' to abort" + when: not (cifmw_backup_restore_auto_ack | bool) + +- name: Create PVC restore + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/01-restore-order-00-pvcs.yaml" + state: present + +- name: Wait for PVC restore + ansible.builtin.include_tasks: wait_for_restore.yml + vars: + _restore_name: 
"openstack-restore-00-pvcs-{{ _restore_suffix }}" + _step_name: "Step 1 (PVC restore)" + +- name: Get PVC-pin dummy Deployments + kubernetes.core.k8s_info: + api_version: apps/v1 + kind: Deployment + namespace: "{{ cifmw_backup_restore_namespace }}" + label_selectors: + - app=pvc-pin + register: _pvc_pin_deployments + when: cifmw_backup_restore_pin_pvcs | bool + +- name: Delete dummy Deployments after PVC restore + kubernetes.core.k8s: + api_version: apps/v1 + kind: Deployment + namespace: "{{ cifmw_backup_restore_namespace }}" + name: "{{ item.metadata.name }}" + state: absent + loop: "{{ _pvc_pin_deployments.resources | default([]) }}" + loop_control: + label: "{{ item.metadata.name }}" + when: cifmw_backup_restore_pin_pvcs | bool + +# ======================================== +# Step 2: Restore Foundation (Order 10) +# ======================================== +- name: Render foundation restore + ansible.builtin.template: + src: 02-restore-order-10-foundation.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/02-restore-order-10-foundation.yaml" + mode: "0644" + vars: + resources_backup_name: "{{ _resources_backup_name }}" + restore_suffix: "{{ _restore_suffix }}" + +- name: Create foundation restore + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/02-restore-order-10-foundation.yaml" + state: present + +- name: Wait for foundation restore + ansible.builtin.include_tasks: wait_for_restore.yml + vars: + _restore_name: "openstack-restore-10-foundation-{{ _restore_suffix }}" + _step_name: "Step 2 (Foundation restore)" + +# ======================================== +# Step 3: Restore Infrastructure (Order 20) +# ======================================== +- name: Render infrastructure restore + ansible.builtin.template: + src: 03-restore-order-20-infrastructure.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/03-restore-order-20-infrastructure.yaml" + mode: "0644" + vars: + resources_backup_name: "{{ 
_resources_backup_name }}" + restore_suffix: "{{ _restore_suffix }}" + +- name: Create infrastructure restore + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/03-restore-order-20-infrastructure.yaml" + state: present + +- name: Wait for infrastructure restore + ansible.builtin.include_tasks: wait_for_restore.yml + vars: + _restore_name: "openstack-restore-20-infra-{{ _restore_suffix }}" + _step_name: "Step 3 (Infrastructure restore)" + +# ======================================== +# Step 4: Restore ControlPlane (Order 30) +# ======================================== +- name: Render controlplane restore + ansible.builtin.template: + src: 04-restore-order-30-controlplane.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/04-restore-order-30-controlplane.yaml" + mode: "0644" + vars: + resources_backup_name: "{{ _resources_backup_name }}" + restore_suffix: "{{ _restore_suffix }}" + +- name: Create controlplane restore + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/04-restore-order-30-controlplane.yaml" + state: present + +- name: Wait for controlplane restore + ansible.builtin.include_tasks: wait_for_restore.yml + vars: + _restore_name: "openstack-restore-30-ctlplane-{{ _restore_suffix }}" + _step_name: "Step 4 (ControlPlane restore)" + +# ======================================== +# Step 5: Wait for Infrastructure Ready +# ======================================== +- name: Wait for OpenStackControlPlane infrastructure ready + ansible.builtin.shell: | + oc wait openstackcontrolplane -n {{ cifmw_backup_restore_namespace }} --all \ + --for=condition=OpenStackControlPlaneInfrastructureReady \ + --timeout={{ cifmw_backup_restore_infra_ready_timeout }} + changed_when: false + +- name: Get OpenStackControlPlane name + ansible.builtin.shell: | + oc get openstackcontrolplane -n {{ cifmw_backup_restore_namespace }} -o jsonpath='{.items[0].metadata.name}' + register: _ctlplane_name + changed_when: false + +# 
======================================== +# Step 6: Restore GaleraBackup CRs (Order 40) +# ======================================== +- name: Render backup config restore + ansible.builtin.template: + src: 05-restore-order-40-backup-config.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/05-restore-order-40-backup-config.yaml" + mode: "0644" + vars: + resources_backup_name: "{{ _resources_backup_name }}" + restore_suffix: "{{ _restore_suffix }}" + +- name: Create backup config restore + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/05-restore-order-40-backup-config.yaml" + state: present + +- name: Wait for backup config restore + ansible.builtin.include_tasks: wait_for_restore.yml + vars: + _restore_name: "openstack-restore-40-backup-{{ _restore_suffix }}" + _step_name: "Step 6 (GaleraBackup restore)" + +# ======================================== +# Step 7: Database Restore +# ======================================== +- name: Get GaleraBackup names + ansible.builtin.shell: | + oc get galerabackup -n {{ cifmw_backup_restore_namespace }} -o jsonpath='{.items[*].metadata.name}' + register: _galerabackup_names + changed_when: false + +- name: Set GaleraBackup list + ansible.builtin.set_fact: + _galerabackup_list: "{{ _galerabackup_names.stdout.split() if _galerabackup_names.stdout != '' else [] }}" + +- name: Render GaleraRestore CRs + ansible.builtin.template: + src: 06a-galerarestore.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/06a-galerarestore.yaml" + mode: "0644" + vars: + galerabackup_list: "{{ _galerabackup_list }}" + when: _galerabackup_list | length > 0 + +- name: Apply GaleraRestore CRs + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/06a-galerarestore.yaml" + state: present + when: _galerabackup_list | length > 0 + +- name: Wait for GaleraRestore pods to be ready + ansible.builtin.shell: | + RESTORE_NAME="{{ item }}restore" + BACKUP_SOURCE="{{ item }}" + 
POD_NAME="${BACKUP_SOURCE}-restore-${RESTORE_NAME}" + oc wait --for=condition=Ready pod/${POD_NAME} -n {{ cifmw_backup_restore_namespace }} --timeout=120s + loop: "{{ _galerabackup_list }}" + changed_when: false + when: _galerabackup_list | length > 0 + +- name: Execute database restore for each GaleraRestore + ansible.builtin.shell: | + RESTORE_NAME="{{ item }}restore" + BACKUP_SOURCE="{{ item }}" + POD_NAME="${BACKUP_SOURCE}-restore-${RESTORE_NAME}" + TIMESTAMP="{{ cifmw_backup_restore_backup_timestamp }}" + RESTORE_PATTERN="/backup/data/*_${TIMESTAMP}.sql.gz" + oc exec -n {{ cifmw_backup_restore_namespace }} ${POD_NAME} -- \ + /var/lib/backup-scripts/restore_galera --yes --content {{ cifmw_backup_restore_restore_content }} ${RESTORE_PATTERN} + loop: "{{ _galerabackup_list }}" + changed_when: true + when: _galerabackup_list | length > 0 + +- name: List GaleraRestore CRs kept for validation + ansible.builtin.debug: + msg: "GaleraRestore CR '{{ item }}restore' kept for post-restore validation (cleaned up by cleanup step)" + loop: "{{ _galerabackup_list }}" + when: _galerabackup_list | length > 0 + +# ======================================== +# Step 8: OVN database restore (optional; before full control plane resume) +# ======================================== +- name: Include OVN database restore tasks + ansible.builtin.include_tasks: ovn_db_restore.yml + +# RabbitMQ credentials are restored automatically: +# The infra-operator's RabbitMQ controller labels the default-user secret +# for restore. On restore, the secret is restored in order 10, and the +# controller reuses the existing credentials when creating the new cluster. 
+ +# ======================================== +# Step 9: Resume Full Deployment +# ======================================== +- name: Pause before resuming deployment + ansible.builtin.pause: + prompt: "Press Enter to resume full deployment, or Ctrl+C then 'A' to abort" + when: not (cifmw_backup_restore_auto_ack | bool) + +- name: Remove deployment-stage annotation + ansible.builtin.shell: | + oc annotate openstackcontrolplane {{ _ctlplane_name.stdout }} \ + -n {{ cifmw_backup_restore_namespace }} core.openstack.org/deployment-stage- + changed_when: true + +- name: Wait for OpenStackControlPlane to be ready + ansible.builtin.shell: | + oc wait openstackcontrolplane {{ _ctlplane_name.stdout }} \ + -n {{ cifmw_backup_restore_namespace }} \ + --for=condition=Ready \ + --timeout={{ cifmw_backup_restore_ctlplane_ready_timeout }} + changed_when: false + +# ======================================== +# Step 10: Restore DataPlane (Order 60) +# ======================================== +- name: Render dataplane restore + ansible.builtin.template: + src: 07-restore-order-60-dataplane.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/07-restore-order-60-dataplane.yaml" + mode: "0644" + vars: + resources_backup_name: "{{ _resources_backup_name }}" + restore_suffix: "{{ _restore_suffix }}" + +- name: Create dataplane restore + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/07-restore-order-60-dataplane.yaml" + state: present + +- name: Wait for dataplane restore + ansible.builtin.include_tasks: wait_for_restore.yml + vars: + _restore_name: "openstack-restore-60-dataplane-{{ _restore_suffix }}" + _step_name: "Step 10 (DataPlane restore)" + +# ======================================== +# Step 11: EDPM Deployment +# ======================================== +- name: Get DataPlaneNodeSet names + ansible.builtin.shell: | + oc get openstackdataplanenodeset -n {{ cifmw_backup_restore_namespace }} -o jsonpath='{.items[*].metadata.name}' + register: 
_nodeset_names + changed_when: false + +- name: Set nodeset list + ansible.builtin.set_fact: + _nodeset_names_list: "{{ _nodeset_names.stdout.split() }}" + when: _nodeset_names.stdout != "" + +- name: Render EDPM deployment CR + ansible.builtin.template: + src: 08-edpm-deployment.yaml.j2 + dest: "{{ _cifmw_backup_restore_rendered_dir.path }}/08-edpm-deployment.yaml" + mode: "0644" + vars: + nodeset_names_list: "{{ _nodeset_names_list }}" + restore_suffix: "{{ _restore_suffix }}" + when: _nodeset_names.stdout != "" + +- name: Create EDPM deployment + kubernetes.core.k8s: + src: "{{ _cifmw_backup_restore_rendered_dir.path }}/08-edpm-deployment.yaml" + state: present + when: _nodeset_names.stdout != "" + +- name: Print EDPM deployment status + ansible.builtin.debug: + msg: "Created EDPM deployment: edpm-deployment-post-restore-{{ _restore_suffix }}" + when: _nodeset_names.stdout != "" + +- name: Wait for EDPM deployment to complete + ansible.builtin.command: + cmd: >- + oc wait OpenStackDataPlaneDeployment + edpm-deployment-post-restore-{{ _restore_suffix }} + --namespace={{ cifmw_backup_restore_namespace }} + --for=condition=Ready + --timeout={{ cifmw_backup_restore_edpm_deploy_timeout }} + when: _nodeset_names.stdout != "" + +# ======================================== +# Step 12: Verify and sync Neutron to OVN (user-guide backup-restore Step 12) +# ======================================== +# Run after EDPM so compute ovn-controller agents reconnect to the SB DB first. +# Log mode does not change exit code for drift (Neutron logs drift as WARNING lines). +# Repair runs if OVN file backup was skipped, or if log output contains WARNING drift. 
+# Ref: https://github.com/openstack-k8s-operators/dev-docs/blob/main/backup-restore/user-guide.md#step-12-verify-and-sync-neutron-to-ovn +- name: Verify Neutron vs OVN (neutron-ovn-db-sync-util log mode) + ansible.builtin.shell: | + set -o pipefail + oc exec -n {{ cifmw_backup_restore_namespace }} -c neutron-api deploy/neutron -- \ + neutron-ovn-db-sync-util \ + --config-file /usr/share/neutron/neutron-dist.conf \ + --config-file /etc/neutron/neutron.conf \ + --config-dir /etc/neutron/neutron.conf.d \ + --ovn-neutron_sync_mode=log \ + --debug + register: _neutron_ovn_sync_log + changed_when: false + +- name: Decide whether Neutron–OVN repair sync is required + ansible.builtin.set_fact: + _neutron_ovn_needs_repair: >- + {{ + (not (cifmw_backup_restore_ovn_db | bool)) + or ( + (cifmw_backup_restore_ovn_db | bool) + and ( + ((_neutron_ovn_sync_log.stdout | default('')) ~ (_neutron_ovn_sync_log.stderr | default(''))) + | regex_search('(?i)\bWARNING\b') is not none + ) + ) + }} + +- name: Report Neutron–OVN repair decision + ansible.builtin.debug: + msg: >- + neutron-ovn-db-sync repair: + {{ 'running' if _neutron_ovn_needs_repair | bool else 'skipped' }}. + {% if not (cifmw_backup_restore_ovn_db | bool) %} + Reason: cifmw_backup_restore_ovn_db is false (no OVN NB/SB file backup; OVN must be repopulated from Neutron). + {% elif _neutron_ovn_needs_repair | bool %} + Reason: log-mode output contained WARNING lines (Neutron-reported drift vs OVN). + {% else %} + Reason: OVN file backup/restore was used and log-mode output had no WARNING lines. 
+ {% endif %} + +- name: Sync Neutron state to OVN database (repair mode) + ansible.builtin.shell: | + set -o pipefail + oc exec -n {{ cifmw_backup_restore_namespace }} -c neutron-api deploy/neutron -- \ + neutron-ovn-db-sync-util \ + --config-file /usr/share/neutron/neutron-dist.conf \ + --config-file /etc/neutron/neutron.conf \ + --config-dir /etc/neutron/neutron.conf.d \ + --ovn-neutron_sync_mode=repair \ + --debug + when: _neutron_ovn_needs_repair | bool + changed_when: true + +# ======================================== +# Cleanup and Summary +# ======================================== +- name: Clean up rendered templates + ansible.builtin.file: + path: "{{ _cifmw_backup_restore_rendered_dir.path }}" + state: absent + +- name: Print restore summary + ansible.builtin.debug: + msg: + - "========================================" + - "Restore Complete" + - "========================================" + - "" + - "ControlPlane: {{ _ctlplane_name.stdout }}" + - "Restore suffix: {{ _restore_suffix }}" + - "" + - "IMPORTANT: Re-enable InstanceHa after verifying the cloud:" + - " oc patch instanceha <instanceha-cr-name> -n {{ cifmw_backup_restore_namespace }} --type merge -p '{\"spec\":{\"disabled\":\"False\"}}'" diff --git a/roles/cifmw_backup_restore/tasks/restore_pin_pvcs.yml b/roles/cifmw_backup_restore/tasks/restore_pin_pvcs.yml new file mode 100644 index 000000000..d277c4ee0 --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/restore_pin_pvcs.yml @@ -0,0 +1,114 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the +# License for the specific language governing permissions and limitations +# under the License. + +# Pin PVCs to Original Nodes +# +# For Data Mover restores with WaitForFirstConsumer storage (e.g., LVM/TopoLVM), +# creates dummy Deployments with nodeSelector to bind restored PVCs to their +# original nodes. Downloads backup metadata to extract PVC-to-node mapping +# from PV nodeAffinity. + +- name: Download backup metadata + ansible.builtin.shell: | + set -e + TMPDIR=$(mktemp -d) + REMOTE_FILE="/tmp/backup-$$.tar.gz" + VELERO_POD=$(oc get pods -n {{ cifmw_backup_restore_oadp_namespace }} -l deploy=velero --field-selector=status.phase=Running -o jsonpath='{.items[0].metadata.name}') + oc exec -n {{ cifmw_backup_restore_oadp_namespace }} ${VELERO_POD} -- rm -f ${REMOTE_FILE} >&2 || true + oc exec -n {{ cifmw_backup_restore_oadp_namespace }} ${VELERO_POD} -- \ + /velero backup download {{ _pvc_backup_name }} --insecure-skip-tls-verify -o ${REMOTE_FILE} >&2 + oc exec -n {{ cifmw_backup_restore_oadp_namespace }} ${VELERO_POD} -- cat ${REMOTE_FILE} > ${TMPDIR}/backup.tar.gz + oc exec -n {{ cifmw_backup_restore_oadp_namespace }} ${VELERO_POD} -- rm -f ${REMOTE_FILE} >&2 + mkdir -p ${TMPDIR}/backup + tar xzf ${TMPDIR}/backup.tar.gz -C ${TMPDIR}/backup + echo ${TMPDIR} + register: _backup_download + changed_when: false + +- name: Extract PVC-to-node mapping from PV nodeAffinity + ansible.builtin.shell: | + set -o pipefail + BACKUP_DIR="{{ _backup_download.stdout }}/backup" + PV_DIR="${BACKUP_DIR}/resources/persistentvolumes/cluster" + if [ ! 
-d "${PV_DIR}" ]; then + echo "WARNING: No PV resources found in backup" >&2 + exit 0 + fi + for f in ${PV_DIR}/*.json; do + pvc=$(jq -r '.spec.claimRef.name // empty' "$f") + ns=$(jq -r '.spec.claimRef.namespace // empty' "$f") + node=$(jq -r ' + .spec.nodeAffinity.required.nodeSelectorTerms[0].matchExpressions[] + | select(.key | contains("topolvm")) | .values[0] + ' "$f" 2>/dev/null) + [ -n "$pvc" ] && [ -n "$node" ] && [ "$ns" = "{{ cifmw_backup_restore_namespace }}" ] && echo "${pvc}:${node}" + done + register: _pvc_node_mapping + changed_when: false + +- name: Print PVC-to-node mapping + ansible.builtin.debug: + msg: "{{ _pvc_node_mapping.stdout_lines }}" + when: _pvc_node_mapping.stdout != "" + +- name: Create dummy Deployments to pin PVCs to nodes + ansible.builtin.shell: | + set -o pipefail + PVC_NAME="{{ item.split(':')[0] }}" + NODE_NAME="{{ item.split(':')[1] }}" + cat <- + oc wait galerabackup --all -n {{ cifmw_backup_restore_namespace }} + --for=jsonpath='{.status.conditions[0].status}'=True --timeout=120s + changed_when: false + +- name: Wait for GaleraBackup cronjobs to be created + ansible.builtin.shell: | + oc get cronjob -n {{ cifmw_backup_restore_namespace }} -l app=galera -o jsonpath='{.items[*].metadata.name}' + register: _galera_backup_cronjobs + changed_when: false + until: _galera_backup_cronjobs.stdout.split() | length >= _galera_instances | length + retries: 30 + delay: 10 + +- name: Cleanup rendered templates + ansible.builtin.file: + path: "{{ _galerabackup_rendered_dir.path }}" + state: absent + +- name: Display GaleraBackup setup complete + ansible.builtin.debug: + msg: "GaleraBackup CRs created for: {{ _galera_instances | join(', ') }}" diff --git a/roles/cifmw_backup_restore/tasks/wait_for_restore.yml b/roles/cifmw_backup_restore/tasks/wait_for_restore.yml new file mode 100644 index 000000000..956ccec7b --- /dev/null +++ b/roles/cifmw_backup_restore/tasks/wait_for_restore.yml @@ -0,0 +1,79 @@ +--- +# Copyright Red Hat, Inc. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# Reusable task to wait for a Velero Restore to complete. +# +# Required variables: +# _restore_name: Name of the Velero Restore CR +# _step_name: Human-readable step name for log messages + +- name: "Wait for restore to complete - {{ _step_name }}" + ansible.builtin.command: + cmd: >- + oc get restore {{ _restore_name }} + -n {{ cifmw_backup_restore_oadp_namespace }} + -o jsonpath='{.status.phase}' + register: _restore_phase_raw + changed_when: false + until: _restore_phase_raw.stdout in ["Completed", "PartiallyFailed", "Failed", "FailedValidation"] + retries: "{{ (cifmw_backup_restore_restore_timeout | int / 10) | int }}" + delay: 10 + +- name: "Set restore phase fact - {{ _step_name }}" + ansible.builtin.set_fact: + _restore_phase: + stdout: "{{ _restore_phase_raw.stdout }}" + +- name: "Get restore details on non-Completed phase - {{ _step_name }}" + ansible.builtin.shell: | + set -o pipefail + echo "=== Restore Status ===" + oc get restore {{ _restore_name }} -n {{ cifmw_backup_restore_oadp_namespace }} \ + -o jsonpath='{.status}' | python3 -m json.tool 2>/dev/null || \ + oc get restore {{ _restore_name }} -n {{ cifmw_backup_restore_oadp_namespace }} \ + -o yaml + echo "" + echo "=== Velero Restore Logs ===" + oc -n {{ cifmw_backup_restore_oadp_namespace }} exec deployment/velero -- \ + ./velero restore logs {{ _restore_name }} 2>/dev/null | tail -50 || true + register: _restore_details + 
changed_when: false + when: _restore_phase.stdout != "Completed" + +- name: "Print restore diagnostics - {{ _step_name }}" + ansible.builtin.debug: + msg: "{{ _restore_details.stdout_lines }}" + when: _restore_phase.stdout != "Completed" + +- name: "Fail on Failed restore - {{ _step_name }}" + ansible.builtin.fail: + msg: "Restore {{ _restore_name }} {{ _restore_phase.stdout }}." + when: _restore_phase.stdout in ["Failed", "FailedValidation"] + +- name: "Fail on PartiallyFailed restore (strict mode) - {{ _step_name }}" + ansible.builtin.fail: + msg: "Restore {{ _restore_name }} {{ _restore_phase.stdout }}. Re-run with -e cifmw_backup_restore_strict_restore=false to continue." + when: _restore_phase.stdout == "PartiallyFailed" and (cifmw_backup_restore_strict_restore | bool) + +- name: "Warn on PartiallyFailed restore (non-strict mode) - {{ _step_name }}" + ansible.builtin.debug: + msg: "WARNING: Restore {{ _restore_name }} completed with status: {{ _restore_phase.stdout }}" + when: _restore_phase.stdout == "PartiallyFailed" and not (cifmw_backup_restore_strict_restore | bool) + +- name: "Print success - {{ _step_name }}" + ansible.builtin.debug: + msg: "Restore {{ _restore_name }} completed successfully" + when: _restore_phase.stdout == "Completed" diff --git a/roles/cifmw_backup_restore/templates/00-resource-modifiers-configmap.yaml.j2 b/roles/cifmw_backup_restore/templates/00-resource-modifiers-configmap.yaml.j2 new file mode 100644 index 000000000..8df9e3a25 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/00-resource-modifiers-configmap.yaml.j2 @@ -0,0 +1,39 @@ +--- +# Resource Modifier ConfigMap for Velero Restores +apiVersion: v1 +kind: ConfigMap +metadata: + name: openstack-restore-resource-modifiers + namespace: {{ cifmw_backup_restore_oadp_namespace }} +data: + resource-modifiers.yaml: | + version: v1 + resourceModifierRules: + - conditions: + groupResource: "*" + namespaces: + - {{ cifmw_backup_restore_namespace }} + mergePatches: + - patchData: | 
+ metadata: + ownerReferences: null + annotations: + kubectl.kubernetes.io/last-applied-configuration: null + - conditions: + groupResource: openstackcontrolplanes.core.openstack.org + namespaces: + - {{ cifmw_backup_restore_namespace }} + mergePatches: + - patchData: | + metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: null + core.openstack.org/deployment-stage: "infrastructure-only" + - conditions: + groupResource: instancehas.instanceha.openstack.org + namespaces: + - {{ cifmw_backup_restore_namespace }} + mergePatches: + - patchData: | + spec: + disabled: "True" diff --git a/roles/cifmw_backup_restore/templates/01-restore-order-00-pvcs.yaml.j2 b/roles/cifmw_backup_restore/templates/01-restore-order-00-pvcs.yaml.j2 new file mode 100644 index 000000000..175eecfc2 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/01-restore-order-00-pvcs.yaml.j2 @@ -0,0 +1,17 @@ +--- +# Restore Order 00: Storage Foundation - PVCs +apiVersion: velero.io/v1 +kind: Restore +metadata: + name: openstack-restore-00-pvcs-{{ restore_suffix }} + namespace: {{ cifmw_backup_restore_oadp_namespace }} +spec: + backupName: {{ pvc_backup_name }} + includedNamespaces: + - {{ cifmw_backup_restore_namespace }} + excludedResources: + - pods + resourceModifier: + kind: ConfigMap + name: openstack-restore-resource-modifiers + restorePVs: true diff --git a/roles/cifmw_backup_restore/templates/02-restore-order-10-foundation.yaml.j2 b/roles/cifmw_backup_restore/templates/02-restore-order-10-foundation.yaml.j2 new file mode 100644 index 000000000..353b8db49 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/02-restore-order-10-foundation.yaml.j2 @@ -0,0 +1,19 @@ +--- +# Restore Order 10: Foundation Resources +# Restores NADs, Secrets, ConfigMaps (user-provided resources without ownerRefs) +apiVersion: velero.io/v1 +kind: Restore +metadata: + name: openstack-restore-10-foundation-{{ restore_suffix }} + namespace: {{ cifmw_backup_restore_oadp_namespace }} +spec: + 
backupName: {{ resources_backup_name }} + includedNamespaces: + - {{ cifmw_backup_restore_namespace }} + labelSelector: + matchLabels: + backup.openstack.org/restore: "true" + backup.openstack.org/restore-order: "10" + resourceModifier: + kind: ConfigMap + name: openstack-restore-resource-modifiers diff --git a/roles/cifmw_backup_restore/templates/03-restore-order-20-infrastructure.yaml.j2 b/roles/cifmw_backup_restore/templates/03-restore-order-20-infrastructure.yaml.j2 new file mode 100644 index 000000000..e3d9b97c6 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/03-restore-order-20-infrastructure.yaml.j2 @@ -0,0 +1,20 @@ +--- +# Restore Order 20: Infrastructure CRs +# Restores OpenStackVersion, OpenStackBackupConfig, Issuers, +# NetConfig, Topology, BGPConfiguration, DNSData, InstanceHa +apiVersion: velero.io/v1 +kind: Restore +metadata: + name: openstack-restore-20-infra-{{ restore_suffix }} + namespace: {{ cifmw_backup_restore_oadp_namespace }} +spec: + backupName: {{ resources_backup_name }} + includedNamespaces: + - {{ cifmw_backup_restore_namespace }} + labelSelector: + matchLabels: + backup.openstack.org/restore: "true" + backup.openstack.org/restore-order: "20" + resourceModifier: + kind: ConfigMap + name: openstack-restore-resource-modifiers diff --git a/roles/cifmw_backup_restore/templates/04-restore-order-30-controlplane.yaml.j2 b/roles/cifmw_backup_restore/templates/04-restore-order-30-controlplane.yaml.j2 new file mode 100644 index 000000000..4faabfcb0 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/04-restore-order-30-controlplane.yaml.j2 @@ -0,0 +1,19 @@ +--- +# Restore Order 30: OpenStackControlPlane (Staged) +# Restores with deployment-stage annotation for infrastructure-only start +apiVersion: velero.io/v1 +kind: Restore +metadata: + name: openstack-restore-30-ctlplane-{{ restore_suffix }} + namespace: {{ cifmw_backup_restore_oadp_namespace }} +spec: + backupName: {{ resources_backup_name }} + includedNamespaces: + - {{ 
cifmw_backup_restore_namespace }} + labelSelector: + matchLabels: + backup.openstack.org/restore: "true" + backup.openstack.org/restore-order: "30" + resourceModifier: + kind: ConfigMap + name: openstack-restore-resource-modifiers diff --git a/roles/cifmw_backup_restore/templates/05-restore-order-40-backup-config.yaml.j2 b/roles/cifmw_backup_restore/templates/05-restore-order-40-backup-config.yaml.j2 new file mode 100644 index 000000000..0e9dc6bd7 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/05-restore-order-40-backup-config.yaml.j2 @@ -0,0 +1,18 @@ +--- +# Restore Order 40: Backup Configuration, IP Sets & DataPlane Services +apiVersion: velero.io/v1 +kind: Restore +metadata: + name: openstack-restore-40-backup-{{ restore_suffix }} + namespace: {{ cifmw_backup_restore_oadp_namespace }} +spec: + backupName: {{ resources_backup_name }} + includedNamespaces: + - {{ cifmw_backup_restore_namespace }} + labelSelector: + matchLabels: + backup.openstack.org/restore: "true" + backup.openstack.org/restore-order: "40" + resourceModifier: + kind: ConfigMap + name: openstack-restore-resource-modifiers diff --git a/roles/cifmw_backup_restore/templates/06a-galerarestore.yaml.j2 b/roles/cifmw_backup_restore/templates/06a-galerarestore.yaml.j2 new file mode 100644 index 000000000..58ee92a6c --- /dev/null +++ b/roles/cifmw_backup_restore/templates/06a-galerarestore.yaml.j2 @@ -0,0 +1,14 @@ +--- +# GaleraRestore CRs for database restore +{% for backup_name in galerabackup_list %} +apiVersion: mariadb.openstack.org/v1beta1 +kind: GaleraRestore +metadata: + name: {{ backup_name }}restore + namespace: {{ cifmw_backup_restore_namespace }} +spec: + backupSource: {{ backup_name }} +{% if not loop.last %} +--- +{% endif %} +{% endfor %} diff --git a/roles/cifmw_backup_restore/templates/07-restore-order-60-dataplane.yaml.j2 b/roles/cifmw_backup_restore/templates/07-restore-order-60-dataplane.yaml.j2 new file mode 100644 index 000000000..9ae0644ac --- /dev/null +++ 
b/roles/cifmw_backup_restore/templates/07-restore-order-60-dataplane.yaml.j2 @@ -0,0 +1,18 @@ +--- +# Restore Order 60: DataPlane Resources +apiVersion: velero.io/v1 +kind: Restore +metadata: + name: openstack-restore-60-dataplane-{{ restore_suffix }} + namespace: {{ cifmw_backup_restore_oadp_namespace }} +spec: + backupName: {{ resources_backup_name }} + includedNamespaces: + - {{ cifmw_backup_restore_namespace }} + labelSelector: + matchLabels: + backup.openstack.org/restore: "true" + backup.openstack.org/restore-order: "60" + resourceModifier: + kind: ConfigMap + name: openstack-restore-resource-modifiers diff --git a/roles/cifmw_backup_restore/templates/08-edpm-deployment.yaml.j2 b/roles/cifmw_backup_restore/templates/08-edpm-deployment.yaml.j2 new file mode 100644 index 000000000..9b727e8d8 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/08-edpm-deployment.yaml.j2 @@ -0,0 +1,12 @@ +--- +# Post-Restore EDPM Deployment +apiVersion: dataplane.openstack.org/v1beta1 +kind: OpenStackDataPlaneDeployment +metadata: + name: edpm-deployment-post-restore-{{ restore_suffix }} + namespace: {{ cifmw_backup_restore_namespace }} +spec: + nodeSets: +{% for nodeset in nodeset_names_list %} + - {{ nodeset }} +{% endfor %} diff --git a/roles/cifmw_backup_restore/templates/backup-pvcs.yaml.j2 b/roles/cifmw_backup_restore/templates/backup-pvcs.yaml.j2 new file mode 100644 index 000000000..0f0058119 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/backup-pvcs.yaml.j2 @@ -0,0 +1,48 @@ +--- +# OpenStack PVC Backup +# Backs up PVCs labeled with backup.openstack.org/backup=true using CSI snapshots. 
+apiVersion: velero.io/v1 +kind: Backup +metadata: + name: openstack-backup-pvcs-{{ cifmw_backup_restore_backup_name_suffix }} + namespace: {{ cifmw_backup_restore_oadp_namespace }} + annotations: + openstack.org/csv-version: "{{ _backup_csv_version }}" + openstack.org/catalog-source-image: "{{ _backup_catalog_image }}" + openstack.org/operator-image: "{{ _backup_operator_image }}" +spec: + includedNamespaces: + - {{ cifmw_backup_restore_namespace }} + labelSelector: + matchLabels: + backup.openstack.org/backup: "true" + snapshotVolumes: true + defaultVolumesToFsBackup: false +{% if cifmw_backup_restore_snapshot_move_data | bool %} + snapshotMoveData: true +{% endif %} + volumeSnapshotLocations: [] + storageLocation: {{ cifmw_backup_restore_storage_location }} + ttl: {{ cifmw_backup_restore_backup_ttl }} + hooks: + resources: + - name: swift-xattr-backup + includedNamespaces: + - {{ cifmw_backup_restore_namespace }} + labelSelector: + matchLabels: + component: swift-storage + pre: + - exec: + container: object-server + command: + - /bin/bash + - -c + - | + set -e + DUMP="/srv/node/pv/.swift-xattrs.dump" + rm -f "$DUMP" "${DUMP}.applied" "${DUMP}.missing" + getfattr -R -d -m user.swift /srv/node/pv/ 1> "$DUMP" + echo "xattr backup complete: $(grep -c '^# file:' "$DUMP") files" + onError: Fail + timeout: {{ cifmw_backup_restore_swift_xattr_timeout | default('300s') }} diff --git a/roles/cifmw_backup_restore/templates/backup-resources.yaml.j2 b/roles/cifmw_backup_restore/templates/backup-resources.yaml.j2 new file mode 100644 index 000000000..05f4dc471 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/backup-resources.yaml.j2 @@ -0,0 +1,20 @@ +--- +# OpenStack Resources Backup (excluding PVCs) +# Backs up all resources in the OpenStack namespace except PVCs and PVs. 
+apiVersion: velero.io/v1 +kind: Backup +metadata: + name: openstack-backup-resources-{{ cifmw_backup_restore_backup_name_suffix }} + namespace: {{ cifmw_backup_restore_oadp_namespace }} + annotations: + openstack.org/csv-version: "{{ _backup_csv_version }}" + openstack.org/catalog-source-image: "{{ _backup_catalog_image }}" + openstack.org/operator-image: "{{ _backup_operator_image }}" +spec: + includedNamespaces: + - {{ cifmw_backup_restore_namespace }} + excludedResources: + - persistentvolumeclaims + - persistentvolumes + storageLocation: {{ cifmw_backup_restore_storage_location }} + ttl: {{ cifmw_backup_restore_backup_ttl }} diff --git a/roles/cifmw_backup_restore/templates/galerabackup.yaml.j2 b/roles/cifmw_backup_restore/templates/galerabackup.yaml.j2 new file mode 100644 index 000000000..88f20c622 --- /dev/null +++ b/roles/cifmw_backup_restore/templates/galerabackup.yaml.j2 @@ -0,0 +1,14 @@ +--- +apiVersion: mariadb.openstack.org/v1beta1 +kind: GaleraBackup +metadata: + name: {{ galera_instance_name }} + namespace: {{ cifmw_backup_restore_namespace }} +spec: + databaseInstance: {{ galera_instance_name }} +{% if _galera_storage_class %} + storageClass: {{ _galera_storage_class }} +{% endif %} + storageRequest: {{ cifmw_backup_restore_galera_storage_request }} + transferStorage: + storageRequest: {{ cifmw_backup_restore_galera_transfer_storage_request }} diff --git a/zuul.d/molecule.yaml b/zuul.d/molecule.yaml index 9f44f4685..903d2e114 100644 --- a/zuul.d/molecule.yaml +++ b/zuul.d/molecule.yaml @@ -902,6 +902,15 @@ - ^.config/molecule/.* name: cifmw-molecule-ci_lvms_storage parent: cifmw-molecule-noop +- job: + files: + - ^common-requirements.txt + - ^test-requirements.txt + - ^roles/cifmw_backup_restore/.* + - ^ci/playbooks/molecule.* + - ^.config/molecule/.* + name: cifmw-molecule-cifmw_backup_restore + parent: cifmw-molecule-noop - job: files: - ^common-requirements.txt diff --git a/zuul.d/projects.yaml b/zuul.d/projects.yaml index 6ea6ca035..9b4b8e84b 100644
--- a/zuul.d/projects.yaml +++ b/zuul.d/projects.yaml @@ -26,6 +26,7 @@ - cifmw-molecule-ci_network - cifmw-molecule-ci_nmstate - cifmw-molecule-ci_setup + - cifmw-molecule-cifmw_backup_restore - cifmw-molecule-cifmw_block_device - cifmw-molecule-cifmw_ceph_client - cifmw-molecule-cifmw_ceph_spec