From dc27bc5de821aefb30a2861336db824cdef5cd7b Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Wed, 6 May 2026 18:26:52 +0000 Subject: [PATCH 1/3] daemon: verify extension packages are installed after reboot Co-Authored-By: Claude Sonnet 4.5 --- pkg/daemon/daemon.go | 9 ++++++++ pkg/daemon/update.go | 52 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index ed10ed7c15..9ea69374df 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -2349,6 +2349,15 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, erro // Great, we've successfully rebooted for the desired config, // let's mark it done! + // Verify extension packages are actually installed before marking as done + // See: https://redhat.atlassian.net/browse/OCPBUGS-65645 + if dn.os.IsCoreOSVariant() { + coreOSDaemon := CoreOSDaemon{dn} + if err := coreOSDaemon.verifyExtensionPackages(state.currentConfig); err != nil { + return missingODC, inDesiredConfig, fmt.Errorf("extension package verification failed: %w", err) + } + } + // Get MCP associated with node pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node) if err != nil { diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 264fc81146..5ef0e18b7a 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1844,6 +1844,58 @@ func (dn *CoreOSDaemon) applyExtensions(oldConfig, newConfig *mcfgv1.MachineConf return runRpmOstree(args...) } +// verifyExtensionPackages verifies that all extension packages specified in a +// MachineConfig are actually installed in the RPM database after a node reboot. 
+// See: https://redhat.atlassian.net/browse/OCPBUGS-65645
+func (dn *CoreOSDaemon) verifyExtensionPackages(config *mcfgv1.MachineConfig) error {
+	// Only verify on RHCOS/SCOS nodes; everything else is a no-op
+	if !dn.os.IsEL() {
+		return nil
+	}
+
+	// Get the list of extensions from the config
+	extensions := config.Spec.Extensions
+	if len(extensions) == 0 {
+		// No extensions requested, so there is nothing to verify
+		return nil
+	}
+
+	// Map extensions to actual package names using the existing helper
+	expectedPackages, err := ctrlcommon.GetPackagesForSupportedExtensions(extensions)
+	if err != nil {
+		return fmt.Errorf("failed to get packages for extensions: %w", err)
+	}
+
+	klog.Infof("Verifying %d extension packages are installed for config %s", len(expectedPackages), config.GetName())
+
+	// Verify each package is in the RPM database
+	var missingPackages []string
+	var exitErr *exec.ExitError
+	for _, pkg := range expectedPackages {
+		// Run "rpm -q <pkg>"; CombinedOutput keeps stdout and stderr for error reporting
+		out, err := exec.Command("rpm", "-q", pkg).CombinedOutput()
+		if err == nil {
+			continue
+		}
+		// Exit code 1 is treated as "package not installed": record it and keep checking the rest
+		if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 {
+			missingPackages = append(missingPackages, pkg)
+			klog.Warningf("Extension package %s not found in RPM database", pkg)
+			continue
+		}
+
+		// Any other failure aborts immediately; %w keeps the underlying error inspectable by callers
+		return fmt.Errorf("failed to query RPM database for package %q: %w: %s", pkg, err, strings.TrimSpace(string(out)))
+	}
+
+	if len(missingPackages) > 0 {
+		return fmt.Errorf("the following extension packages are missing from the RPM database: %v", missingPackages)
+	}
+
+	klog.Infof("Successfully verified all %d extension packages are installed", len(expectedPackages))
+	return nil
+}
+
 // switchKernel updates kernel on host with the kernelType specified in MachineConfig.
// Right now it supports default (traditional), realtime kernel and 64k pages kernel func (dn *CoreOSDaemon) switchKernel(oldConfig, newConfig *mcfgv1.MachineConfig) error { From cfbe3835323a8a1f1786eb23422ee986a0b123fc Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Thu, 14 May 2026 19:52:28 +0000 Subject: [PATCH 2/3] daemon: add pre-reboot verification of staged extension packages In addition to the existing post-reboot verification that checks the RPM database, add pre-reboot verification that checks extension packages are properly staged in the rpm-ostree deployment before rebooting. This provides two layers of defense: 1. Pre-reboot: Verify packages are in the staged deployment 2. Post-reboot: Verify packages are in the RPM database The pre-reboot check catches issues earlier, before the node reboots, saving time and preventing unnecessary node churn when staging fails. Implementation: - Added verifyExtensionsStaged() function that queries the staged deployment and verifies all expected packages are in RequestedPackages - Updated applyExtensions() to call verifyExtensionsStaged() after rpm-ostree update succeeds - Uses existing GetBootedAndStagedDeployment() infrastructure See: https://redhat.atlassian.net/browse/OCPBUGS-65645 Co-Authored-By: Claude Sonnet 4.5 --- pkg/daemon/update.go | 63 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 5ef0e18b7a..d9380615be 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1841,7 +1841,68 @@ func (dn *CoreOSDaemon) applyExtensions(oldConfig, newConfig *mcfgv1.MachineConf // Add "update" to the start of argument list args = append([]string{constants.RPMOSTreeUpdateArg}, args...) logSystem("Applying extensions : %+q", args) - return runRpmOstree(args...) 
+	if err := runRpmOstree(args...); err != nil {
+		return err
+	}
+
+	// Verify extension packages are staged before rebooting
+	// See: https://redhat.atlassian.net/browse/OCPBUGS-65645
+	return dn.verifyExtensionsStaged(newConfig)
+}
+
+// verifyExtensionsStaged verifies that every package the MachineConfig's extensions map to
+// is listed in RequestedPackages of the staged rpm-ostree deployment, so staging failures surface before the reboot.
+// It returns nil on non-EL nodes or when no extensions are set, and an error if nothing is staged or a package is missing.
+// See: https://redhat.atlassian.net/browse/OCPBUGS-65645
+func (dn *CoreOSDaemon) verifyExtensionsStaged(config *mcfgv1.MachineConfig) error {
+	// Only verify on RHCOS/SCOS nodes; everything else is a no-op
+	if !dn.os.IsEL() {
+		return nil
+	}
+
+	// Get the list of extensions from the config
+	extensions := config.Spec.Extensions
+	if len(extensions) == 0 {
+		// No extensions requested, so there is nothing to verify
+		return nil
+	}
+
+	// Map extensions to actual package names using the existing helper
+	expectedPackages, err := ctrlcommon.GetPackagesForSupportedExtensions(extensions)
+	if err != nil {
+		return fmt.Errorf("failed to get packages for extensions: %w", err)
+	}
+
+	klog.Infof("Verifying %d extension packages are staged for config %s", len(expectedPackages), config.GetName())
+
+	// Get the staged deployment; the booted deployment is not needed for this check
+	_, staged, err := dn.NodeUpdaterClient.GetBootedAndStagedDeployment()
+	if err != nil {
+		return fmt.Errorf("failed to get staged deployment: %w", err)
+	}
+
+	if staged == nil {
+		return fmt.Errorf("no staged deployment found after applying extensions")
+	}
+
+	// Create a set of requested packages in the staged deployment for quick lookup
+	stagedPackages := sets.New(staged.RequestedPackages...)
+
+	// Verify each expected package is in the staged deployment
+	var missingPackages []string
+	for _, pkg := range expectedPackages {
+		if !stagedPackages.Has(pkg) {
+			missingPackages = append(missingPackages, pkg)
+			klog.Warningf("Extension package %s not found in staged deployment", pkg)
+		}
+	}
+
+	if len(missingPackages) > 0 {
+		return fmt.Errorf("the following extension packages are missing from the staged deployment: %v", missingPackages)
+	}
+
+	klog.Infof("Successfully verified all %d extension packages are staged", len(expectedPackages))
+	return nil
 }
 
 // verifyExtensionPackages verifies that all extension packages specified in a
From 6ed921f6fc2c23fa21450a24844579780ebdc859 Mon Sep 17 00:00:00 2001
From: Isabella Janssen
Date: Thu, 14 May 2026 16:06:43 -0400
Subject: [PATCH 3/3] testing

---
 pkg/daemon/update.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go
index d9380615be..765239ebd5 100644
--- a/pkg/daemon/update.go
+++ b/pkg/daemon/update.go
@@ -1888,6 +1888,9 @@ func (dn *CoreOSDaemon) verifyExtensionsStaged(config *mcfgv1.MachineConfig) err
 	// Create a set of requested packages in the staged deployment for quick lookup
 	stagedPackages := sets.New(staged.RequestedPackages...)
 
+	// // For testing only
+	// expectedPackages = append(expectedPackages, "sysstat")
+
 	// Verify each expected package is in the staged deployment
 	var missingPackages []string
 	for _, pkg := range expectedPackages {
@@ -1929,6 +1932,9 @@ func (dn *CoreOSDaemon) verifyExtensionPackages(config *mcfgv1.MachineConfig) er
 
 	klog.Infof("Verifying %d extension packages are installed for config %s", len(expectedPackages), config.GetName())
 
+	// // For testing only
+	// expectedPackages = append(expectedPackages, "sysstat")
+
 	// Verify each package is in the RPM database
 	var missingPackages []string
 	var exitErr *exec.ExitError