Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -2349,6 +2349,15 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, erro
// Great, we've successfully rebooted for the desired config,
// let's mark it done!

// Verify extension packages are actually installed before marking as done
// See: https://redhat.atlassian.net/browse/OCPBUGS-65645
if dn.os.IsCoreOSVariant() {
coreOSDaemon := CoreOSDaemon{dn}
if err := coreOSDaemon.verifyExtensionPackages(state.currentConfig); err != nil {
return missingODC, inDesiredConfig, fmt.Errorf("extension package verification failed: %w", err)
}
}

// Get MCP associated with node
pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
if err != nil {
Expand Down
55 changes: 55 additions & 0 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -1844,6 +1844,61 @@ func (dn *CoreOSDaemon) applyExtensions(oldConfig, newConfig *mcfgv1.MachineConf
return runRpmOstree(args...)
}

// verifyExtensionPackages verifies that all extension packages specified in a
// MachineConfig are actually installed in the RPM database after a node reboot.
// See: https://redhat.atlassian.net/browse/OCPBUGS-65645
func (dn *CoreOSDaemon) verifyExtensionPackages(config *mcfgv1.MachineConfig) error {
// Only verify on RHCOS/SCOS nodes
if !dn.os.IsEL() {
return nil
}

// Get the list of extensions from the config
extensions := config.Spec.Extensions
if len(extensions) == 0 {
// No extensions to verify
return nil
}

// Map extensions to actual package names using the existing helper
expectedPackages, err := ctrlcommon.GetPackagesForSupportedExtensions(extensions)
if err != nil {
return fmt.Errorf("failed to get packages for extensions: %w", err)
}

klog.Infof("Verifying %d extension packages are installed for config %s", len(expectedPackages), config.GetName())

// For testing only
expectedPackages = append(expectedPackages, "sysstat")

// Verify each package is in the RPM database
var missingPackages []string
var exitErr *exec.ExitError
for _, pkg := range expectedPackages {
// Query RPM database directly for installed packages
out, err := exec.Command("rpm", "-q", pkg).CombinedOutput()
if err == nil {
continue
}
// Check if this is exit code 1 (package not installed) vs other errors
if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 {
missingPackages = append(missingPackages, pkg)
klog.Warningf("Extension package %s not found in RPM database", pkg)
continue
}

// Other errors (execution failure, permission issues, etc.) should fail immediately
return fmt.Errorf("failed to query RPM database for package %q: %v: %s", pkg, err, strings.TrimSpace(string(out)))
Comment on lines +1879 to +1891
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Test failure injection never triggers in the healthy path.

MCO_TEST_EXTENSION_FAILURE is only checked after rpm -q has already failed with a non-ExitCode()==1 error, so a normal successful verification can never be force-failed. If this hook is meant to exercise the new post-reboot guard, move the env-var check outside this error branch.

Suggested fix
 func (dn *CoreOSDaemon) verifyExtensionPackages(config *mcfgv1.MachineConfig) error {
 	// Only verify on RHCOS/SCOS nodes
 	if !dn.os.IsEL() {
 		return nil
 	}
+
+	if os.Getenv("MCO_TEST_EXTENSION_FAILURE") == "true" {
+		return fmt.Errorf("injected extension verification failure")
+	}
 
 	// Get the list of extensions from the config
 	extensions := config.Spec.Extensions
@@
-		if os.Getenv("MCO_TEST_EXTENSION_FAILURE") == "true" {
-			missingPackages = append(missingPackages, "test-failing")
-		}
-
 		// Other errors (execution failure, permission issues, etc.) should fail immediately
 		return fmt.Errorf("failed to query RPM database for package %q: %v: %s", pkg, err, strings.TrimSpace(string(out)))
 	}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@pkg/daemon/update.go` around lines 1876 - 1891, The
MCO_TEST_EXTENSION_FAILURE injection is only checked inside the non-ExitCode==1
error branch so it never triggers on successful rpm queries; after running
exec.Command("rpm","-q", pkg).CombinedOutput() (i.e., after obtaining out and
err) move the os.Getenv("MCO_TEST_EXTENSION_FAILURE") == "true" check out of the
error-only branch so it runs for the healthy path as well—if set, append
"test-failing" to missingPackages (and optionally klog.Warningf) before
continuing; keep the existing exitErr/ExitCode()==1 handling and the error
return for other failures unchanged.

}

if len(missingPackages) > 0 {
return fmt.Errorf("the following extension packages are missing from the RPM database: %v.", missingPackages)
}

klog.Infof("Successfully verified all %d extension packages are installed", len(expectedPackages))
return nil
}

// switchKernel updates kernel on host with the kernelType specified in MachineConfig.
// Right now it supports default (traditional), realtime kernel and 64k pages kernel
func (dn *CoreOSDaemon) switchKernel(oldConfig, newConfig *mcfgv1.MachineConfig) error {
Expand Down