openshift · ptalgulk01 · May 8, 2026 · coderabbitai · May 8, 2026 · coderabbitai
diff --git a/test/extended-priv/const.go b/test/extended-priv/const.go
@@ -118,6 +118,8 @@ const (
 	AlpineImage = "quay.io/openshifttest/alpine@sha256:dc1536cbff0ba235d4219462aeccd4caceab9def96ae8064257d049166890083"
 	// TestSSLImage the testssl image stored in openshiftest
 	TestSSLImage = "quay.io/openshifttest/testssl@sha256:ad6fb8002cb9cfce3ddc8829fd6e7e0d997aeb1faf972650f3e5d7603f90c6ef"
+	// MCDCrioReloadedRegexp is the regexp used to verify that crio was reloaded by the MCD
+	MCDCrioReloadedRegexp = "crio.* reloaded successfully"
 
 	// Constants for NodeDisruptionPolicy
 	NodeDisruptionPolicyActionNone         = "None"

diff --git a/test/extended-priv/controller.go b/test/extended-priv/controller.go
@@ -173,6 +173,24 @@ func (mcc *Controller) GetPreviousLogs() (string, error) {
 	return prevLogs, nil
 }
 
+// RemovePod removes the controller pod forcing the creation of a new one
+func (mcc *Controller) RemovePod() error {
+	cachedPodName, err := mcc.GetCachedPodName()
+	if err != nil {
+		return err
+	}
+	if cachedPodName == "" {
+		err := fmt.Errorf("Cannot get controller pod name. Failed getting MCO controller logs")
+		logger.Errorf("Error getting controller pod name. Error: %s", err)
+		return err
+	}
+
+	// remove the cached podname, since it will not be valid anymore
+	mcc.podName = ""
+
+	return mcc.oc.WithoutNamespace().Run("delete").Args("pod", "-n", MachineConfigNamespace, cachedPodName).Execute()
+}
+
 // checkMCCPanic checks the machine-config-controller logs for panics
 func checkMCCPanic(oc *exutil.CLI) {
 	var (

diff --git a/test/extended-priv/machineconfig.go b/test/extended-priv/machineconfig.go
@@ -26,6 +26,11 @@ type MachineConfig struct {
 	skipWaitForMcp bool
 }
 
+// NewMachineConfigList construct a new machine config list struct to handle all existing machine configs
+func NewMachineConfigList(oc *exutil.CLI) *MachineConfigList {
+	return &MachineConfigList{ResourceList: *NewResourceList(oc, "mc")}
+}
+
 // NewMachineConfig create a NewMachineConfig struct
 func NewMachineConfig(oc *exutil.CLI, name, pool string) *MachineConfig {
 	mc := &MachineConfig{Resource: *NewResource(oc, "mc", name), pool: pool}

diff --git a/test/extended-priv/machineconfigpool.go b/test/extended-priv/machineconfigpool.go
@@ -2,6 +2,7 @@ package extended
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"strconv"
@@ -1075,6 +1076,24 @@ func (mcp MachineConfigPool) GetMOSC() (*MachineOSConfig, error) {
 	return moscs[0], nil
 }
 
+// GetCertsExpiry returns the information about the certificates tracked by the MCP
+func (mcp *MachineConfigPool) GetCertsExpiry() ([]CertExpiry, error) {
+	expiryString, err := mcp.Get(`{.status.certExpirys}`)
+	if err != nil {
+		return nil, err
+	}
+
+	var certsExp []CertExpiry
+
+	jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)
+
+	if jsonerr != nil {
+		return nil, jsonerr
+	}
-	var certsExp []CertExpiry
-
-	jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)
-
-	if jsonerr != nil {
-		return nil, jsonerr
-	}
+	var certsExp []CertExpiry
+
+	if expiryString == "" {
+		return certsExp, nil
+	}
+
+	jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)
+
+	if jsonerr != nil {
+		return nil, jsonerr
+	}
-	var certsExp []CertExpiry
-
-	jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)
-
-	if jsonerr != nil {
-		return nil, jsonerr
-	}
+	var certsExp []CertExpiry
+
+	if expiryString == "" {
+		return certsExp, nil
+	}
+
+	jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)
+
+	if jsonerr != nil {
+		return nil, jsonerr
+	}
+
+	return certsExp, nil
+}
+
 // GetAll returns a []*MachineConfigPool list with all existing machine config pools sorted by creation time
 func (mcpl *MachineConfigPoolList) GetAll() ([]*MachineConfigPool, error) {
 	mcpl.ResourceList.SortByTimestamp()

diff --git a/test/extended-priv/mco.go b/test/extended-priv/mco.go
@@ -152,3 +152,50 @@ func createMcAndVerifyMCValue(oc *exutil.CLI, stepText, mcName string, node *Nod
 	o.Expect(podOut).Should(o.MatchRegexp(textToVerify.textToVerifyForNode))
 	logger.Infof("%s is verified in the machine config daemon!", stepText)
 }
+
+// getCoreDNSWorkerPodCreationTime returns the latest creation timestamp for CoreDNS worker pods
+// this func help to verify CoreDNS pods are recreated when MCP starts updating
+func getCoreDNSWorkerPodCreationTime(oc *exutil.CLI) (string, error) {
+	vsphereInfraNamespace := OnPremPlatforms[VspherePlatform]
+
+	// Get all CoreDNS pods
+	coreDNSPods := NewNamespacedResourceList(oc.AsAdmin(), "pod", vsphereInfraNamespace)
+	coreDNSPods.ByLabel("app=vsphere-infra-coredns")
+
+	// Get pod names, node names, and creation timestamps
+	podNames, err := coreDNSPods.Get(`{.items[*].metadata.name}`)
+	if err != nil {
+		return "", fmt.Errorf("failed to get pod names: %w", err)
+	}
+
+	nodeNames, err := coreDNSPods.Get(`{.items[*].spec.nodeName}`)
+	if err != nil {
+		return "", fmt.Errorf("failed to get node names: %w", err)
+	}
+
+	creationTimes, err := coreDNSPods.Get(`{.items[*].metadata.creationTimestamp}`)
+	if err != nil {
+		return "", fmt.Errorf("failed to get creation timestamps: %w", err)
+	}
+
+	// Parse the results
+	pods := strings.Fields(podNames)
+	nodes := strings.Fields(nodeNames)
+	timestamps := strings.Fields(creationTimes)
+
+	// Find the latest creation timestamp for CoreDNS pods on worker nodes
+	var latestTimestamp string
+	for i, pod := range pods {
+		if i >= len(nodes) || i >= len(timestamps) {
+			break
+		}
+		// Only check CoreDNS pods on worker nodes
+		if strings.HasPrefix(pod, "coredns-") && strings.Contains(nodes[i], "worker") {
+			if latestTimestamp == "" || timestamps[i] > latestTimestamp {
+				latestTimestamp = timestamps[i]
+			}
+		}
+	}
+
+	return latestTimestamp, nil
+}
diff --git a/test/extended-priv/mco_daemon.go b/test/extended-priv/mco_daemon.go
@@ -313,4 +313,197 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/longdurati
 		logger.Infof("OK!\n")
 
 	})
+
+	g.It("[PolarionID:68684][OTP] machine-config-controller pod restart should not make nodes unschedulable [Disruptive]", func() {
+		var (
+			controller = NewController(oc.AsAdmin())
+			masterNode = NewMachineConfigPool(oc.AsAdmin(), MachineConfigPoolMaster).GetNodesOrFail()[0]
+		)
+
+		exutil.By("Check that nodes are not modified when the controller pod is removed")
+		labels, err := masterNode.Get(`{.metadata.labels}`)
+		o.Expect(err).NotTo(o.HaveOccurred(), "Error getting the labels in node %s", masterNode.GetName())
+
+		masterNode.oc.NotShowInfo()        // avoid spamming the logs
+		o.Consistently(func(gm o.Gomega) { // Passing o.Gomega as parameter we can use assertions inside the Consistently function without breaking the retries.
+			logger.Infof("Remove controller pod")
+			gm.Expect(controller.RemovePod()).To(o.Succeed(), "Could not remove the controller pod")
+
+			logger.Infof("Check that the node was not modified")
+			gm.Consistently(func(gm o.Gomega) {
+				gm.Expect(masterNode.Get(`{.metadata.labels}`)).To(o.MatchJSON(labels),
+					"Labels in node %s have changed after removing the controller pod, and they should not change", masterNode.GetName())
+				gm.Expect(masterNode.IsCordoned()).To(o.BeFalse(),
+					"The node %s was cordoned after removing the controller pod. Node: \n%s",
+					masterNode.GetName(), masterNode.PrettyString())
+			}, "10s", "0s").
+				Should(o.Succeed(),
+					"The node %s was modified when the controller pod was removed")
+		}, "4m", "1s").
+			Should(o.Succeed(),
+				"When we remove the controller pod the node %s is modified")
+
+		logger.Infof("OK!\n")
+	})
+
+	g.It("[PolarionID:82299][OTP] Check race condition in rpm-ostree update logic [Disruptive]", func() {
+		mcp, cleanup, err := GetCompactCompatibleOrCustomPool(oc.AsAdmin(), 1)
+		defer cleanup()
+		o.Expect(err).NotTo(o.HaveOccurred(), "Error getting pool for testing")
+
+		var (
+			node = mcp.GetSortedNodesOrFail()[0]
+
+			overrideContent = `[Service]
+ExecStartPre=/bin/bash -c "echo 'exec start pre'; /bin/sleep 15; echo 'exec start pre end'"`
+			overridePath = "/etc/systemd/system/ostree-finalize-staged-hold.service.d/override.conf"
+			overrideMode = "0600"
+
+			fileConfig = getBase64EncodedFileConfig(overridePath, overrideContent, overrideMode)
+
+			mcOverrideName = fmt.Sprintf("99-%s-tc-%s-override", mcp.GetName(), GetCurrentTestPolarionIDNumber())
+			mcOverride     = NewMachineConfig(oc.AsAdmin(), mcOverrideName, mcp.GetName())
+
+			kernelArgs = "abc=def"
+
+			mcKernelArgsName = fmt.Sprintf("99-%s-tc-%s-kernelargs", mcp.GetName(), GetCurrentTestPolarionIDNumber())
+			mcKernelArgs     = NewMachineConfig(oc.AsAdmin(), mcKernelArgsName, mcp.GetName())
+		)
+
+		exutil.By("Override ostree finalizer")
+		mcOverride.parameters = []string{fmt.Sprintf("FILES=[%s]", fileConfig)}
+		mcOverride.skipWaitForMcp = true
+		defer mcOverride.DeleteWithWait()
+		mcOverride.create()
+		logger.Infof("OK!\n")
+
+		exutil.By("Wait for the override MC to be applied")
+		mcp.waitForComplete()
+		logger.Infof("OK!\n")
+
+		exutil.By("Check that the file was correctly deployed")
+		o.Eventually(NewRemoteFile(node, overridePath), "2m", "10s").Should(HaveContent(overrideContent),
+			"Wrong content in file %s", overridePath)
+		logger.Infof("OK!\n")
+
+		exutil.By("Configure kernel args")
+		mcKernelArgs.parameters = []string{fmt.Sprintf(`KERNEL_ARGS=["%s"]`, kernelArgs)}
+		mcKernelArgs.skipWaitForMcp = true
+		defer mcKernelArgs.Delete()
+		mcKernelArgs.create()
+		logger.Infof("OK!\n")
+
+		exutil.By("Wait for the kernelargs MC to be applied")
+		mcp.waitForComplete()
+		logger.Infof("OK!\n")
+
+		exutil.By("Check that the kernel args were correctly applied")
+		o.Expect(node.IsKernelArgEnabled(kernelArgs)).To(o.BeTrue(),
+			"Kernel argument %s was not enabled in the node %s", kernelArgs, node.GetName())
+		logger.Infof("OK!\n")
+	})
+
+	g.It("[PolarionID:83134][OTP] Check that MCC can find all requested resources [Serial]", func() {
+		var (
+			mcc                      = NewController(oc.AsAdmin())
+			resourceNotFoundErrorMsg = "the server could not find the requested resource"
+			listFailureErrorMsg      = "failed to list"
+		)
+
+		exutil.By("Check that MCC can find all requested resources")
+		o.Eventually(mcc.GetLogs, "1m", "20s").ShouldNot(o.And(
+			o.ContainSubstring(resourceNotFoundErrorMsg),
+			o.ContainSubstring(listFailureErrorMsg)),
+			"MCC is reporting that some resources cannot be found or listed")
+		logger.Infof("OK!\n")
+
+	})
+
+	g.It("[PolarionID:83943][OTP] CoreDNS Static Pod Redeploy Causes DNS Failure and rpm-ostree disable on vSphere IPI [Disruptive]", func() {
+		skipTestIfSupportedPlatformNotMatched(oc, VspherePlatform)
+		var (
+			mcName              = "custom-coredns-and-osimage"
+			coreDNSManifestPath = "/etc/kubernetes/manifests/coredns.yaml"
+			newCPU              = "50m"
+			newMemory           = "50Mi"
+			mcp                 = GetCompactCompatiblePool(oc.AsAdmin())
+			node                = mcp.GetNodesOrFail()[0]
+			dockerFileCommands  = `RUN echo "hello" > /etc/test.txt`
+		)
+
+		exutil.By("Get current CoreDNS manifest from the node")
+		coreDNSFile := NewRemoteFile(node, coreDNSManifestPath)
+		o.Expect(coreDNSFile.Fetch()).NotTo(o.HaveOccurred(),
+			"Failed to get CoreDNS manifest from node %s", node.GetName())
+		coreDNSContent := coreDNSFile.GetTextContent()
+		o.Expect(coreDNSContent).NotTo(o.BeEmpty(), "CoreDNS manifest is empty")
+		logger.Infof("OK!\n")
+
+		exutil.By("Modify CoreDNS manifest to change CPU and memory")
+		cpuRegex := regexp.MustCompile(`cpu:\s*\d+m`)
+		memoryRegex := regexp.MustCompile(`memory:\s*\d+Mi`)
+		modifiedCoreDNS := cpuRegex.ReplaceAllString(coreDNSContent, "cpu: "+newCPU)
+		modifiedCoreDNS = memoryRegex.ReplaceAllString(modifiedCoreDNS, "memory: "+newMemory)
+		logger.Infof("OK!\n")
+
+		exutil.By("Pause the MCP")
+		mcp.pause(true)
+		defer mcp.pause(false)
+		o.Expect(mcp.IsPaused()).Should(o.BeTrue(), "%s pool is not paused", mcp.GetName())
+		logger.Infof("OK!\n")
+
+		// Build the new osImage
+		osImageBuilder := OsImageBuilderInNode{
+			node:               node,
+			dockerFileCommands: dockerFileCommands,
+		}
+		digestedImage, err := osImageBuilder.CreateAndDigestOsImage()
+		o.Expect(err).NotTo(o.HaveOccurred(),
+			"Error creating the new osImage")
+		logger.Infof("OK\n")
+		logger.Infof("Digested image: %s\n", digestedImage)
+
+		exutil.By("Create MC with modified CoreDNS manifest and custom osImage")
+		mc := NewMachineConfig(oc.AsAdmin(), mcName, mcp.GetName())
+		fileConfig := getBase64EncodedFileConfig(coreDNSManifestPath, modifiedCoreDNS, "420")
+		mc.parameters = []string{fmt.Sprintf("FILES=[%s]", fileConfig), "OS_IMAGE=" + digestedImage}
+		defer mc.DeleteWithWait()
+		mc.skipWaitForMcp = true
+		mc.create()
+		logger.Infof("OK!\n")
+
+		exutil.By("Get initial CoreDNS pod creation time before unpause")
+		initialCreationTime, err := getCoreDNSWorkerPodCreationTime(oc)
+		o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get initial creation time")
+		o.Expect(initialCreationTime).NotTo(o.BeEmpty(), "No CoreDNS pods found on worker nodes")
+		logger.Infof("OK!\n")
+
+		exutil.By("Unpause the MCP")
+		mcp.pause(false)
+		logger.Infof("OK!\n")
+
+		exutil.By("Verify CoreDNS pods are recreated when MCP starts updating")
+		o.Eventually(getCoreDNSWorkerPodCreationTime, "15m", "30s").
+			WithArguments(oc).
+			ShouldNot(o.Or(o.Equal(initialCreationTime), o.BeEmpty()),
+				"CoreDNS pods were not recreated after MCP update started")
+		logger.Infof("OK!\n")
+
+		exutil.By("Verify MCP completes update without getting stuck")
+		mcp.waitForComplete()
+		logger.Infof("OK!\n")
+
+		exutil.By("Verify CoreDNS manifest changes are applied on the node")
+		o.Expect(coreDNSFile.Fetch()).NotTo(o.HaveOccurred(),
+			"Failed to re-fetch CoreDNS manifest from node %s", node.GetName())
+		o.Expect(coreDNSFile).To(HaveContent(o.ContainSubstring("cpu: "+newCPU)),
+			"CPU value not updated in CoreDNS manifest on node %s", node.GetName())
+		o.Expect(coreDNSFile).To(HaveContent(o.ContainSubstring("memory: "+newMemory)),
+			"Memory value not updated in CoreDNS manifest on node %s", node.GetName())
+		logger.Infof("OK!\n")
+
+		exutil.By("Verify osImage is applied on the node")
+		o.Expect(node.GetCurrentBootOSImage()).Should(o.Equal(digestedImage), "Custom osImage not applied on the node")
+		logger.Infof("OK!\n")
+	})
 })