Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions test/extended-priv/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ const (
AlpineImage = "quay.io/openshifttest/alpine@sha256:dc1536cbff0ba235d4219462aeccd4caceab9def96ae8064257d049166890083"
// TestSSLImage the testssl image stored in openshiftest
TestSSLImage = "quay.io/openshifttest/testssl@sha256:ad6fb8002cb9cfce3ddc8829fd6e7e0d997aeb1faf972650f3e5d7603f90c6ef"
// MCDCrioReloadedRegexp is the regexp used to verify that crio was reloaded by the MCD
MCDCrioReloadedRegexp = "crio.* reloaded successfully"

// Constants for NodeDisruptionPolicy
NodeDisruptionPolicyActionNone = "None"
Expand Down
18 changes: 18 additions & 0 deletions test/extended-priv/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,24 @@ func (mcc *Controller) GetPreviousLogs() (string, error) {
return prevLogs, nil
}

// RemovePod removes the controller pod forcing the creation of a new one
func (mcc *Controller) RemovePod() error {
cachedPodName, err := mcc.GetCachedPodName()
if err != nil {
return err
}
if cachedPodName == "" {
err := fmt.Errorf("Cannot get controller pod name. Failed getting MCO controller logs")
logger.Errorf("Error getting controller pod name. Error: %s", err)
return err
}

// remove the cached podname, since it will not be valid anymore
mcc.podName = ""

return mcc.oc.WithoutNamespace().Run("delete").Args("pod", "-n", MachineConfigNamespace, cachedPodName).Execute()
}

// checkMCCPanic checks the machine-config-controller logs for panics
func checkMCCPanic(oc *exutil.CLI) {
var (
Expand Down
5 changes: 5 additions & 0 deletions test/extended-priv/machineconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ type MachineConfig struct {
skipWaitForMcp bool
}

// NewMachineConfigList construct a new machine config list struct to handle all existing machine configs
func NewMachineConfigList(oc *exutil.CLI) *MachineConfigList {
return &MachineConfigList{ResourceList: *NewResourceList(oc, "mc")}
}

// NewMachineConfig create a NewMachineConfig struct
func NewMachineConfig(oc *exutil.CLI, name, pool string) *MachineConfig {
mc := &MachineConfig{Resource: *NewResource(oc, "mc", name), pool: pool}
Expand Down
19 changes: 19 additions & 0 deletions test/extended-priv/machineconfigpool.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package extended

import (
"context"
"encoding/json"
"errors"
"fmt"
"strconv"
Expand Down Expand Up @@ -1075,6 +1076,24 @@ func (mcp MachineConfigPool) GetMOSC() (*MachineOSConfig, error) {
return moscs[0], nil
}

// GetCertsExpiry returns the information about the certificates tracked by the MCP
func (mcp *MachineConfigPool) GetCertsExpiry() ([]CertExpiry, error) {
expiryString, err := mcp.Get(`{.status.certExpirys}`)
if err != nil {
return nil, err
}

var certsExp []CertExpiry

jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)

if jsonerr != nil {
return nil, jsonerr
}
Comment on lines +1086 to +1092
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

GetCertsExpiry returns a spurious JSON error when the MCP has no certExpirys

When status.certExpirys is absent, mcp.Get(...) returns an empty string with no error. json.Unmarshal([]byte(""), ...) then returns unexpected end of JSON input, making callers believe the MCP is broken rather than simply having no certificate data yet.

🛡️ Proposed fix
 var certsExp []CertExpiry
+if expiryString == "" {
+    return certsExp, nil
+}

 jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
var certsExp []CertExpiry
jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)
if jsonerr != nil {
return nil, jsonerr
}
var certsExp []CertExpiry
if expiryString == "" {
return certsExp, nil
}
jsonerr := json.Unmarshal([]byte(expiryString), &certsExp)
if jsonerr != nil {
return nil, jsonerr
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/extended-priv/machineconfigpool.go` around lines 1086 - 1092,
GetCertsExpiry currently calls json.Unmarshal on expiryString which can be empty
(mcp.Get returns ""), causing "unexpected end of JSON input"; before calling
json.Unmarshal in GetCertsExpiry, check if strings.TrimSpace(expiryString) == ""
(or expiryString == "") and return an empty slice (or nil) and nil error
immediately; then proceed to json.Unmarshal into certsExp as before. Ensure you
reference the certsExp variable and the GetCertsExpiry function when making this
change and add a strings import if needed.


return certsExp, nil
}

// GetAll returns a []*MachineConfigPool list with all existing machine config pools sorted by creation time
func (mcpl *MachineConfigPoolList) GetAll() ([]*MachineConfigPool, error) {
mcpl.ResourceList.SortByTimestamp()
Expand Down
47 changes: 47 additions & 0 deletions test/extended-priv/mco.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,50 @@ func createMcAndVerifyMCValue(oc *exutil.CLI, stepText, mcName string, node *Nod
o.Expect(podOut).Should(o.MatchRegexp(textToVerify.textToVerifyForNode))
logger.Infof("%s is verified in the machine config daemon!", stepText)
}

// getCoreDNSWorkerPodCreationTime returns the latest creation timestamp for CoreDNS worker pods
// this func help to verify CoreDNS pods are recreated when MCP starts updating
func getCoreDNSWorkerPodCreationTime(oc *exutil.CLI) (string, error) {
vsphereInfraNamespace := OnPremPlatforms[VspherePlatform]

// Get all CoreDNS pods
coreDNSPods := NewNamespacedResourceList(oc.AsAdmin(), "pod", vsphereInfraNamespace)
coreDNSPods.ByLabel("app=vsphere-infra-coredns")

// Get pod names, node names, and creation timestamps
podNames, err := coreDNSPods.Get(`{.items[*].metadata.name}`)
if err != nil {
return "", fmt.Errorf("failed to get pod names: %w", err)
}

nodeNames, err := coreDNSPods.Get(`{.items[*].spec.nodeName}`)
if err != nil {
return "", fmt.Errorf("failed to get node names: %w", err)
}

creationTimes, err := coreDNSPods.Get(`{.items[*].metadata.creationTimestamp}`)
if err != nil {
return "", fmt.Errorf("failed to get creation timestamps: %w", err)
}

// Parse the results
pods := strings.Fields(podNames)
nodes := strings.Fields(nodeNames)
timestamps := strings.Fields(creationTimes)

// Find the latest creation timestamp for CoreDNS pods on worker nodes
var latestTimestamp string
for i, pod := range pods {
if i >= len(nodes) || i >= len(timestamps) {
break
}
// Only check CoreDNS pods on worker nodes
if strings.HasPrefix(pod, "coredns-") && strings.Contains(nodes[i], "worker") {
if latestTimestamp == "" || timestamps[i] > latestTimestamp {
latestTimestamp = timestamps[i]
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

return latestTimestamp, nil
}
193 changes: 193 additions & 0 deletions test/extended-priv/mco_daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,4 +313,197 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/longdurati
logger.Infof("OK!\n")

})

g.It("[PolarionID:68684][OTP] machine-config-controller pod restart should not make nodes unschedulable [Disruptive]", func() {
var (
controller = NewController(oc.AsAdmin())
masterNode = NewMachineConfigPool(oc.AsAdmin(), MachineConfigPoolMaster).GetNodesOrFail()[0]
)

exutil.By("Check that nodes are not modified when the controller pod is removed")
labels, err := masterNode.Get(`{.metadata.labels}`)
o.Expect(err).NotTo(o.HaveOccurred(), "Error getting the labels in node %s", masterNode.GetName())

masterNode.oc.NotShowInfo() // avoid spamming the logs
o.Consistently(func(gm o.Gomega) { // Passing o.Gomega as parameter we can use assertions inside the Consistently function without breaking the retries.
logger.Infof("Remove controller pod")
gm.Expect(controller.RemovePod()).To(o.Succeed(), "Could not remove the controller pod")

logger.Infof("Check that the node was not modified")
gm.Consistently(func(gm o.Gomega) {
gm.Expect(masterNode.Get(`{.metadata.labels}`)).To(o.MatchJSON(labels),
"Labels in node %s have changed after removing the controller pod, and they should not change", masterNode.GetName())
gm.Expect(masterNode.IsCordoned()).To(o.BeFalse(),
"The node %s was cordoned after removing the controller pod. Node: \n%s",
masterNode.GetName(), masterNode.PrettyString())
}, "10s", "0s").
Should(o.Succeed(),
"The node %s was modified when the controller pod was removed")
}, "4m", "1s").
Should(o.Succeed(),
"When we remove the controller pod the node %s is modified")

logger.Infof("OK!\n")
})

g.It("[PolarionID:82299][OTP] Check race condition in rpm-ostree update logic [Disruptive]", func() {
mcp, cleanup, err := GetCompactCompatibleOrCustomPool(oc.AsAdmin(), 1)
defer cleanup()
o.Expect(err).NotTo(o.HaveOccurred(), "Error getting pool for testing")

Comment thread
coderabbitai[bot] marked this conversation as resolved.
var (
node = mcp.GetSortedNodesOrFail()[0]

overrideContent = `[Service]
ExecStartPre=/bin/bash -c "echo 'exec start pre'; /bin/sleep 15; echo 'exec start pre end'"`
overridePath = "/etc/systemd/system/ostree-finalize-staged-hold.service.d/override.conf"
overrideMode = "0600"

fileConfig = getBase64EncodedFileConfig(overridePath, overrideContent, overrideMode)

mcOverrideName = fmt.Sprintf("99-%s-tc-%s-override", mcp.GetName(), GetCurrentTestPolarionIDNumber())
mcOverride = NewMachineConfig(oc.AsAdmin(), mcOverrideName, mcp.GetName())

kernelArgs = "abc=def"

mcKernelArgsName = fmt.Sprintf("99-%s-tc-%s-kernelargs", mcp.GetName(), GetCurrentTestPolarionIDNumber())
mcKernelArgs = NewMachineConfig(oc.AsAdmin(), mcKernelArgsName, mcp.GetName())
)

exutil.By("Override ostree finalizer")
mcOverride.parameters = []string{fmt.Sprintf("FILES=[%s]", fileConfig)}
mcOverride.skipWaitForMcp = true
defer mcOverride.DeleteWithWait()
mcOverride.create()
logger.Infof("OK!\n")

exutil.By("Wait for the override MC to be applied")
mcp.waitForComplete()
logger.Infof("OK!\n")

exutil.By("Check that the file was correctly deployed")
o.Eventually(NewRemoteFile(node, overridePath), "2m", "10s").Should(HaveContent(overrideContent),
"Wrong content in file %s", overridePath)
logger.Infof("OK!\n")

exutil.By("Configure kernel args")
mcKernelArgs.parameters = []string{fmt.Sprintf(`KERNEL_ARGS=["%s"]`, kernelArgs)}
mcKernelArgs.skipWaitForMcp = true
defer mcKernelArgs.Delete()
mcKernelArgs.create()
logger.Infof("OK!\n")

exutil.By("Wait for the kernelargs MC to be applied")
mcp.waitForComplete()
logger.Infof("OK!\n")

exutil.By("Check that the kernel args were correctly applied")
o.Expect(node.IsKernelArgEnabled(kernelArgs)).To(o.BeTrue(),
"Kernel argument %s was not enabled in the node %s", kernelArgs, node.GetName())
logger.Infof("OK!\n")
})

g.It("[PolarionID:83134][OTP] Check that MCC can find all requested resources [Serial]", func() {
var (
mcc = NewController(oc.AsAdmin())
resourceNotFoundErrorMsg = "the server could not find the requested resource"
listFailureErrorMsg = "failed to list"
)

exutil.By("Check that MCC can find all requested resources")
o.Eventually(mcc.GetLogs, "1m", "20s").ShouldNot(o.And(
o.ContainSubstring(resourceNotFoundErrorMsg),
o.ContainSubstring(listFailureErrorMsg)),
"MCC is reporting that some resources cannot be found or listed")
Comment on lines +414 to +417
Copy link
Copy Markdown

@coderabbitai coderabbitai Bot May 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Use Or for independent MCC log error conditions

Line 414 currently uses ShouldNot(o.And(...)), which still passes when only one failure signal is present. The intent here is to fail if either error appears.

Suggested fix
-		o.Eventually(mcc.GetLogs, "1m", "20s").ShouldNot(o.And(
+		o.Eventually(mcc.GetLogs, "1m", "20s").ShouldNot(o.Or(
 			o.ContainSubstring(resourceNotFoundErrorMsg),
 			o.ContainSubstring(listFailureErrorMsg)),
 			"MCC is reporting that some resources cannot be found or listed")
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/extended-priv/mco_daemon.go` around lines 414 - 417, The test currently
checks mcc logs with
o.Eventually(mcc.GetLogs...).ShouldNot(o.And(resourceNotFoundErrorMsg,
listFailureErrorMsg)) which only fails when both messages are present; change
the assertion to use Or so it fails if either resourceNotFoundErrorMsg or
listFailureErrorMsg appears (i.e., replace the And(...) matcher with Or(...) in
the ShouldNot call referencing mcc.GetLogs, resourceNotFoundErrorMsg and
listFailureErrorMsg).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to check both the errors here

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ptalgulk01, understood! The ShouldNot(o.And(...)) is intentional here — the test should only flag a problem when both "the server could not find the requested resource" and "failed to list" appear together in the MCC logs, as their co-occurrence is what indicates the specific resource-listing failure. Having either message alone may be acceptable. Thanks for the clarification!

logger.Infof("OK!\n")

})

g.It("[PolarionID:83943][OTP] CoreDNS Static Pod Redeploy Causes DNS Failure and rpm-ostree disable on vSphere IPI [Disruptive]", func() {
skipTestIfSupportedPlatformNotMatched(oc, VspherePlatform)
var (
mcName = "custom-coredns-and-osimage"
coreDNSManifestPath = "/etc/kubernetes/manifests/coredns.yaml"
newCPU = "50m"
newMemory = "50Mi"
mcp = GetCompactCompatiblePool(oc.AsAdmin())
node = mcp.GetNodesOrFail()[0]
dockerFileCommands = `RUN echo "hello" > /etc/test.txt`
)

exutil.By("Get current CoreDNS manifest from the node")
coreDNSFile := NewRemoteFile(node, coreDNSManifestPath)
o.Expect(coreDNSFile.Fetch()).NotTo(o.HaveOccurred(),
"Failed to get CoreDNS manifest from node %s", node.GetName())
coreDNSContent := coreDNSFile.GetTextContent()
o.Expect(coreDNSContent).NotTo(o.BeEmpty(), "CoreDNS manifest is empty")
logger.Infof("OK!\n")

exutil.By("Modify CoreDNS manifest to change CPU and memory")
cpuRegex := regexp.MustCompile(`cpu:\s*\d+m`)
memoryRegex := regexp.MustCompile(`memory:\s*\d+Mi`)
modifiedCoreDNS := cpuRegex.ReplaceAllString(coreDNSContent, "cpu: "+newCPU)
modifiedCoreDNS = memoryRegex.ReplaceAllString(modifiedCoreDNS, "memory: "+newMemory)
logger.Infof("OK!\n")

exutil.By("Pause the MCP")
mcp.pause(true)
defer mcp.pause(false)
o.Expect(mcp.IsPaused()).Should(o.BeTrue(), "%s pool is not paused", mcp.GetName())
logger.Infof("OK!\n")

// Build the new osImage
osImageBuilder := OsImageBuilderInNode{
node: node,
dockerFileCommands: dockerFileCommands,
}
digestedImage, err := osImageBuilder.CreateAndDigestOsImage()
o.Expect(err).NotTo(o.HaveOccurred(),
"Error creating the new osImage")
logger.Infof("OK\n")
logger.Infof("Digested image: %s\n", digestedImage)

exutil.By("Create MC with modified CoreDNS manifest and custom osImage")
mc := NewMachineConfig(oc.AsAdmin(), mcName, mcp.GetName())
fileConfig := getBase64EncodedFileConfig(coreDNSManifestPath, modifiedCoreDNS, "420")
mc.parameters = []string{fmt.Sprintf("FILES=[%s]", fileConfig), "OS_IMAGE=" + digestedImage}
defer mc.DeleteWithWait()
mc.skipWaitForMcp = true
mc.create()
logger.Infof("OK!\n")

exutil.By("Get initial CoreDNS pod creation time before unpause")
initialCreationTime, err := getCoreDNSWorkerPodCreationTime(oc)
o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get initial creation time")
o.Expect(initialCreationTime).NotTo(o.BeEmpty(), "No CoreDNS pods found on worker nodes")
logger.Infof("OK!\n")

exutil.By("Unpause the MCP")
mcp.pause(false)
logger.Infof("OK!\n")

exutil.By("Verify CoreDNS pods are recreated when MCP starts updating")
o.Eventually(getCoreDNSWorkerPodCreationTime, "15m", "30s").
WithArguments(oc).
ShouldNot(o.Or(o.Equal(initialCreationTime), o.BeEmpty()),
"CoreDNS pods were not recreated after MCP update started")
logger.Infof("OK!\n")

exutil.By("Verify MCP completes update without getting stuck")
mcp.waitForComplete()
logger.Infof("OK!\n")

exutil.By("Verify CoreDNS manifest changes are applied on the node")
o.Expect(coreDNSFile.Fetch()).NotTo(o.HaveOccurred(),
"Failed to re-fetch CoreDNS manifest from node %s", node.GetName())
o.Expect(coreDNSFile).To(HaveContent(o.ContainSubstring("cpu: "+newCPU)),
"CPU value not updated in CoreDNS manifest on node %s", node.GetName())
o.Expect(coreDNSFile).To(HaveContent(o.ContainSubstring("memory: "+newMemory)),
"Memory value not updated in CoreDNS manifest on node %s", node.GetName())
logger.Infof("OK!\n")

exutil.By("Verify osImage is applied on the node")
o.Expect(node.GetCurrentBootOSImage()).Should(o.Equal(digestedImage), "Custom osImage not applied on the node")
logger.Infof("OK!\n")
})
})
Loading