Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ require (
k8s.io/kube-aggregator v0.34.1
k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3
k8s.io/kubectl v0.34.1
k8s.io/kubelet v0.31.1
k8s.io/kubernetes v1.34.1
k8s.io/pod-security-admission v0.34.1
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4
Expand Down Expand Up @@ -399,7 +400,6 @@ require (
k8s.io/externaljwt v0.0.0 // indirect
k8s.io/kms v0.34.1 // indirect
k8s.io/kube-scheduler v0.0.0 // indirect
k8s.io/kubelet v0.31.1 // indirect
k8s.io/mount-utils v0.0.0 // indirect
k8s.io/sample-apiserver v0.0.0 // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 // indirect
Expand Down
223 changes: 223 additions & 0 deletions test/extended/node/node_swap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
package node

import (
"context"
"fmt"
"strings"
"time"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"

configv1 "github.com/openshift/api/config/v1"
machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"
mcclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
exutil "github.com/openshift/origin/test/extended/util"
)

const (
	// workerGeneratedKubeletMC names the generated worker kubelet machine
	// config that these tests read to verify it is neither created nor
	// updated when a KubeletConfig with disallowed swap settings is rejected.
	workerGeneratedKubeletMC = "99-worker-generated-kubelet"
)

var _ = g.Describe("[Jira:Node][sig-node] Node non-cnv swap configuration", func() {
defer g.GinkgoRecover()

var oc = exutil.NewCLI("node-swap")

g.BeforeEach(func(ctx context.Context) {
// Skip all tests on MicroShift clusters
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
o.Expect(err).NotTo(o.HaveOccurred())
if isMicroShift {
g.Skip("Skipping test on MicroShift cluster")
}
})
Comment thread
BhargaviGudi marked this conversation as resolved.

// This test validates that:
// - Worker nodes have failSwapOn=false to allow kubelet to start even if swap is present at OS level
// - Control plane nodes have failSwapOn=true to prevent kubelet from starting if swap is enabled
// - All nodes have swapBehavior=NoSwap to ensure kubelet does not utilize swap even if available at OS level
// The swapBehavior=NoSwap configuration ensures that even if swap is manually enabled on a worker node,
// the kubelet will not use it for memory management, maintaining consistent behavior across the cluster.
g.It("should have correct default kubelet swap settings with worker nodes failSwapOn=false, control plane nodes failSwapOn=true, and both swapBehavior=NoSwap [OCP-86394]", ote.Informing(), func(ctx context.Context) {
g.By("Getting worker nodes")
allWorkerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker")
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(len(allWorkerNodes)).Should(o.BeNumerically(">", 0), "Expected at least one worker node")

// Filter out nodes that are also control plane (e.g., SNO)
workerNodes := getPureWorkerNodes(allWorkerNodes)

g.By("Validating kubelet configuration on each worker node")
for _, node := range workerNodes {
config, err := getKubeletConfigFromNode(ctx, oc, node.Name)
o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get kubelet config for worker node %s", node.Name)

g.By(fmt.Sprintf("Checking failSwapOn=false on worker node %s", node.Name))
o.Expect(config.FailSwapOn).NotTo(o.BeNil(), "failSwapOn should be set on worker node %s", node.Name)
o.Expect(*config.FailSwapOn).To(o.BeFalse(), "failSwapOn should be false on worker node %s", node.Name)
framework.Logf("Worker node %s: failSwapOn=%v ✓", node.Name, *config.FailSwapOn)

g.By(fmt.Sprintf("Checking swapBehavior=NoSwap on worker node %s", node.Name))
o.Expect(config.MemorySwap).NotTo(o.BeNil(), "memorySwap should be set on worker node %s", node.Name)
o.Expect(config.MemorySwap.SwapBehavior).To(o.Equal("NoSwap"), "swapBehavior should be NoSwap on worker node %s", node.Name)
framework.Logf("Worker node %s: swapBehavior=%s ✓", node.Name, config.MemorySwap.SwapBehavior)
}

// Skip control plane validation on Hypershift (control plane is external)
controlPlaneTopology, err := exutil.GetControlPlaneTopology(oc)
o.Expect(err).NotTo(o.HaveOccurred())

if *controlPlaneTopology != configv1.ExternalTopologyMode {
g.By("Getting control plane nodes")
controlPlaneNodes, err := getControlPlaneNodes(ctx, oc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(len(controlPlaneNodes)).Should(o.BeNumerically(">", 0), "Expected at least one control plane node")

g.By("Validating kubelet configuration on each control plane node")
for _, node := range controlPlaneNodes {
config, err := getKubeletConfigFromNode(ctx, oc, node.Name)
o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get kubelet config for control plane node %s", node.Name)

g.By(fmt.Sprintf("Checking failSwapOn=true on control plane node %s", node.Name))
o.Expect(config.FailSwapOn).NotTo(o.BeNil(), "failSwapOn should be set on control plane node %s", node.Name)
o.Expect(*config.FailSwapOn).To(o.BeTrue(), "failSwapOn should be true on control plane node %s", node.Name)
framework.Logf("Control plane node %s: failSwapOn=%v ✓", node.Name, *config.FailSwapOn)

g.By(fmt.Sprintf("Checking swapBehavior=NoSwap on control plane node %s", node.Name))
o.Expect(config.MemorySwap).NotTo(o.BeNil(), "memorySwap should be set on control plane node %s", node.Name)
o.Expect(config.MemorySwap.SwapBehavior).To(o.Equal("NoSwap"), "swapBehavior should be NoSwap on control plane node %s", node.Name)
framework.Logf("Control plane node %s: swapBehavior=%s ✓", node.Name, config.MemorySwap.SwapBehavior)
}
} else {
framework.Logf("Skipping control plane validation on Hypershift (external control plane)")
}
framework.Logf("Test PASSED: All nodes have correct default swap settings")
})

g.It("should reject user override of swap settings via KubeletConfig API [OCP-86395]", ote.Informing(), func(ctx context.Context) {
// Skip on Hypershift - MachineConfig API is not available
controlPlaneTopology, err := exutil.GetControlPlaneTopology(oc)
o.Expect(err).NotTo(o.HaveOccurred())
if *controlPlaneTopology == configv1.ExternalTopologyMode {
g.Skip("Skipping test on Hypershift - MachineConfig API not available")
}

g.By("Creating machine config client")
mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig())
o.Expect(err).NotTo(o.HaveOccurred(), "Failed to create machine config client")

g.By("Getting initial machine config resourceVersion")
// Get the initial resourceVersion of the worker machine config before creating KubeletConfig
workerMC, err := mcClient.MachineconfigurationV1().MachineConfigs().Get(ctx, workerGeneratedKubeletMC, metav1.GetOptions{})
initialResourceVersion := ""
if err == nil {
initialResourceVersion = workerMC.ResourceVersion
framework.Logf("Initial %s resourceVersion: %s", workerGeneratedKubeletMC, initialResourceVersion)
}

Comment on lines +118 to +126
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Do not silently skip MachineConfig invariance verification on API errors.

Both reads of 99-worker-generated-kubelet are optional (if err == nil), so the test can pass without proving the machine config remained unchanged. Fail the test when these reads fail.

Suggested fix
-		workerMC, err := mcClient.MachineconfigurationV1().MachineConfigs().Get(ctx, workerGeneratedKubeletMC, metav1.GetOptions{})
-		initialResourceVersion := ""
-		if err == nil {
-			initialResourceVersion = workerMC.ResourceVersion
-			framework.Logf("Initial %s resourceVersion: %s", workerGeneratedKubeletMC, initialResourceVersion)
-		}
+		workerMC, err := mcClient.MachineconfigurationV1().MachineConfigs().Get(ctx, workerGeneratedKubeletMC, metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get initial machine config %s", workerGeneratedKubeletMC)
+		initialResourceVersion := workerMC.ResourceVersion
+		framework.Logf("Initial %s resourceVersion: %s", workerGeneratedKubeletMC, initialResourceVersion)
...
-		workerMC, err = mcClient.MachineconfigurationV1().MachineConfigs().Get(ctx, workerGeneratedKubeletMC, metav1.GetOptions{})
-		if err == nil {
-			o.Expect(workerMC.ResourceVersion).To(o.Equal(initialResourceVersion), "Machine config %s should not be updated when failSwapOn is rejected", workerGeneratedKubeletMC)
-			framework.Logf("Verified: %s was not updated (resourceVersion: %s)", workerGeneratedKubeletMC, workerMC.ResourceVersion)
-		}
+		workerMC, err = mcClient.MachineconfigurationV1().MachineConfigs().Get(ctx, workerGeneratedKubeletMC, metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get machine config %s after KubeletConfig rejection", workerGeneratedKubeletMC)
+		o.Expect(workerMC.ResourceVersion).To(o.Equal(initialResourceVersion), "Machine config %s should not be updated when failSwapOn is rejected", workerGeneratedKubeletMC)
+		framework.Logf("Verified: %s was not updated (resourceVersion: %s)", workerGeneratedKubeletMC, workerMC.ResourceVersion)

Also applies to: 182-186

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/extended/node/node_swap.go` around lines 108 - 116, The test currently
ignores errors when reading the worker MachineConfig (calls to
mcClient.MachineconfigurationV1().MachineConfigs().Get for
workerGeneratedKubeletMC), which allows the test to pass without verifying
invariance; update the code that retrieves workerMC and later reads (the second
Get) to fail the test on error instead of silently continuing—i.e., if err !=
nil call framework.Failf or return an error from the test with a clear message
mentioning workerGeneratedKubeletMC and the underlying err; keep assigning
initialResourceVersion and comparing later as before but only after successful
Gets.

g.By("Creating a KubeletConfig with swap settings")
kcName := fmt.Sprintf("test-swap-override-%d", time.Now().UnixNano())
kubeletConfig := &machineconfigv1.KubeletConfig{
ObjectMeta: metav1.ObjectMeta{
Name: kcName,
},
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Spec: machineconfigv1.KubeletConfigSpec{
KubeletConfig: &runtime.RawExtension{
Raw: []byte(`{
"failSwapOn": true,
"memorySwap": {
"swapBehavior": "LimitedSwap"
}
}`),
},
},
}

g.By("Attempting to apply the KubeletConfig")
defer func() {
if err := mcClient.MachineconfigurationV1().KubeletConfigs().Delete(ctx, kcName, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
framework.Logf("cleanup failed for KubeletConfig %s: %v", kcName, err)
}
}()
framework.Logf("Creating KubeletConfig with failSwapOn=true and swapBehavior=LimitedSwap")
_, err = mcClient.MachineconfigurationV1().KubeletConfigs().Create(ctx, kubeletConfig, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "Failed to create KubeletConfig")

g.By("Checking KubeletConfig status for expected error message")
err = wait.Poll(2*time.Second, 30*time.Second, func() (bool, error) {
kc, err := mcClient.MachineconfigurationV1().KubeletConfigs().Get(ctx, kcName, metav1.GetOptions{})
if err != nil {
return false, err
}

if kc.Status.ObservedGeneration != kc.Generation {
framework.Logf("Waiting for controller to process generation %d (current: %d)", kc.Generation, kc.Status.ObservedGeneration)
return false, nil
}

// Fail fast if KubeletConfig was unexpectedly accepted
for _, condition := range kc.Status.Conditions {
if condition.Type == machineconfigv1.KubeletConfigSuccess && condition.Status == corev1.ConditionTrue {
return false, fmt.Errorf("KubeletConfig was unexpectedly accepted")
}
}

// Check for Failure condition with the expected error message
for _, condition := range kc.Status.Conditions {
if condition.Type == machineconfigv1.KubeletConfigFailure && condition.Status == corev1.ConditionTrue {
framework.Logf("Found Failure condition: %s", condition.Message)
if strings.Contains(condition.Message, "failSwapOn is not allowed to be set") {
return true, nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}
return false, nil
})
o.Expect(err).NotTo(o.HaveOccurred(), "Expected to find error message about failSwapOn not being allowed in KubeletConfig status")

g.By("Verifying machine config was not created or updated")
// Wait a bit to ensure no update happens
time.Sleep(5 * time.Second)

// Check if the machine config was created or updated (compare to initial resourceVersion captured earlier)
workerMC, err = mcClient.MachineconfigurationV1().MachineConfigs().Get(ctx, workerGeneratedKubeletMC, metav1.GetOptions{})
if err == nil {
o.Expect(workerMC.ResourceVersion).To(o.Equal(initialResourceVersion), "Machine config %s should not be updated when failSwapOn is rejected", workerGeneratedKubeletMC)
framework.Logf("Verified: %s was not updated (resourceVersion: %s)", workerGeneratedKubeletMC, workerMC.ResourceVersion)
}

g.By("Verifying worker nodes still have correct swap settings")
allWorkerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker")
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(len(allWorkerNodes)).Should(o.BeNumerically(">", 0), "Expected at least one worker node")

// Filter out nodes that are also control plane (e.g., SNO)
workerNodes := getPureWorkerNodes(allWorkerNodes)

for _, node := range workerNodes {
config, err := getKubeletConfigFromNode(ctx, oc, node.Name)
o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get kubelet config for worker node %s", node.Name)

g.By(fmt.Sprintf("Verifying failSwapOn=false remains unchanged on worker node %s", node.Name))
o.Expect(config.FailSwapOn).NotTo(o.BeNil(), "failSwapOn should be set on worker node %s", node.Name)
o.Expect(*config.FailSwapOn).To(o.BeFalse(), "failSwapOn should still be false on worker node %s after rejection", node.Name)
framework.Logf("Worker node %s: failSwapOn=%v (unchanged) ✓", node.Name, *config.FailSwapOn)

g.By(fmt.Sprintf("Verifying swapBehavior=NoSwap remains unchanged on worker node %s", node.Name))
o.Expect(config.MemorySwap).NotTo(o.BeNil(), "memorySwap should be set on worker node %s", node.Name)
o.Expect(config.MemorySwap.SwapBehavior).To(o.Equal("NoSwap"), "swapBehavior should still be NoSwap on worker node %s after rejection", node.Name)
framework.Logf("Worker node %s: swapBehavior=%s (unchanged) ✓", node.Name, config.MemorySwap.SwapBehavior)
}

framework.Logf("Test PASSED: KubeletConfig with failSwapOn was properly rejected")
})
})
83 changes: 83 additions & 0 deletions test/extended/node/node_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package node

import (
"context"
"encoding/json"
"fmt"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
"k8s.io/kubernetes/test/e2e/framework"

exutil "github.com/openshift/origin/test/extended/util"
)

// getNodesByLabel lists the cluster nodes whose labels satisfy labelSelector.
func getNodesByLabel(ctx context.Context, oc *exutil.CLI, labelSelector string) ([]v1.Node, error) {
	opts := metav1.ListOptions{LabelSelector: labelSelector}
	nodeList, err := oc.AdminKubeClient().CoreV1().Nodes().List(ctx, opts)
	if err != nil {
		return nil, err
	}
	return nodeList.Items, nil
}

// getControlPlaneNodes returns the cluster's control plane nodes. It prefers
// the OpenShift "master" role label and falls back to the upstream Kubernetes
// "control-plane" label when no master-labelled nodes are found.
func getControlPlaneNodes(ctx context.Context, oc *exutil.CLI) ([]v1.Node, error) {
	masters, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/master")
	if err != nil || len(masters) > 0 {
		return masters, err
	}
	return getNodesByLabel(ctx, oc, "node-role.kubernetes.io/control-plane")
}

// getKubeletConfigFromNode fetches the live kubelet configuration of the named
// node through the API server's node proxy /configz endpoint.
func getKubeletConfigFromNode(ctx context.Context, oc *exutil.CLI, nodeName string) (*kubeletconfigv1beta1.KubeletConfiguration, error) {
	// Fetch the raw configz payload via the node proxy API.
	raw, err := oc.AdminKubeClient().CoreV1().RESTClient().
		Get().
		AbsPath(fmt.Sprintf("/api/v1/nodes/%s/proxy/configz", nodeName)).
		DoRaw(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get configz from node %s: %w", nodeName, err)
	}

	// The payload wraps the kubelet configuration under a "kubeletconfig" key.
	var configz struct {
		KubeletConfig *kubeletconfigv1beta1.KubeletConfiguration `json:"kubeletconfig"`
	}
	if err := json.Unmarshal(raw, &configz); err != nil {
		return nil, fmt.Errorf("failed to unmarshal configz response: %w", err)
	}
	if configz.KubeletConfig == nil {
		return nil, fmt.Errorf("kubeletconfig is nil in response")
	}
	return configz.KubeletConfig, nil
}

// getPureWorkerNodes returns worker nodes that are not also control plane nodes.
// On SNO clusters, the single node has both worker and control-plane roles,
// so it should be validated as a control plane node (failSwapOn=true), not as a worker.
func getPureWorkerNodes(nodes []v1.Node) []v1.Node {
	var pureWorkers []v1.Node
	for i := range nodes {
		labels := nodes[i].Labels
		_, isControlPlane := labels["node-role.kubernetes.io/control-plane"]
		_, isMaster := labels["node-role.kubernetes.io/master"]
		if isControlPlane || isMaster {
			framework.Logf("Skipping worker validation for node %s (also has control-plane role, e.g. SNO)", nodes[i].Name)
			continue
		}
		pureWorkers = append(pureWorkers, nodes[i])
	}
	return pureWorkers
}