Skip to content

Commit 9cb3ad9

Browse files
Test for AutoNodeSizing
1 parent 394edfe commit 9cb3ad9

4 files changed

Lines changed: 423 additions & 154 deletions

File tree

test/extended/node/node_e2e/node.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,21 @@ import (
66

77
g "github.com/onsi/ginkgo/v2"
88
o "github.com/onsi/gomega"
9+
nodeutils "github.com/openshift/origin/test/extended/node"
910
exutil "github.com/openshift/origin/test/extended/util"
1011
"k8s.io/apimachinery/pkg/util/wait"
1112
e2e "k8s.io/kubernetes/test/e2e/framework"
1213
)
1314

1415
var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager", func() {
1516
var (
16-
oc = exutil.NewCLIWithoutNamespace("node").AsAdmin()
17+
oc = exutil.NewCLIWithoutNamespace("node")
1718
)
1819

1920
//author: asahay@redhat.com
2021
g.It("[OTP] validate KUBELET_LOG_LEVEL", func() {
2122
var kubeservice string
22-
var kublet string
23+
var kubelet string
2324
var err error
2425

2526
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
@@ -33,28 +34,28 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
3334
g.By("Polling to check kubelet log level on ready nodes")
3435
waitErr := wait.Poll(10*time.Second, 1*time.Minute, func() (bool, error) {
3536
g.By("Getting all node names in the cluster")
36-
nodeName, nodeErr := oc.AsAdmin().WithoutNamespace().Run("get").Args("nodes", "-o=jsonpath={.items[*].metadata.name}").Output()
37+
nodeName, nodeErr := oc.AsAdmin().Run("get").Args("nodes", "-o=jsonpath={.items[*].metadata.name}").Output()
3738
o.Expect(nodeErr).NotTo(o.HaveOccurred())
3839
e2e.Logf("\nNode Names are %v", nodeName)
3940
nodes := strings.Fields(nodeName)
4041

4142
for _, node := range nodes {
4243
g.By("Checking if node " + node + " is Ready")
43-
nodeStatus, statusErr := oc.AsAdmin().WithoutNamespace().Run("get").Args("nodes", node, "-o=jsonpath={.status.conditions[?(@.type=='Ready')].status}").Output()
44+
nodeStatus, statusErr := oc.AsAdmin().Run("get").Args("nodes", node, "-o=jsonpath={.status.conditions[?(@.type=='Ready')].status}").Output()
4445
o.Expect(statusErr).NotTo(o.HaveOccurred())
4546
e2e.Logf("\nNode %s Status is %s\n", node, nodeStatus)
4647

4748
if nodeStatus == "True" {
4849
g.By("Checking KUBELET_LOG_LEVEL in kubelet.service on node " + node)
49-
kubeservice, err = oc.AsAdmin().WithoutNamespace().Run("debug").Args("node/"+node, "-ndefault", "--", "chroot", "/host", "/bin/bash", "-c", "systemctl show kubelet.service | grep KUBELET_LOG_LEVEL").Output()
50+
kubeservice, err = nodeutils.ExecOnNodeWithChroot(oc, node, "/bin/bash", "-c", "systemctl show kubelet.service | grep KUBELET_LOG_LEVEL")
5051
o.Expect(err).NotTo(o.HaveOccurred())
5152

5253
g.By("Checking kubelet process for --v=2 flag on node " + node)
53-
kublet, err = oc.AsAdmin().WithoutNamespace().Run("debug").Args("node/"+node, "-ndefault", "--", "chroot", "/host", "/bin/bash", "-c", "ps aux | grep kubelet").Output()
54+
kubelet, err = nodeutils.ExecOnNodeWithChroot(oc, node, "/bin/bash", "-c", "ps aux | grep [k]ubelet")
5455
o.Expect(err).NotTo(o.HaveOccurred())
5556

5657
g.By("Verifying KUBELET_LOG_LEVEL is set and kubelet is running with --v=2")
57-
if strings.Contains(string(kubeservice), "KUBELET_LOG_LEVEL") && strings.Contains(string(kublet), "--v=2") {
58+
if strings.Contains(kubeservice, "KUBELET_LOG_LEVEL") && strings.Contains(kubelet, "--v=2") {
5859
e2e.Logf("KUBELET_LOG_LEVEL is 2.\n")
5960
return true, nil
6061
} else {
@@ -70,7 +71,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
7071

7172
if waitErr != nil {
7273
e2e.Logf("Kubelet Log level is:\n %v\n", kubeservice)
73-
e2e.Logf("Running Process of kubelet are:\n %v\n", kublet)
74+
e2e.Logf("Running Process of kubelet are:\n %v\n", kubelet)
7475
}
7576
o.Expect(waitErr).NotTo(o.HaveOccurred(), "KUBELET_LOG_LEVEL is not expected, timed out")
7677
})

test/extended/node/node_sizing.go

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
package node
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strings"
7+
"time"
8+
9+
g "github.com/onsi/ginkgo/v2"
10+
o "github.com/onsi/gomega"
11+
corev1 "k8s.io/api/core/v1"
12+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
"k8s.io/apimachinery/pkg/types"
14+
"k8s.io/kubernetes/test/e2e/framework"
15+
16+
mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
17+
machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
18+
exutil "github.com/openshift/origin/test/extended/util"
19+
)
20+
21+
var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptive] Node sizing", func() {
22+
defer g.GinkgoRecover()
23+
24+
oc := exutil.NewCLIWithoutNamespace("node-sizing")
25+
26+
g.BeforeEach(func(ctx context.Context) {
27+
// Skip all tests on MicroShift clusters
28+
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
29+
o.Expect(err).NotTo(o.HaveOccurred())
30+
if isMicroShift {
31+
g.Skip("Skipping test on MicroShift cluster")
32+
}
33+
})
34+
35+
g.It("should have NODE_SIZING_ENABLED=true by default and NODE_SIZING_ENABLED=false when KubeletConfig with autoSizingReserved=false is applied", func(ctx context.Context) {
36+
37+
mcClient, err := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
38+
o.Expect(err).NotTo(o.HaveOccurred(), "Error creating MCO client")
39+
40+
testMCPName := "node-sizing-test"
41+
testNodeMCPLabel := fmt.Sprintf("node-role.kubernetes.io/%s", testMCPName)
42+
kubeletConfigName := "auto-sizing-enabled"
43+
44+
// Verify the default state (NODE_SIZING_ENABLED=true) before any KubeletConfig is applied
45+
// This feature was added in OCP 4.21
46+
g.By("Getting a worker node to test")
47+
nodes, err := oc.AdminKubeClient().CoreV1().Nodes().List(ctx, metav1.ListOptions{
48+
LabelSelector: "node-role.kubernetes.io/worker",
49+
})
50+
o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to list worker nodes")
51+
o.Expect(len(nodes.Items)).To(o.BeNumerically(">", 0), "Should have at least one worker node")
52+
53+
// Select first worker node and label it for our custom MCP
54+
// This approach is taken so that all the nodes do not restart at the same time for the test
55+
nodeName := nodes.Items[0].Name
56+
framework.Logf("Testing on node: %s", nodeName)
57+
58+
// Define cleanup function for node label before applying the label
59+
cleanupNodeLabel := func() {
60+
g.By(fmt.Sprintf("Removing node label %s from node %s", testNodeMCPLabel, nodeName))
61+
cleanupCtx := context.Background()
62+
// Use a JSON merge patch that sets the label to null, which removes the label in a single request
63+
patchData := []byte(fmt.Sprintf(`{"metadata":{"labels":{%q:null}}}`, testNodeMCPLabel))
64+
_, updateErr := oc.AdminKubeClient().CoreV1().Nodes().Patch(cleanupCtx, nodeName, types.MergePatchType, patchData, metav1.PatchOptions{})
65+
if updateErr != nil {
66+
framework.Logf("Failed to remove label from node %s: %v", nodeName, updateErr)
67+
return
68+
}
69+
70+
// Wait for the node to transition back to the worker pool configuration
71+
g.By(fmt.Sprintf("Waiting for node %s to transition back to worker pool", nodeName))
72+
o.Eventually(func() bool {
73+
currentNode, err := oc.AdminKubeClient().CoreV1().Nodes().Get(cleanupCtx, nodeName, metav1.GetOptions{})
74+
if err != nil {
75+
framework.Logf("Error getting node: %v", err)
76+
return false
77+
}
78+
currentConfig := currentNode.Annotations["machineconfiguration.openshift.io/currentConfig"]
79+
desiredConfig := currentNode.Annotations["machineconfiguration.openshift.io/desiredConfig"]
80+
81+
// Check if the node is using a worker config (not node-sizing-test config)
82+
isWorkerConfig := currentConfig != "" && !strings.Contains(currentConfig, testMCPName) && currentConfig == desiredConfig
83+
if isWorkerConfig {
84+
framework.Logf("Node %s successfully transitioned to worker config: %s", nodeName, currentConfig)
85+
} else {
86+
framework.Logf("Node %s still transitioning: current=%s, desired=%s", nodeName, currentConfig, desiredConfig)
87+
}
88+
return isWorkerConfig
89+
}, 7*time.Minute, 10*time.Second).Should(o.BeTrue(), fmt.Sprintf("Node %s should transition back to worker pool", nodeName))
90+
}
91+
92+
g.By(fmt.Sprintf("Labeling node %s with %s", nodeName, testNodeMCPLabel))
93+
patchData := []byte(fmt.Sprintf(`{"metadata":{"labels":{%q:""}}}`, testNodeMCPLabel))
94+
_, err = oc.AdminKubeClient().CoreV1().Nodes().Patch(ctx, nodeName, types.MergePatchType, patchData, metav1.PatchOptions{})
95+
o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to label node")
96+
97+
// Register cleanup immediately after successful label application
98+
g.DeferCleanup(cleanupNodeLabel)
99+
100+
// Create custom MCP
101+
g.By(fmt.Sprintf("Creating custom MachineConfigPool %s", testMCPName))
102+
testMCP := &mcfgv1.MachineConfigPool{
103+
TypeMeta: metav1.TypeMeta{
104+
APIVersion: "machineconfiguration.openshift.io/v1",
105+
Kind: "MachineConfigPool",
106+
},
107+
ObjectMeta: metav1.ObjectMeta{
108+
Name: testMCPName,
109+
Labels: map[string]string{
110+
"machineconfiguration.openshift.io/pool": testMCPName,
111+
},
112+
},
113+
Spec: mcfgv1.MachineConfigPoolSpec{
114+
MachineConfigSelector: &metav1.LabelSelector{
115+
MatchExpressions: []metav1.LabelSelectorRequirement{
116+
{
117+
Key: "machineconfiguration.openshift.io/role",
118+
Operator: metav1.LabelSelectorOpIn,
119+
Values: []string{"worker", testMCPName},
120+
},
121+
},
122+
},
123+
NodeSelector: &metav1.LabelSelector{
124+
MatchLabels: map[string]string{
125+
testNodeMCPLabel: "",
126+
},
127+
},
128+
},
129+
}
130+
131+
_, err = mcClient.MachineconfigurationV1().MachineConfigPools().Create(ctx, testMCP, metav1.CreateOptions{})
132+
o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to create custom MachineConfigPool")
133+
134+
cleanupMCP := func() {
135+
g.By("Cleaning up custom MachineConfigPool")
136+
cleanupCtx := context.Background()
137+
deleteErr := mcClient.MachineconfigurationV1().MachineConfigPools().Delete(cleanupCtx, testMCPName, metav1.DeleteOptions{})
138+
if deleteErr != nil {
139+
framework.Logf("Failed to delete MachineConfigPool %s: %v", testMCPName, deleteErr)
140+
}
141+
}
142+
143+
// Register DeferCleanup so cleanup happens even on test failure
144+
// DeferCleanup runs in LIFO order: cleanupNodeLabel was registered first, so it runs after cleanupMCP (the MCP is deleted before the node label is removed)
145+
// Note: cleanupNodeLabel already registered immediately after node labeling
146+
g.DeferCleanup(cleanupMCP)
147+
148+
g.By("Waiting for custom MachineConfigPool to be ready")
149+
err = waitForMCP(ctx, mcClient, testMCPName, 5*time.Minute)
150+
o.Expect(err).NotTo(o.HaveOccurred(), "Custom MachineConfigPool should become ready")
151+
152+
verifyNodeSizingEnabledFile(oc, nodeName, "true")
153+
154+
// Now apply KubeletConfig and verify NODE_SIZING_ENABLED=false
155+
156+
g.By("Creating KubeletConfig with autoSizingReserved=false")
157+
autoSizingReserved := false
158+
kubeletConfig := &mcfgv1.KubeletConfig{
159+
TypeMeta: metav1.TypeMeta{
160+
APIVersion: "machineconfiguration.openshift.io/v1",
161+
Kind: "KubeletConfig",
162+
},
163+
ObjectMeta: metav1.ObjectMeta{
164+
Name: kubeletConfigName,
165+
},
166+
Spec: mcfgv1.KubeletConfigSpec{
167+
AutoSizingReserved: &autoSizingReserved,
168+
MachineConfigPoolSelector: &metav1.LabelSelector{
169+
MatchLabels: map[string]string{
170+
"machineconfiguration.openshift.io/pool": testMCPName,
171+
},
172+
},
173+
},
174+
}
175+
176+
_, err = mcClient.MachineconfigurationV1().KubeletConfigs().Create(ctx, kubeletConfig, metav1.CreateOptions{})
177+
o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to create KubeletConfig")
178+
179+
cleanupKubeletConfig := func() {
180+
g.By("Cleaning up KubeletConfig")
181+
cleanupCtx := context.Background()
182+
deleteErr := mcClient.MachineconfigurationV1().KubeletConfigs().Delete(cleanupCtx, kubeletConfigName, metav1.DeleteOptions{})
183+
if deleteErr != nil {
184+
framework.Logf("Failed to delete KubeletConfig %s: %v", kubeletConfigName, deleteErr)
185+
}
186+
187+
// Wait for custom MCP to be ready after cleanup
188+
g.By("Waiting for custom MCP to be ready after KubeletConfig deletion")
189+
waitErr := waitForMCP(cleanupCtx, mcClient, testMCPName, 5*time.Minute)
190+
if waitErr != nil {
191+
framework.Logf("Failed to wait for custom MCP to be ready: %v", waitErr)
192+
}
193+
}
194+
g.DeferCleanup(cleanupKubeletConfig)
195+
196+
g.By("Waiting for KubeletConfig to be created")
197+
var createdKC *mcfgv1.KubeletConfig
198+
o.Eventually(func() error {
199+
createdKC, err = mcClient.MachineconfigurationV1().KubeletConfigs().Get(ctx, kubeletConfigName, metav1.GetOptions{})
200+
return err
201+
}, 30*time.Second, 5*time.Second).Should(o.Succeed(), "KubeletConfig should be created")
202+
203+
o.Expect(createdKC.Spec.AutoSizingReserved).NotTo(o.BeNil(), "AutoSizingReserved should not be nil")
204+
o.Expect(*createdKC.Spec.AutoSizingReserved).To(o.BeFalse(), "AutoSizingReserved should be false")
205+
206+
g.By(fmt.Sprintf("Waiting for %s MCP to start updating", testMCPName))
207+
o.Eventually(func() bool {
208+
mcp, err := mcClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, testMCPName, metav1.GetOptions{})
209+
if err != nil {
210+
framework.Logf("Error getting %s MCP: %v", testMCPName, err)
211+
return false
212+
}
213+
// Check if MCP is updating (has conditions indicating update in progress)
214+
for _, condition := range mcp.Status.Conditions {
215+
if condition.Type == "Updating" && condition.Status == corev1.ConditionTrue {
216+
return true
217+
}
218+
}
219+
return false
220+
}, 2*time.Minute, 10*time.Second).Should(o.BeTrue(), fmt.Sprintf("%s MCP should start updating", testMCPName))
221+
222+
g.By(fmt.Sprintf("Waiting for %s MCP to be ready with new configuration", testMCPName))
223+
err = waitForMCP(ctx, mcClient, testMCPName, 15*time.Minute)
224+
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("%s MCP should become ready with new configuration", testMCPName))
225+
226+
verifyNodeSizingEnabledFile(oc, nodeName, "false")
227+
228+
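// Explicit cleanup on success, in the order KubeletConfig -> node label -> MCP.
// The same functions are also registered with DeferCleanup, so they run again after the test (and on failure); a repeated run only logs the failed deletes.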
229+
cleanupKubeletConfig()
230+
cleanupNodeLabel()
231+
cleanupMCP()
232+
})
233+
})
234+
235+
// verifyNodeSizingEnabledFile verifies the NODE_SIZING_ENABLED value in the env file
236+
func verifyNodeSizingEnabledFile(oc *exutil.CLI, nodeName, expectedValue string) {
237+
g.By("Verifying /etc/node-sizing-enabled.env file exists")
238+
239+
output, err := ExecOnNodeWithChroot(oc, nodeName, "test", "-f", "/etc/node-sizing-enabled.env")
240+
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("File /etc/node-sizing-enabled.env should exist on node %s. Output: %s", nodeName, output))
241+
242+
g.By("Reading /etc/node-sizing-enabled.env file contents")
243+
output, err = ExecOnNodeWithChroot(oc, nodeName, "cat", "/etc/node-sizing-enabled.env")
244+
o.Expect(err).NotTo(o.HaveOccurred(), "Should be able to read /etc/node-sizing-enabled.env")
245+
246+
framework.Logf("Contents of /etc/node-sizing-enabled.env:\n%s", output)
247+
248+
g.By(fmt.Sprintf("Verifying NODE_SIZING_ENABLED=%s is set in the file", expectedValue))
249+
o.Expect(strings.TrimSpace(output)).To(o.ContainSubstring(fmt.Sprintf("NODE_SIZING_ENABLED=%s", expectedValue)),
250+
fmt.Sprintf("File should contain NODE_SIZING_ENABLED=%s", expectedValue))
251+
252+
framework.Logf("Successfully verified NODE_SIZING_ENABLED=%s on node %s", expectedValue, nodeName)
253+
}

0 commit comments

Comments
 (0)