Skip to content

Commit ee9cd82

Browse files
committed
Validate no WAL corruption when both nodes shut down gracefully
1 parent 107849e commit ee9cd82

1 file changed

Lines changed: 31 additions & 0 deletions

File tree

test/extended/two_node/tnf_recovery.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"math/rand"
77
"os"
8+
"strings"
89
"time"
910

1011
g "github.com/onsi/ginkgo/v2"
@@ -407,6 +408,36 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual
407408
&targetNode, true, false, // targetNode expected started == true, learner == false
408409
6*time.Minute, 45*time.Second)
409410
})
411+
412+
// Test case: requests a graceful reboot of targetNode and then peerNode,
// waits a fixed interval, and then checks that etcd has recovered: first via
// validateEtcdRecoveryState, then by running a container check on each node
// through a debug-node command.
g.It("should recover after simultaneous graceful shutdown of both nodes", func() {
413+
g.GinkgoT().Printf("Gracefully rebooting both nodes: %s and %s\n",
414+
targetNode.Name, peerNode.Name)
415+
416+
g.By(fmt.Sprintf("Triggering graceful reboot on %s", targetNode.Name))
417+
// The two reboot requests are issued sequentially; the test fails at the
// first request that returns an error.
err := exutil.TriggerNodeRebootGraceful(oc.KubeClient(), targetNode.Name)
418+
o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected to trigger graceful reboot on %s without error", targetNode.Name))
419+
420+
g.By(fmt.Sprintf("Triggering graceful reboot on %s", peerNode.Name))
421+
err = exutil.TriggerNodeRebootGraceful(oc.KubeClient(), peerNode.Name)
422+
o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected to trigger graceful reboot on %s without error", peerNode.Name))
423+
424+
g.By("Waiting for graceful shutdown to take effect (shutdown -r 1 schedules reboot in 1 minute)")
425+
// NOTE(review): unconditional 90s sleep, not a poll. The step text above says
// the reboot is scheduled one minute out, so this presumably just waits past
// that point before health checks start — confirm against
// TriggerNodeRebootGraceful.
time.Sleep(90 * time.Second)
426+
427+
g.By(fmt.Sprintf("Waiting for both etcd members to become healthy (timeout: %v)", membersHealthyAfterDoubleReboot))
428+
// targetNode is passed first and peerNode second, followed by (true, false).
// NOTE(review): the preceding test in this file annotates the same bool pair
// as "expected started == true, learner == false" for the node listed just
// before them; verify the parameter order against validateEtcdRecoveryState.
validateEtcdRecoveryState(oc, etcdClientFactory,
429+
&targetNode,
430+
&peerNode, true, false,
431+
membersHealthyAfterDoubleReboot, utils.FiveSecondPollInterval)
432+
433+
g.By("Verifying etcd containers are running on both nodes")
434+
for _, node := range []corev1.Node{targetNode, peerNode} {
435+
// The command constant is split on single spaces to form the argument list.
// NOTE(review): this assumes no argument inside
// ensurePodmanEtcdContainerIsRunning contains a space — confirm.
got, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, node.Name, "openshift-etcd",
436+
strings.Split(ensurePodmanEtcdContainerIsRunning, " ")...)
437+
o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected no error checking etcd on %s", node.Name))
438+
// Exact string match, single quotes included. NOTE(review): presumably the
// command's output format emits the quotes and no trailing newline — verify
// against the constant's definition.
o.Expect(got).To(o.Equal("'true'"), fmt.Sprintf("Expected etcd container running on %s", node.Name))
439+
}
440+
})
410441
})
411442

412443
func validateEtcdRecoveryState(

0 commit comments

Comments
 (0)