|
5 | 5 | "fmt" |
6 | 6 | "math/rand" |
7 | 7 | "os" |
| 8 | + "strings" |
8 | 9 | "time" |
9 | 10 |
|
10 | 11 | g "github.com/onsi/ginkgo/v2" |
@@ -409,6 +410,61 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual |
409 | 410 | }) |
410 | 411 | }) |
411 | 412 |
|
| 413 | +var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][Suite:openshift/two-node][Serial][Disruptive] Two Node with Fencing etcd recovery", func() { |
| 414 | + defer g.GinkgoRecover() |
| 415 | + |
| 416 | + var ( |
| 417 | + oc = exutil.NewCLIWithoutNamespace("").AsAdmin() |
| 418 | + etcdClientFactory *helpers.EtcdClientFactoryImpl |
| 419 | + peerNode, targetNode corev1.Node |
| 420 | + ) |
| 421 | + |
| 422 | + g.BeforeEach(func() { |
| 423 | + utils.SkipIfNotTopology(oc, v1.DualReplicaTopologyMode) |
| 424 | + |
| 425 | + etcdClientFactory = helpers.NewEtcdClientFactory(oc.KubeClient()) |
| 426 | + |
| 427 | + utils.SkipIfClusterIsNotHealthy(oc, etcdClientFactory) |
| 428 | + |
| 429 | + nodes, err := utils.GetNodes(oc, utils.AllNodes) |
| 430 | + o.Expect(err).ShouldNot(o.HaveOccurred(), "Expected to retrieve nodes without error") |
| 431 | + |
| 432 | + randomIndex := rand.Intn(len(nodes.Items)) |
| 433 | + peerNode = nodes.Items[randomIndex] |
| 434 | + targetNode = nodes.Items[(randomIndex+1)%len(nodes.Items)] |
| 435 | + }) |
| 436 | + |
| 437 | + g.It("should recover after simultaneous graceful shutdown of both nodes", func() { |
| 438 | + g.GinkgoT().Printf("Gracefully rebooting both nodes: %s and %s\n", |
| 439 | + targetNode.Name, peerNode.Name) |
| 440 | + |
| 441 | + g.By(fmt.Sprintf("Triggering graceful reboot on %s", targetNode.Name)) |
| 442 | + err := exutil.TriggerNodeRebootGraceful(oc.KubeClient(), targetNode.Name) |
| 443 | + o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected to trigger graceful reboot on %s without error", targetNode.Name)) |
| 444 | + |
| 445 | + g.By(fmt.Sprintf("Triggering graceful reboot on %s", peerNode.Name)) |
| 446 | + err = exutil.TriggerNodeRebootGraceful(oc.KubeClient(), peerNode.Name) |
| 447 | + o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected to trigger graceful reboot on %s without error", peerNode.Name)) |
| 448 | + |
| 449 | + g.By("Waiting for graceful shutdown to take effect (shutdown -r 1 schedules reboot in 1 minute)") |
| 450 | + time.Sleep(90 * time.Second) |
| 451 | + |
| 452 | + g.By(fmt.Sprintf("Waiting for both etcd members to become healthy (timeout: %v)", membersHealthyAfterDoubleReboot)) |
| 453 | + validateEtcdRecoveryState(oc, etcdClientFactory, |
| 454 | + &targetNode, |
| 455 | + &peerNode, true, false, |
| 456 | + membersHealthyAfterDoubleReboot, utils.FiveSecondPollInterval) |
| 457 | + |
| 458 | + g.By("Verifying etcd containers are running on both nodes") |
| 459 | + for _, node := range []corev1.Node{targetNode, peerNode} { |
| 460 | + got, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, node.Name, "openshift-etcd", |
| 461 | + strings.Split(ensurePodmanEtcdContainerIsRunning, " ")...) |
| 462 | + o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected no error checking etcd on %s", node.Name)) |
| 463 | + o.Expect(got).To(o.Equal("'true'"), fmt.Sprintf("Expected etcd container running on %s", node.Name)) |
| 464 | + } |
| 465 | + }) |
| 466 | +}) |
| 467 | + |
412 | 468 | func validateEtcdRecoveryState( |
413 | 469 | oc *exutil.CLI, e *helpers.EtcdClientFactoryImpl, |
414 | 470 | survivedNode, targetNode *corev1.Node, |
|
0 commit comments