Skip to content

Commit b3cf6dc

Browse files
authored
failover set offset via rnd and not via reconcile count (#664)
1 parent 526a22a commit b3cf6dc

3 files changed

Lines changed: 160 additions & 150 deletions

File tree

cmd/main.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -622,7 +622,8 @@ func main() {
622622
"reconcileInterval", failoverConfig.ReconcileInterval,
623623
"revalidationInterval", failoverConfig.RevalidationInterval,
624624
"trustHypervisorLocation", failoverConfig.TrustHypervisorLocation,
625-
"maxVMsToProcess", failoverConfig.MaxVMsToProcess)
625+
"maxVMsToProcess", failoverConfig.MaxVMsToProcess,
626+
"vmSelectionRotationInterval", failoverConfig.VMSelectionRotationInterval)
626627
}
627628

628629
// +kubebuilder:scaffold:builder

internal/scheduling/reservations/failover/controller.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package failover
66
import (
77
"context"
88
"fmt"
9+
"math/rand/v2"
910
"path/filepath"
1011
"slices"
1112
"sort"
@@ -518,7 +519,11 @@ func (c *FailoverReservationController) selectVMsToProcess(
518519
offset := 0
519520
rotationInterval := *c.Config.VMSelectionRotationInterval
520521
if rotationInterval > 0 && c.reconcileCount%int64(rotationInterval) == 0 {
521-
offset = int(c.reconcileCount) % len(vmsMissingFailover)
522+
offset = rand.IntN(len(vmsMissingFailover)) //nolint:gosec // non-cryptographic randomness is fine for VM selection rotation
523+
logger.Info("applying random rotation offset for VM selection",
524+
"offset", offset,
525+
"totalVMs", len(vmsMissingFailover),
526+
"rotationInterval", rotationInterval)
522527
}
523528

524529
selected = make([]vmFailoverNeed, 0, maxToProcess)

internal/scheduling/reservations/failover/controller_test.go

Lines changed: 152 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,7 @@ func getAllocations(res *v1alpha1.Reservation) map[string]string {
855855
// ============================================================================
856856

857857
func TestSelectVMsToProcess(t *testing.T) {
858-
// Create 10 VMs with different memory sizes (sorted by memory descending)
858+
// Create VMs with different memory sizes (sorted by memory descending)
859859
createVMs := func(count int) []vmFailoverNeed {
860860
vms := make([]vmFailoverNeed, count)
861861
for i := range count {
@@ -873,166 +873,170 @@ func TestSelectVMsToProcess(t *testing.T) {
873873
return vms
874874
}
875875

876-
tests := []struct {
877-
name string
878-
reconcileCount int64
879-
vmCount int
880-
maxToProcess int
881-
expectedOffset int // Expected starting offset in the VM list
882-
expectedHit bool
883-
}{
884-
// 3 out of 4 runs should start at offset 0
885-
{
886-
name: "reconcile 1 - offset 0",
887-
reconcileCount: 1,
888-
vmCount: 10,
889-
maxToProcess: 3,
890-
expectedOffset: 0,
891-
expectedHit: true,
892-
},
893-
{
894-
name: "reconcile 2 - offset 0",
895-
reconcileCount: 2,
896-
vmCount: 10,
897-
maxToProcess: 3,
898-
expectedOffset: 0,
899-
expectedHit: true,
900-
},
901-
{
902-
name: "reconcile 3 - offset 0",
903-
reconcileCount: 3,
904-
vmCount: 10,
905-
maxToProcess: 3,
906-
expectedOffset: 0,
907-
expectedHit: true,
908-
},
909-
// Every 4th reconcile uses reconcileCount as offset (mod vmCount)
910-
{
911-
name: "reconcile 4 - offset 4",
876+
t.Run("no rotation - offset 0", func(t *testing.T) {
877+
ctx := context.Background()
878+
controller := &FailoverReservationController{
879+
reconcileCount: 1, // Not divisible by 4, so no rotation
880+
Config: FailoverConfig{
881+
VMSelectionRotationInterval: intPtr(4),
882+
},
883+
}
884+
885+
vms := createVMs(10)
886+
selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 3)
887+
888+
if !hitLimit {
889+
t.Error("expected hitLimit=true")
890+
}
891+
if len(selected) != 3 {
892+
t.Errorf("expected 3 VMs selected, got %d", len(selected))
893+
}
894+
// Without rotation, should start at offset 0 (vm-a has most memory)
895+
if selected[0].VM.UUID != "vm-a" {
896+
t.Errorf("expected first VM to be vm-a, got %s", selected[0].VM.UUID)
897+
}
898+
})
899+
900+
t.Run("rotation triggered - random offset", func(t *testing.T) {
901+
ctx := context.Background()
902+
controller := &FailoverReservationController{
903+
reconcileCount: 4, // Divisible by 4, triggers rotation
904+
Config: FailoverConfig{
905+
VMSelectionRotationInterval: intPtr(4),
906+
},
907+
}
908+
909+
vms := createVMs(10)
910+
selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 3)
911+
912+
if !hitLimit {
913+
t.Error("expected hitLimit=true")
914+
}
915+
if len(selected) != 3 {
916+
t.Errorf("expected 3 VMs selected, got %d", len(selected))
917+
}
918+
// With rotation, offset is random - just verify we got valid VMs
919+
// and the selection wraps correctly
920+
vmSet := make(map[string]bool)
921+
for _, vm := range vms {
922+
vmSet[vm.VM.UUID] = true
923+
}
924+
for _, s := range selected {
925+
if !vmSet[s.VM.UUID] {
926+
t.Errorf("selected VM %s not in original list", s.VM.UUID)
927+
}
928+
}
929+
})
930+
931+
t.Run("rotation disabled - always offset 0", func(t *testing.T) {
932+
ctx := context.Background()
933+
controller := &FailoverReservationController{
912934
reconcileCount: 4,
913-
vmCount: 10,
914-
maxToProcess: 3,
915-
expectedOffset: 4,
916-
expectedHit: true,
917-
},
918-
{
919-
name: "reconcile 5 - offset 0",
920-
reconcileCount: 5,
921-
vmCount: 10,
922-
maxToProcess: 3,
923-
expectedOffset: 0,
924-
expectedHit: true,
925-
},
926-
{
927-
name: "reconcile 6 - offset 0",
928-
reconcileCount: 6,
929-
vmCount: 10,
930-
maxToProcess: 3,
931-
expectedOffset: 0,
932-
expectedHit: true,
933-
},
934-
{
935-
name: "reconcile 7 - offset 0",
936-
reconcileCount: 7,
937-
vmCount: 10,
938-
maxToProcess: 3,
939-
expectedOffset: 0,
940-
expectedHit: true,
941-
},
942-
{
943-
name: "reconcile 8 - offset 8",
944-
reconcileCount: 8,
945-
vmCount: 10,
946-
maxToProcess: 3,
947-
expectedOffset: 8,
948-
expectedHit: true,
949-
},
950-
// Test wrap-around when reconcileCount > vmCount
951-
{
952-
name: "reconcile 12 - offset 2 (12 mod 10)",
953-
reconcileCount: 12,
954-
vmCount: 10,
955-
maxToProcess: 3,
956-
expectedOffset: 2, // 12 % 10 = 2
957-
expectedHit: true,
958-
},
959-
{
960-
name: "reconcile 20 - offset 0 (20 mod 10)",
961-
reconcileCount: 20,
962-
vmCount: 10,
963-
maxToProcess: 3,
964-
expectedOffset: 0, // 20 % 10 = 0
965-
expectedHit: true,
966-
},
967-
// Edge cases
968-
{
969-
name: "maxToProcess 0 - no limit, returns all",
935+
Config: FailoverConfig{
936+
VMSelectionRotationInterval: intPtr(0), // Disabled
937+
},
938+
}
939+
940+
vms := createVMs(10)
941+
selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 3)
942+
943+
if !hitLimit {
944+
t.Error("expected hitLimit=true")
945+
}
946+
// With rotation disabled, should always start at offset 0
947+
if selected[0].VM.UUID != "vm-a" {
948+
t.Errorf("expected first VM to be vm-a, got %s", selected[0].VM.UUID)
949+
}
950+
})
951+
952+
t.Run("maxToProcess 0 - no limit, returns all", func(t *testing.T) {
953+
ctx := context.Background()
954+
controller := &FailoverReservationController{
970955
reconcileCount: 4,
971-
vmCount: 10,
972-
maxToProcess: 0,
973-
expectedOffset: 0, // No limit means all VMs returned starting from 0
974-
expectedHit: false,
975-
},
976-
{
977-
name: "maxToProcess >= vmCount - no limit hit",
956+
Config: FailoverConfig{
957+
VMSelectionRotationInterval: intPtr(4),
958+
},
959+
}
960+
961+
vms := createVMs(10)
962+
selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 0)
963+
964+
if hitLimit {
965+
t.Error("expected hitLimit=false when maxToProcess=0")
966+
}
967+
if len(selected) != 10 {
968+
t.Errorf("expected all 10 VMs when no limit, got %d", len(selected))
969+
}
970+
})
971+
972+
t.Run("maxToProcess >= vmCount - no limit hit", func(t *testing.T) {
973+
ctx := context.Background()
974+
controller := &FailoverReservationController{
978975
reconcileCount: 4,
979-
vmCount: 5,
980-
maxToProcess: 10,
981-
expectedOffset: 0, // All VMs fit, no rotation needed
982-
expectedHit: false,
983-
},
984-
}
976+
Config: FailoverConfig{
977+
VMSelectionRotationInterval: intPtr(4),
978+
},
979+
}
985980

986-
for _, tt := range tests {
987-
t.Run(tt.name, func(t *testing.T) {
988-
ctx := context.Background()
989-
controller := &FailoverReservationController{
990-
reconcileCount: tt.reconcileCount,
991-
Config: FailoverConfig{
992-
VMSelectionRotationInterval: intPtr(4), // Default rotation interval
993-
},
994-
}
981+
vms := createVMs(5)
982+
selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 10)
995983

996-
vms := createVMs(tt.vmCount)
997-
selected, hitLimit := controller.selectVMsToProcess(ctx, vms, tt.maxToProcess)
984+
if hitLimit {
985+
t.Error("expected hitLimit=false when maxToProcess >= vmCount")
986+
}
987+
if len(selected) != 5 {
988+
t.Errorf("expected all 5 VMs, got %d", len(selected))
989+
}
990+
})
998991

999-
if hitLimit != tt.expectedHit {
1000-
t.Errorf("expected hitLimit=%v, got %v", tt.expectedHit, hitLimit)
1001-
}
992+
t.Run("empty VMs list", func(t *testing.T) {
993+
ctx := context.Background()
994+
controller := &FailoverReservationController{
995+
reconcileCount: 4,
996+
Config: FailoverConfig{
997+
VMSelectionRotationInterval: intPtr(4),
998+
},
999+
}
10021000

1003-
if !tt.expectedHit {
1004-
// When no limit is hit, all VMs should be returned
1005-
if len(selected) != tt.vmCount {
1006-
t.Errorf("expected all %d VMs when no limit hit, got %d", tt.vmCount, len(selected))
1007-
}
1008-
return
1009-
}
1001+
vms := []vmFailoverNeed{}
1002+
selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 3)
10101003

1011-
// Verify the first selected VM is at the expected offset
1012-
if len(selected) == 0 {
1013-
t.Error("expected at least one VM selected")
1014-
return
1004+
if hitLimit {
1005+
t.Error("expected hitLimit=false for empty list")
1006+
}
1007+
if len(selected) != 0 {
1008+
t.Errorf("expected 0 VMs, got %d", len(selected))
1009+
}
1010+
})
1011+
1012+
t.Run("random offset is within bounds", func(t *testing.T) {
1013+
ctx := context.Background()
1014+
// Run multiple times to verify random offset is always valid
1015+
for i := range 20 {
1016+
controller := &FailoverReservationController{
1017+
reconcileCount: int64((i + 1) * 4), // Always triggers rotation
1018+
Config: FailoverConfig{
1019+
VMSelectionRotationInterval: intPtr(4),
1020+
},
10151021
}
10161022

1017-
// The VMs are sorted by memory descending, so vm-a has most memory, vm-j has least
1018-
// After sorting, the order is: vm-a, vm-b, vm-c, ..., vm-j
1019-
// With offset, we should start at vms[offset]
1020-
expectedFirstVM := vms[tt.expectedOffset].VM.UUID
1021-
actualFirstVM := selected[0].VM.UUID
1023+
vms := createVMs(10)
1024+
selected, _ := controller.selectVMsToProcess(ctx, vms, 3)
10221025

1023-
if actualFirstVM != expectedFirstVM {
1024-
t.Errorf("expected first VM to be %s (offset %d), got %s",
1025-
expectedFirstVM, tt.expectedOffset, actualFirstVM)
1026+
if len(selected) != 3 {
1027+
t.Errorf("iteration %d: expected 3 VMs, got %d", i, len(selected))
10261028
}
10271029

1028-
// Verify we got the expected number of VMs
1029-
expectedCount := tt.maxToProcess
1030-
if expectedCount > tt.vmCount {
1031-
expectedCount = tt.vmCount
1030+
// Verify all selected VMs are from the original list
1031+
vmSet := make(map[string]bool)
1032+
for _, vm := range vms {
1033+
vmSet[vm.VM.UUID] = true
10321034
}
1033-
if len(selected) != expectedCount {
1034-
t.Errorf("expected %d VMs selected, got %d", expectedCount, len(selected))
1035+
for _, s := range selected {
1036+
if !vmSet[s.VM.UUID] {
1037+
t.Errorf("iteration %d: selected VM %s not in original list", i, s.VM.UUID)
1038+
}
10351039
}
1036-
})
1037-
}
1040+
}
1041+
})
10381042
}

0 commit comments

Comments
 (0)