Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,10 +652,10 @@ func main() {
os.Exit(1)
}

syncerMonitor := commitments.NewSyncerMonitor()
must.Succeed(metrics.Registry.Register(syncerMonitor))
if slices.Contains(mainConfig.EnabledTasks, "commitments-sync-task") {
setupLog.Info("starting commitments syncer")
syncerMonitor := commitments.NewSyncerMonitor()
must.Succeed(metrics.Registry.Register(syncerMonitor))
syncer := commitments.NewSyncer(multiclusterClient, syncerMonitor)
syncerConfig := conf.GetConfigOrDie[commitments.SyncerConfig]()
syncerConfig.ApplyDefaults()
Expand Down
2 changes: 1 addition & 1 deletion internal/scheduling/reservations/commitments/syncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func (s *Syncer) getCommitmentStates(ctx context.Context, log logr.Logger, flavo
"expectedUnit", expectedUnit,
"smallestFlavorMemoryMB", flavorGroup.SmallestFlavor.MemoryMB)
if s.monitor != nil {
s.monitor.RecordUnitMismatch(flavorGroupName)
s.monitor.RecordCommitmentSkipped(SkipReasonUnitMismatch)
}
// Track skipped commitment so its existing CRDs won't be deleted
if commitment.UUID != "" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,6 @@ func (m *SyncerMonitor) RecordCommitmentSkipped(reason string) {
m.commitmentsSkipped.WithLabelValues(reason).Inc()
}

// RecordUnitMismatch records a unit mismatch skip (convenience method).
func (m *SyncerMonitor) RecordUnitMismatch(_ string) {
m.commitmentsSkipped.WithLabelValues(SkipReasonUnitMismatch).Inc()
}

// RecordReservationsCreated records reservations created.
func (m *SyncerMonitor) RecordReservationsCreated(count int) {
m.reservationsCreated.Add(float64(count))
Expand Down
108 changes: 108 additions & 0 deletions internal/scheduling/reservations/commitments/syncer_monitor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0

package commitments

import (
"testing"

"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)

// TestSyncerMonitor_MetricsRegistration verifies that all syncer metrics are
// present at zero immediately after registration, without any increments.
// This matters because alert rules reference these metrics and will warn
// "metric missing" if they don't appear before the first sync run.
func TestSyncerMonitor_MetricsRegistration(t *testing.T) {
registry := prometheus.NewRegistry()
monitor := NewSyncerMonitor()

if err := registry.Register(monitor); err != nil {
t.Fatalf("failed to register monitor: %v", err)
}

families, err := registry.Gather()
if err != nil {
t.Fatalf("failed to gather metrics: %v", err)
}

gathered := make(map[string]*dto.MetricFamily)
for _, f := range families {
gathered[f.GetName()] = f
}

cases := []struct {
name string
metricType dto.MetricType
}{
{"cortex_committed_resource_syncer_runs_total", dto.MetricType_COUNTER},
{"cortex_committed_resource_syncer_errors_total", dto.MetricType_COUNTER},
{"cortex_committed_resource_syncer_commitments_total", dto.MetricType_COUNTER},
{"cortex_committed_resource_syncer_commitments_processed_total", dto.MetricType_COUNTER},
{"cortex_committed_resource_syncer_commitments_skipped_total", dto.MetricType_COUNTER},
{"cortex_committed_resource_syncer_reservations_created_total", dto.MetricType_COUNTER},
{"cortex_committed_resource_syncer_reservations_deleted_total", dto.MetricType_COUNTER},
{"cortex_committed_resource_syncer_reservations_repaired_total", dto.MetricType_COUNTER},
}

for _, tc := range cases {
f, ok := gathered[tc.name]
if !ok {
t.Errorf("metric %q missing after registration (no increments needed)", tc.name)
continue
}
if f.GetType() != tc.metricType {
t.Errorf("metric %q: expected type %v, got %v", tc.name, tc.metricType, f.GetType())
}
}
}

// TestSyncerMonitor_SkipReasonsPreInitialized verifies that all skip reason
// label combinations are present at zero before any commitment is skipped.
// This prevents "metric missing" warnings for the skipped_total CounterVec.
func TestSyncerMonitor_SkipReasonsPreInitialized(t *testing.T) {
registry := prometheus.NewRegistry()
monitor := NewSyncerMonitor()

if err := registry.Register(monitor); err != nil {
t.Fatalf("failed to register monitor: %v", err)
}

families, err := registry.Gather()
if err != nil {
t.Fatalf("failed to gather metrics: %v", err)
}

var skippedFamily *dto.MetricFamily
for _, f := range families {
if f.GetName() == "cortex_committed_resource_syncer_commitments_skipped_total" {
skippedFamily = f
break
}
}
if skippedFamily == nil {
t.Fatal("cortex_committed_resource_syncer_commitments_skipped_total missing after registration")
}

presentReasons := make(map[string]bool)
for _, m := range skippedFamily.Metric {
for _, l := range m.Label {
if l.GetName() == "reason" {
presentReasons[l.GetValue()] = true
}
}
}

for _, reason := range []string{
SkipReasonUnitMismatch,
SkipReasonUnknownFlavorGroup,
SkipReasonInvalidResource,
SkipReasonEmptyUUID,
SkipReasonNonCompute,
} {
if !presentReasons[reason] {
t.Errorf("skip reason %q not pre-initialized in commitments_skipped_total", reason)
}
}
}
Loading