From 1d8ffc61952dd6d8d17a8c53c210fb29951d39e7 Mon Sep 17 00:00:00 2001 From: Tri Lam Date: Mon, 1 Jun 2026 16:15:16 -0700 Subject: [PATCH] fix(fixtures): pin pattern.id to detector consts (audit #421) Five rows in the canonical SDK fixture (docs/schemas/fixtures/shipped-patterns-v1.0.0-rc1.json) carried wrong pattern.id values: hbm_ecc "17"->"3", thermal_throttle "18"->"4", pcie_aer "19"->"5", cuda_oom "20"->"10", ib_link_flap "21"->"2". The envelope schema only constrains pattern.id to be a non-empty string, so per-pattern envelope tests round-tripped the drift silently. The ib_link_flap row also used the unscoped key "node" while the Go struct serializes Node as "k8s.node.name" (the customer-stable attribute namespace per docs/ATTRIBUTES.md). Renamed to match. Drift-prevention test TestCanonicalShippedFixtures_PatternIDsMatchDetectorConsts pins each fixture row's pattern.id to the PatternID* const exported by the detector package, and asserts every detector const has a fixture row (symmetry guard against orphans on either side). Refs #421 Refs #398 --- .../fixtures/shipped-patterns-v1.0.0-rc1.json | 12 +-- .../patterns/verdict_envelope_schema_test.go | 75 +++++++++++++++++++ 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/docs/schemas/fixtures/shipped-patterns-v1.0.0-rc1.json b/docs/schemas/fixtures/shipped-patterns-v1.0.0-rc1.json index f519caca..d980a4a4 100644 --- a/docs/schemas/fixtures/shipped-patterns-v1.0.0-rc1.json +++ b/docs/schemas/fixtures/shipped-patterns-v1.0.0-rc1.json @@ -50,7 +50,7 @@ { "name": "hbm_ecc", "payload": { - "pattern.id": "17", + "pattern.id": "3", "headline": "h", "remediation": "r", "evidence_trail": [ @@ -66,7 +66,7 @@ { "name": "thermal_throttle", "payload": { - "pattern.id": "18", + "pattern.id": "4", "headline": "h", "remediation": "r", "evidence_trail": [ @@ -81,7 +81,7 @@ { "name": "pcie_aer", "payload": { - "pattern.id": "19", + "pattern.id": "5", "headline": "h", "remediation": "r", "evidence_trail": [ @@ -98,7 +98,7 @@ { "name": "cuda_oom", "payload": { - "pattern.id": "20", + "pattern.id": "10", "headline": "h", "remediation": "r", "confidence": "full", @@ -117,7 +117,7 @@ { "name": "ib_link_flap", "payload": { - "pattern.id": "21", + "pattern.id": "2", "headline": "h", "remediation": "r", "confidence": "partial", @@ -125,7 +125,7 @@ {"kind": "ib_port_state", "uid": "u", "timestamp": "2026-05-18T10:00:00Z", "description": "d"} ], "missing_layers": ["hw_throttle"], - "node": "n", + "k8s.node.name": "n", "hca_device": "mlx5_0", "port": 1, "transition_count": 3 diff --git a/module/pkg/patterns/verdict_envelope_schema_test.go b/module/pkg/patterns/verdict_envelope_schema_test.go index 2d1ddb2a..34b706c9 100644 --- a/module/pkg/patterns/verdict_envelope_schema_test.go +++ b/module/pkg/patterns/verdict_envelope_schema_test.go @@ -353,3 +353,78 @@ func TestVerdictEnvelopeV1RC1_HasStableID(t *testing.T) { "Today the $id points at raw.githubusercontent.com (resolves immediately, no infra "+ "required); a future move to schema.tracecore.io is tracked as a follow-up infra issue.") } + +// TestCanonicalShippedFixtures_PatternIDsMatchDetectorConsts pins each +// fixture row's `pattern.id` to the detector-package constant for the +// same pattern. The envelope schema only constrains pattern.id to be +// a non-empty string (so any string passes envelope validation); this +// test closes the audit-#421 finding-2 hole — five fixture rows had +// drifted pattern.id values (hbm_ecc "17"→canonical "3", +// thermal_throttle "18"→"4", pcie_aer "19"→"5", cuda_oom "20"→"10", +// ib_link_flap "21"→"2") that envelope validation could not catch. +// The fixture row name is the dictionary key; the canonical const is +// the value third-party SDKs key off in production. A future fixture +// edit that drifts either side fails this test BEFORE the SDK suites +// silently round-trip the wrong ID. +func TestCanonicalShippedFixtures_PatternIDsMatchDetectorConsts(t *testing.T) { + t.Parallel() + + // canonicalPatternIDByFixtureName mirrors the constants declared + // in each detector file. Adding a pattern means adding a row here + // + a fixture row + (transitively) a row in the + // TestVerdictEnvelopeV1RC1_AllShippedVerdictsValidate cases table. + canonicalPatternIDByFixtureName := map[string]string{ + "pod_evicted": patterns.PatternIDPodEvicted, + "nccl_hang": patterns.PatternIDNCCLHang, + "xid_correlation": patterns.PatternIDXidCorrelation, + "hbm_ecc": patterns.PatternIDHBMECC, + "thermal_throttle": patterns.PatternIDThermalThrottle, + "pcie_aer": patterns.PatternIDPCIeAER, + "cuda_oom": patterns.PatternIDCUDAOOM, + "ib_link_flap": patterns.PatternIDIBLinkFlap, + "silent_data_corruption": patterns.PatternIDSilentDataCorruption, + } + + bs, err := os.ReadFile(canonicalShippedFixturesPath) //nolint:gosec // test-local relative path + require.NoError(t, err) + + var doc struct { + Fixtures []struct { + Name string `json:"name"` + Payload map[string]any `json:"payload"` + } `json:"fixtures"` + } + require.NoError(t, json.Unmarshal(bs, &doc)) + + // Symmetry: every detector const has exactly one fixture row, and + // every fixture row matches a detector const. A new pattern with + // no fixture (or a fixture with no detector const) fails fast. + seenInFixtures := make(map[string]bool, len(doc.Fixtures)) + for _, fx := range doc.Fixtures { + fx := fx + t.Run(fx.Name, func(t *testing.T) { + t.Parallel() + wantID, ok := canonicalPatternIDByFixtureName[fx.Name] + require.True(t, ok, + "fixture %q has no canonical-pattern-id mapping; add a row to "+ + "canonicalPatternIDByFixtureName in this test pointing at the "+ + "detector-package PatternID* const for this pattern.", fx.Name) + gotID, _ := fx.Payload["pattern.id"].(string) + require.Equal(t, wantID, gotID, + "fixture %q pattern.id %q drifted from detector const %q. "+ + "The fixture is consumed by Go + Python SDK round-trip suites; "+ + "a wrong pattern.id ships an invalid pattern identifier to every "+ + "downstream consumer. Fix: update the fixture row in "+ + "docs/schemas/fixtures/shipped-patterns-v1.0.0-rc1.json.", + fx.Name, gotID, wantID) + }) + seenInFixtures[fx.Name] = true + } + for name := range canonicalPatternIDByFixtureName { + if !seenInFixtures[name] { + t.Errorf("detector pattern %q declared in this test has no fixture row "+ + "in docs/schemas/fixtures/shipped-patterns-v1.0.0-rc1.json — every "+ + "shipped pattern MUST have a fixture (issue #368).", name) + } + } +}