TraceCoreAI · trilamsr · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/Makefile b/Makefile
@@ -14,7 +14,7 @@
 .PHONY: coverage coverage-check
 
 # Policy gates (each enforces a specific RFC-bound invariant)
-.PHONY: license-check license-fix govulncheck dco-check ci-fuzz-nccl-fr nccl-fr-rce-gate register-lint actionlint zizmor doc-check doc-check-release no-autoupdate-check base-digest-check build-tags attribute-namespace-check deprecation-check rfc-status-check cut-criteria-status cut-criteria-status-all cut-criteria-render cut-criteria-check slo-rules-check test-flake-audit
+.PHONY: license-check license-fix govulncheck dco-check ci-fuzz-nccl-fr nccl-fr-rce-gate register-lint actionlint zizmor doc-check doc-check-release no-autoupdate-check anonymize-pod-evicted-fixture-check base-digest-check build-tags attribute-namespace-check deprecation-check rfc-status-check cut-criteria-status cut-criteria-status-all cut-criteria-render cut-criteria-check slo-rules-check test-flake-audit
 
 # Aggregate gates: pre-commit / pre-push / fast-CI / full-CI
 .PHONY: check verify ci ci-fast ci-full
@@ -309,7 +309,7 @@ cut-criteria-check:  ## Drift gate: every per-milestone rendered markdown must m
 	@# this gate. Override with MILESTONE=v1.0-rc1 (etc.) to scope.
 	@python3 scripts/cut_criteria.py check $(if $(MILESTONE),--milestone $(MILESTONE),)
 
-verify: check license-check generate-fixtures-check verdict-fixtures-check build-tags nccl-fr-rce-gate register-lint actionlint zizmor doc-check deprecation-check no-autoupdate-check test-flake-audit  ## Pre-push gate. Medium (<30s); CI handles heavy gates (test, coverage, govulncheck, fuzz, build).
+verify: check license-check generate-fixtures-check verdict-fixtures-check build-tags nccl-fr-rce-gate register-lint actionlint zizmor doc-check deprecation-check no-autoupdate-check anonymize-pod-evicted-fixture-check test-flake-audit  ## Pre-push gate. Medium (<30s); CI handles heavy gates (test, coverage, govulncheck, fuzz, build).
 
 test-extras-sustained:  ## (sub-target) sustained-load (5 min); see `make test-extras`.
 	@# kernelevents was the sole sustained-load suite consumer; deleted
@@ -403,6 +403,14 @@ no-autoupdate-check:  ## Enforce RFC-0008: cmd/, components/, internal/, pkg/ co
 	@scripts/no-autoupdate-check.sh
 	@scripts/no-autoupdate-check_test.sh
 
+anonymize-pod-evicted-fixture-check:  ## M19 carry-forward #1: verify every operator-contributed pod_evicted replay fixture under _real_world/ carries no PII shapes (IPv4, email, cloud-instance node names, image refs). Also runs the anonymizer's own mutation regression tests.
+	@# Verify each _real_world/ directory that holds a manifest.json (others are skipped), then run the anonymizer's own test script.
+	@set -e; for fixture in module/pkg/replay/pod_evicted/_real_world/*/; do \
+	  [ -f "$$fixture/manifest.json" ] || continue; \
+	  bash scripts/anonymize-pod-evicted-fixture.sh --verify "$$fixture"; \
+	done
+	@bash scripts/anonymize-pod-evicted-fixture_test.sh
+
 zizmor:  ## Security-lint GitHub Actions workflows (template injection, untrusted-input-in-script, over-broad permissions, cache poisoning). Gates at --min-severity=high.
 	@scripts/zizmor.sh
 
@@ -420,7 +428,7 @@ ci-fast: lint vet mod-verify attribute-namespace-check doc-check test-flake-audi
 # the ratchet path. We accept the wall-time hit because local `ci-full`
 # divergence from CI surfaces ceiling breaches only post-PR-open, which
 # defeats the per-PR enforcement intent of #302.
-ci-full: license-check generate-fixtures-check verdict-fixtures-check vet build-tags tidy-check mod-verify lint nccl-fr-rce-gate register-lint actionlint zizmor coverage-check ci-fuzz-nccl-fr govulncheck doc-check deprecation-check no-autoupdate-check test-flake-audit pre-push-test build smoke smoke-quickstart bench-allocs-check  ## Everything CI runs. Run before opening a PR.
+ci-full: license-check generate-fixtures-check verdict-fixtures-check vet build-tags tidy-check mod-verify lint nccl-fr-rce-gate register-lint actionlint zizmor coverage-check ci-fuzz-nccl-fr govulncheck doc-check deprecation-check no-autoupdate-check anonymize-pod-evicted-fixture-check test-flake-audit pre-push-test build smoke smoke-quickstart bench-allocs-check  ## Everything CI runs. Run before opening a PR.
 
 pre-push-test:  ## Regression harness for .githooks/pre-push path-filter routing. Cheap (<1s); runs the hook in dry-run mode against synthetic diff ranges and asserts each gate fires only when its source paths change.
 	@bash scripts/pre-push-test.sh

diff --git a/docs/MILESTONES.md b/docs/MILESTONES.md
@@ -495,7 +495,7 @@ Superseded by RFC-0013 (kueue → `prometheusreceiver` recipe / kineto deferred
 - **Status:** ⧗ partial
 - **Depends on:** M10, M4b
 - **Landed:** `internal/synthesis/patterns/pod_evicted.go` + `internal/synthesis/replay/` runner + canonical + 3 negative fixtures + `_real_world/` contribution slot + JSON schema; `k8sevents.NodeRecord` + Node informer + RBAC delta; `tracecore failure-inject pod-evict` CLI; `Record.ReportingInstance` for node-name joining; `chaos.yml` `pattern-pod-evicted` job + `pod-evict --reason=DiskPressure` golden SHA pin in `tools/failure-inject/testdata/golden.sha256`.
-- **Carry-forward:** (1) Anonymized real-world fixtures populating `replay/pod_evicted/_real_world/<anon-name>/` (slot + contribution README + fixture-shape template exist; no captures contributed yet). Slot uses the `_real_world/` underscore prefix (was `real_world_*/` in the original carry-forward text) for parity with the existing `_negative/` group convention the loader already walks; (2) detection-latency rubric (≤5s p95) requires live-cluster integration; (3) `--filler=/tmp` real-kubelet eviction flag for `failure-inject pod-evict` once `--allow-cluster-write` gains a kube-apiserver write seam.
+- **Carry-forward:** (1) ⧗ Anonymized real-world fixtures populating `replay/pod_evicted/_real_world/<anon-name>/`. Infrastructure shipped: slot + contribution README + fixture-shape template + `scripts/anonymize-pod-evicted-fixture.sh` (deterministic sha8 rewrite + IPv4/email/cloud-instance/image-ref verifier, mutation-tested by sibling `_test.sh`, wired into `make verify` and `make ci-full` via `anonymize-pod-evicted-fixture-check`) + one synthetic-but-real-world-shaped fixture (`synthetic-2026-06-multi-rank-disk-pressure/`) that exercises a multi-rank disk-pressure burst with mixed full+partial confidence, proving the loader walks `_real_world/` identically to `_negative/` (`TestPodEvictedReplay_RealWorldGroupLoaderSafe`). Operator-contributed captures still pending. Slot uses the `_real_world/` underscore prefix (was `real_world_*/` in the original carry-forward text) for parity with the existing `_negative/` group convention the loader already walks; (2) detection-latency rubric (≤5s p95) requires live-cluster integration; (3) `--filler=/tmp` real-kubelet eviction flag for `failure-inject pod-evict` once `--allow-cluster-write` gains a kube-apiserver write seam.
 
 **Functional rubrics:**
 - ☑ Detector at `internal/synthesis/patterns/pod_evicted.go` exports `PodEvictedDetector` consumed by `internal/synthesis/replay/runner_test.go`. Generic `Detector` interface deferred until M17/M18 add a second consumer per PRINCIPLES §3 rule-of-three.

diff --git a/docs/threat-model.md b/docs/threat-model.md
@@ -327,7 +327,7 @@ The pre-audit hygiene that makes the engagement worth the price:
 | NetworkPolicy template rendered by the chart | ⧗ — sibling-agent work; tracked | release lead | yes (audit §6.G) |
 | Conftest rule covering rendered ClusterRoles (§6.C residual mitigation) | ☐ | release lead | no (audit can flag; we ship the rule based on findings) |
 | Exporter-credential redaction wrapper (§6.B residual mitigation) | ☐ — upstream contribution to `confighttp` | release lead | no (audit confirms the need; we ship the upstream PR post-audit) |
-| Replay-corpus review against §1 "Replay corpus" asset row — no PII / operator-identifying data in committed fixtures | ☐ — diff every fixture under `module/pkg/replay/testdata/` | distributed-systems lead | yes |
+| Replay-corpus review against §1 "Replay corpus" asset row — no PII / operator-identifying data in committed fixtures | ⧗ partial — `scripts/anonymize-pod-evicted-fixture.sh --verify` (+ mutation-tested `_test.sh`, wired into `make verify` and `make ci-full` via `anonymize-pod-evicted-fixture-check`) catches IPv4 / email / cloud-instance-node / image-ref shapes in any fixture under `module/pkg/replay/pod_evicted/_real_world/`. Per-detector siblings (`cuda_oom`, `nccl_hang`, …) tracked as a follow-up; the script is shaped to generalize. | distributed-systems lead | yes |
 
 **Definition of done for "ready to hand to auditor"**: every "yes"
 row above is ☑, the auditor has read this doc, and the scope letter

diff --git a/module/pkg/replay/pod_evicted/_real_world/README.md b/module/pkg/replay/pod_evicted/_real_world/README.md
@@ -38,24 +38,63 @@ new captures plug in without any code change.
 
 ## Contribution checklist
 
-1. **Anonymize first.** Strip pod names, namespaces, node names,
-   UIDs, image refs, and any user-identifying labels. Use stable
-   pseudonyms (e.g. `pod-aaa`, `node-bbb`) so the fixture is
-   self-consistent across `events.json` and `node_conditions.json`.
-   Reset timestamps to start at a fixed epoch so diffs stay
-   readable.
+1. **Anonymize first.** This is non-negotiable; the replay corpus is
+   a §1 "Replay corpus" asset row in [`docs/threat-model.md`](../../../../../docs/threat-model.md),
+   reviewed against the rule "no PII / operator-identifying data in
+   committed fixtures". The repo ships
+   [`scripts/anonymize-pod-evicted-fixture.sh`](../../../../../scripts/anonymize-pod-evicted-fixture.sh)
+   which deterministically rewrites the fields below; run it before
+   `git add`. The script is verified by `scripts/anonymize-pod-evicted-fixture_test.sh`
+   (mutation: if any PII survives, the script's verifier exits 1).
+
+   Fields the script scrubs (and you MUST verify by eye if you skip
+   the script):
+
+   | Field path                       | Replaced with                                       |
+   |----------------------------------|------------------------------------------------------|
+   | `regarding.namespace`            | `ns-<sha8>`                                          |
+   | `regarding.name`                 | `pod-<sha8>` (preserves trailing `-rank-N` if found) |
+   | `regarding.uid`                  | `pod-uid-<sha8>`                                     |
+   | `reporting_instance`             | `node-<sha8>` (the same in `node_conditions.json`)   |
+   | `node_name`, `node_uid`          | matching `node-<sha8>` / `node-uid-<sha8>`           |
+   | `event_uid`                      | `evt-<sha8>`                                         |
+   | `event_time`, `transition_at`    | normalized so the earliest event lands at `2026-01-01T00:00:00Z` (offsets preserved) |
+
+   The hash is deterministic (sha256 of the original value, first
+   8 hex chars), so the same input always anonymizes to the same
+   output — replay-test goldens stay stable across re-runs.
+
+   Not rewritten by the script (scrub by hand, then verify by eye):
+   IP addresses embedded in `note` / `message` strings, image refs,
+   hostnames in prose, and label values copied into `description`.
+   The script greps for the IPv4-shaped subset and refuses to write
+   the output until you scrub or override; IPv6 is not auto-detected
+   and must be found and removed manually.
+
 2. **Confirm the capture reproduces an interesting failure mode
    the canonical fixture does not.** State which mode in the
    manifest `description`.
+
 3. **Land the golden verdict alongside.** If the detector misses
    the real-world case, that is the point; file the gap in
    `docs/FOLLOWUPS.md` and add the fixture under `_negative/`
    until the detector catches up. The replay corpus is the
    evidence trail.
+
 4. **Make `manifest.json`'s `fixture_name` unique** across the
    corpus; the runner uses it for test-failure labels.
 
-No contributed fixtures yet; this README is the contract.
+5. **Run `bash scripts/anonymize-pod-evicted-fixture.sh --verify
+   module/pkg/replay/pod_evicted/_real_world/<your-anon-name>/`**
+   from the repo root as the last step before `git add`. The verifier is
+   mutation-tested: if it ever returns 0 on a fixture that still contains
+   live PII shapes, `scripts/anonymize-pod-evicted-fixture_test.sh` goes red.
+
+One synthetic-but-real-world-shaped fixture lives at
+`synthetic-2026-06-multi-rank-disk-pressure/`; it exists so the loader
+test in `runner_test.go` can prove the `_real_world/` group walks the
+same way `_negative/` does. Operator-contributed captures land
+alongside it.
 
 ## Fixture template
 

diff --git a/...pkg/replay/pod_evicted/_real_world/synthetic-2026-06-multi-rank-disk-pressure/events.json b/...pkg/replay/pod_evicted/_real_world/synthetic-2026-06-multi-rank-disk-pressure/events.json
@@ -0,0 +1,53 @@
+[
+  {
+    "event_uid": "evt-1a2b3c4d",
+    "action": "Evicting",
+    "reason": "Evicted",
+    "hint": "pod_evicted",
+    "regarding": {
+      "kind": "Pod",
+      "namespace": "ns-1a2b3c4d",
+      "name": "pod-aaaa1111-rank-3",
+      "uid": "pod-uid-aaaa1111"
+    },
+    "reporting_controller": "kubelet",
+    "reporting_instance": "node-7f9a2b4c",
+    "note": "The node was low on resource: ephemeral-storage. Threshold quantity: 10%, available: 8%.",
+    "event_time": "2026-01-01T00:00:10Z",
+    "type": "Warning"
+  },
+  {
+    "event_uid": "evt-2b3c4d5e",
+    "action": "Evicting",
+    "reason": "Evicted",
+    "hint": "pod_evicted",
+    "regarding": {
+      "kind": "Pod",
+      "namespace": "ns-1a2b3c4d",
+      "name": "pod-bbbb2222-rank-4",
+      "uid": "pod-uid-bbbb2222"
+    },
+    "reporting_controller": "kubelet",
+    "reporting_instance": "node-7f9a2b4c",
+    "note": "The node was low on resource: ephemeral-storage. Threshold quantity: 10%, available: 7%.",
+    "event_time": "2026-01-01T00:00:12Z",
+    "type": "Warning"
+  },
+  {
+    "event_uid": "evt-3c4d5e6f",
+    "action": "Evicting",
+    "reason": "Evicted",
+    "hint": "pod_evicted",
+    "regarding": {
+      "kind": "Pod",
+      "namespace": "ns-1a2b3c4d",
+      "name": "pod-cccc3333-rank-7",
+      "uid": "pod-uid-cccc3333"
+    },
+    "reporting_controller": "kubelet",
+    "reporting_instance": "node-7f9a2b4c",
+    "note": "The node was low on resource: ephemeral-storage. Container nccl-rank-7 used 14Gi which exceeds its request of 8Gi.",
+    "event_time": "2026-01-01T00:00:35Z",
+    "type": "Warning"
+  }
+]
diff --git a/...pkg/replay/pod_evicted/_real_world/synthetic-2026-06-multi-rank-disk-pressure/golden.json b/...pkg/replay/pod_evicted/_real_world/synthetic-2026-06-multi-rank-disk-pressure/golden.json
@@ -0,0 +1,71 @@
+[
+  {
+    "pattern.id": "14",
+    "headline": "Pod ns-1a2b3c4d/pod-aaaa1111-rank-3 evicted at 2026-01-01T00:00:10Z due to disk pressure",
+    "remediation": "On node node-7f9a2b4c: Free imagefs or relocate the training write path to NVMe; tighten kubelet --eviction-hard nodefs.available.",
+    "confidence": "full",
+    "evidence_trail": [
+      {
+        "kind": "pod_event",
+        "uid": "evt-1a2b3c4d",
+        "timestamp": "2026-01-01T00:00:10Z",
+        "description": "Pod ns-1a2b3c4d/pod-aaaa1111-rank-3 evicted (reason=Evicted) on node node-7f9a2b4c"
+      },
+      {
+        "kind": "node_condition",
+        "uid": "node-uid-7f9a2b4c/disk/1767225600000000000",
+        "timestamp": "2026-01-01T00:00:00Z",
+        "description": "Node node-7f9a2b4c entered disk pressure: imagefs.available<10%"
+      }
+    ],
+    "k8s.pod.name": "pod-aaaa1111-rank-3",
+    "k8s.pod.namespace": "ns-1a2b3c4d",
+    "k8s.node.name": "node-7f9a2b4c",
+    "k8s.event.reason": "Evicted"
+  },
+  {
+    "pattern.id": "14",
+    "headline": "Pod ns-1a2b3c4d/pod-bbbb2222-rank-4 evicted at 2026-01-01T00:00:12Z due to disk pressure",
+    "remediation": "On node node-7f9a2b4c: Free imagefs or relocate the training write path to NVMe; tighten kubelet --eviction-hard nodefs.available.",
+    "confidence": "full",
+    "evidence_trail": [
+      {
+        "kind": "pod_event",
+        "uid": "evt-2b3c4d5e",
+        "timestamp": "2026-01-01T00:00:12Z",
+        "description": "Pod ns-1a2b3c4d/pod-bbbb2222-rank-4 evicted (reason=Evicted) on node node-7f9a2b4c"
+      },
+      {
+        "kind": "node_condition",
+        "uid": "node-uid-7f9a2b4c/disk/1767225600000000000",
+        "timestamp": "2026-01-01T00:00:00Z",
+        "description": "Node node-7f9a2b4c entered disk pressure: imagefs.available<10%"
+      }
+    ],
+    "k8s.pod.name": "pod-bbbb2222-rank-4",
+    "k8s.pod.namespace": "ns-1a2b3c4d",
+    "k8s.node.name": "node-7f9a2b4c",
+    "k8s.event.reason": "Evicted"
+  },
+  {
+    "pattern.id": "14",
+    "headline": "Pod ns-1a2b3c4d/pod-cccc3333-rank-7 evicted at 2026-01-01T00:00:35Z due to disk",
+    "remediation": "On node node-7f9a2b4c: Free imagefs or relocate the training write path to NVMe; tighten kubelet --eviction-hard nodefs.available.",
+    "confidence": "partial",
+    "evidence_trail": [
+      {
+        "kind": "pod_event",
+        "uid": "evt-3c4d5e6f",
+        "timestamp": "2026-01-01T00:00:35Z",
+        "description": "Pod ns-1a2b3c4d/pod-cccc3333-rank-7 evicted (reason=Evicted) on node node-7f9a2b4c"
+      }
+    ],
+    "k8s.pod.name": "pod-cccc3333-rank-7",
+    "k8s.pod.namespace": "ns-1a2b3c4d",
+    "k8s.node.name": "node-7f9a2b4c",
+    "k8s.event.reason": "Evicted",
+    "missing_layers": [
+      "node_condition"
+    ]
+  }
+]
diff --git a/...g/replay/pod_evicted/_real_world/synthetic-2026-06-multi-rank-disk-pressure/manifest.json b/...g/replay/pod_evicted/_real_world/synthetic-2026-06-multi-rank-disk-pressure/manifest.json
@@ -0,0 +1,6 @@
+{
+  "pattern_id": "14",
+  "fixture_name": "real_world_synthetic_multi_rank_disk_pressure",
+  "description": "Synthetic-but-real-world-shaped capture: a single node enters disk pressure and the kubelet evicts three same-namespace training ranks. The first two evictions land inside the 30s join window (full confidence — both reuse the same indexed node-condition); the third lands at T+35s, outside the default window, so the detector falls back to the note-based partial path. Exercises (a) per-node condition cache reuse across N evictions, (b) the partial-remediation cache for (node, pressure) re-renders, and (c) the loader walking `_real_world/` exactly like `_negative/`. Names are deterministically sha8-anonymized so this fixture is a faithful template for operator captures.",
+  "expected_timing": "node-condition transition at T+0s; evictions at T+10s and T+12s (full, joined); eviction at T+35s (partial, outside 30s window, note-inferred disk pressure)"
+}
diff --git a/...y/pod_evicted/_real_world/synthetic-2026-06-multi-rank-disk-pressure/node_conditions.json b/...y/pod_evicted/_real_world/synthetic-2026-06-multi-rank-disk-pressure/node_conditions.json
@@ -0,0 +1,10 @@
+[
+  {
+    "node_name": "node-7f9a2b4c",
+    "node_uid": "node-uid-7f9a2b4c",
+    "hint": "node_pressure",
+    "pressure": "disk",
+    "transition_at": "2026-01-01T00:00:00Z",
+    "message": "imagefs.available<10%"
+  }
+]