TraceCoreAI · trilamsr · Jun 2, 2026 · Jun 2, 2026
diff --git a/.github/workflows/chart.yml b/.github/workflows/chart.yml
@@ -458,6 +458,7 @@ jobs:
         with:
           cluster-name: tracecore-m5b
       - name: helm install + measure install-to-Ready
+        id: install
         run: |
           set -eo pipefail
           start=$(date +%s)
@@ -474,9 +475,35 @@ jobs:
           end=$(date +%s)
           dur=$((end - start))
           echo "install_to_ready_seconds=$dur" >> "$GITHUB_OUTPUT"
+          # Persist the per-run sample so the rolling-median aggregator
+          # (M3 carry-forward, docs/MILESTONES.md L209) can download it
+          # via `gh run download` from the next CI run. Sibling pattern:
+          # PR #446's bench-cv-rolling artifact pipeline.
+          mkdir -p helm-install-artifacts
+          printf '%s\n' "$dur" > helm-install-artifacts/install_to_ready_seconds.txt
+          {
+            echo "sha=${GITHUB_SHA}"
+            echo "run_id=${GITHUB_RUN_ID}"
+            echo "timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+            echo "runner=ubuntu-latest"
+          } > helm-install-artifacts/metadata.txt
           echo "::notice::install-to-Ready: ${dur}s (rubric: ≤300s)"
           test "$dur" -le 300 \
             || { echo "::error::install-to-Ready ${dur}s exceeds 300s rubric"; exit 1; }
+      - name: Upload helm-install duration artifact (M3 #209 carry-forward)
+        # Feeds `scripts/helm-install-rolling.sh` (10-run median gate; flips
+        # ⧗ → ☑ once 10 successful main-branch runs have artifacts).
+        # `always()` keeps the upload alive when the install step fails its
+        # 300s gate; the output check skips it when no duration was recorded.
+        # NOTE(review): the script lists `--status=success` runs only, so a
+        # breach-run sample is kept for inspection but not in the median.
+        if: always() && steps.install.outputs.install_to_ready_seconds != ''
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: helm-install-duration-${{ github.run_id }}
+          path: helm-install-artifacts/
+          if-no-files-found: warn
+          retention-days: 90
       - name: "helm status — STATUS: deployed"
         run: |
           status=$(helm status tracecore --namespace tracecore-system | grep '^STATUS:' | awk '{print $2}')

diff --git a/Makefile b/Makefile
@@ -2,7 +2,7 @@
 .PHONY: help build clean hooks
 
 # Test suites
-.PHONY: test test-extras test-extras-sustained test-extras-fuzz test-extras-fuzz-kmsg test-extras-fuzz-journald test-extras-fuzz-nccl-fr test-extras-race bench bench-check bench-allocs-check bench-baseline bench-detectors bench-detectors-check bench-detectors-baseline bench-cv-report
+.PHONY: test test-extras test-extras-sustained test-extras-fuzz test-extras-fuzz-kmsg test-extras-fuzz-journald test-extras-fuzz-nccl-fr test-extras-race bench bench-check bench-allocs-check bench-baseline bench-detectors bench-detectors-check bench-detectors-baseline bench-cv-report helm-install-rolling-report
 
 # Format + tidy
 .PHONY: fmt fmt-fix vet lint lint-fix tidy tidy-check mod-verify bump-otel
@@ -111,6 +111,12 @@ bench-cv-report:  ## Print per-detector allocs/op CV across the last N bench.yml
 	@# /tmp/tracecore-bench-artifacts/ — safe to wipe between sessions.
 	scripts/bench-cv-rolling.sh
 
+helm-install-rolling-report:  ## Median helm install + DaemonSet Ready across the last N chart.yml runs on main (M3 #209 carry-forward). Exits non-zero if median >300s. Requires `gh` auth; offline when unauthed.
+	@# N override: `N=20 make helm-install-rolling-report`. Cache defaults
+	@# to /tmp/tracecore-helm-install-artifacts/ (TC_HELM_INSTALL_CACHE_DIR
+	@# overrides it). Sibling pattern to `make bench-cv-report` (PR #446).
+	scripts/helm-install-rolling.sh
+
 
 fmt:  ## Check formatting; fails if any file is not gofumpt-clean.
 	@# gofumpt has no native exclude flag; filter ./_build/ (OCB-generated,

diff --git a/docs/MILESTONES.md b/docs/MILESTONES.md
@@ -206,7 +206,7 @@ Critical path to v0.1.0; the only lane in which a single milestone (M21) gates e
 - ☑ Rendered pod spec passes the Kubernetes `restricted` Pod Security Standard except for explicit `SYS_PTRACE` and the host-path mounts required by receivers; deviation list is enumerated in the chart README with a one-line justification per item. (per https://kubernetes.io/docs/concepts/security/pod-security-standards/)
 - ☑ DaemonSet template sets `securityContext.runAsNonRoot: true`, a non-zero `runAsUser`, `seccompProfile.type: RuntimeDefault`, `allowPrivilegeEscalation: false`; CI asserts each field via `yq`/grep gate. (per NORTHSTARS O2)
 - ☑ `Chart.yaml` declares `apiVersion: v2`, a SemVer `version`, and an `appVersion` matching the tracecore binary tag; CI gate fails on drift. (per PRINCIPLES §15)
-- ⧗ `helm install` plus DaemonSet `Ready` on a single-node kind cluster completes in ≤5 min median across 10 CI runs. *(Single-run ≤300s gate in `chart.yml`; 10-run median aggregation is the carry-forward.)* (per NORTHSTARS O2 hero-KPI)
+- ⧗ `helm install` plus DaemonSet `Ready` on a single-node kind cluster completes in ≤5 min median across 10 CI runs. *(Single-run ≤300s gate in `chart.yml`; 10-run median aggregation now live via `scripts/helm-install-rolling.sh` + per-run `helm-install-duration-<run_id>` artifact upload in `chart.yml` — flips ⧗ → ☑ once 10 successful main-branch runs have accumulated artifacts. Sibling pattern: PR #446's `bench-cv-rolling`. Operator entry point: `make helm-install-rolling-report`.)* (per NORTHSTARS O2 hero-KPI)
 
 ### M20. Reference-cluster install benchmark (staged)
 

diff --git a/install/kubernetes/tracecore/README.md b/install/kubernetes/tracecore/README.md
@@ -540,6 +540,31 @@ kubectl label namespace tracecore-system \
   pod-security.kubernetes.io/warn=restricted
 ```
 
+**`make helm-install-rolling-report` reports median above 300s.** The
+M3 carry-forward rubric (`docs/MILESTONES.md` L209) requires the
+`helm install` + DaemonSet `Ready` wall-clock to land at a median ≤5
+min across 10 successful CI runs. `chart.yml`'s `install` job uploads
+each run's `helm-install-duration-<run_id>` artifact; the script
+`scripts/helm-install-rolling.sh` (operator entry point: `make
+helm-install-rolling-report`) downloads the last 10 via `gh run
+download` and computes the median.
+
+When the median trips the 300s gate:
+
+1. Run `make helm-install-rolling-report` locally to see per-run
+   samples. A borderline median (~290-310s) is usually flake noise; a
+   shift sustained across runs points to a real regression.
+2. If a single run jumped to 400-500s, `gh run view <id> --log` and
+   look for image-pull or probe-misconfig stalls in the kind-up step.
+3. If every run jumped, suspect a chart template edit. `git bisect`
+   between the last-green run sha and the first-red run sha against
+   `install/kubernetes/tracecore/`.
+
+The single-run ≤300s gate is the hard fail inside the workflow; the
+rolling-median view is the carry-forward layer that flips ⧗ → ☑ once
+10 successful main-branch runs have artifacts. Sibling pattern: PR
+#446's `bench-cv-rolling` for per-detector allocs/op CV.
+
 ## Pod Security Standard compliance
 
 The chart targets the Kubernetes [`restricted`](https://kubernetes.io/docs/concepts/security/pod-security-standards/)

diff --git a/scripts/helm-install-rolling.sh b/scripts/helm-install-rolling.sh
@@ -0,0 +1,231 @@
+#!/usr/bin/env bash
+# helm-install-rolling.sh — rolling median of `helm install` plus
+# DaemonSet Ready wall-clock across the last N successful chart.yml
+# runs on main. Closes the M3 carry-forward (docs/MILESTONES.md L209):
+# "≤5 min median across 10 CI runs". The single-run ≤300s gate already
+# lives in `.github/workflows/chart.yml`; this is the 10-run
+# aggregation layer.
+#
+# Sibling pattern: scripts/bench-cv-rolling.sh (PR #446) does the same
+# shape — download last N artifacts via `gh run download`, parse a
+# single numeric per artifact, aggregate. Differences:
+#   * scope: install-to-Ready duration per run (one sample), not 10
+#     bench samples per detector per run
+#   * statistic: median (matches MILESTONES.md wording "≤5 min median")
+#     rather than CV
+#   * gate: ≤300s (matches single-run threshold so the aggregation can
+#     graduate from advisory to hard-fail without redefining the rubric)
+#
+# How it works:
+#   1. List the last N successful runs of `.github/workflows/chart.yml`
+#      via `gh run list`.
+#   2. Download each run's `helm-install-duration-<run_id>` artifact via
+#      `gh run download`. Cached locally in $TC_HELM_INSTALL_CACHE_DIR
+#      (default /tmp/tracecore-helm-install-artifacts).
+#   3. Read install_to_ready_seconds.txt (single integer) per artifact.
+#   4. Print every sample + median across N runs; exit 0 if median ≤
+#      300, exit 1 if median > 300.
+#
+# Edge cases (parity with bench-cv-rolling):
+#   - Missing artifacts (older runs predating this PR) skipped with a
+#     one-line note; the script still produces a report from whatever
+#     runs do have artifacts.
+#   - n_runs < 10: prints "need ≥10 runs" warning; does NOT fail the
+#     gate yet (the carry-forward says the gate flips ⧗ → ☑ "once 10
+#     runs accumulate"). Exit code is still pass/fail based on the
+#     median of what we have.
+#   - Garbage content in an artifact (non-integer): skip that run,
+#     continue aggregating; do not crash. Bench-cv-rolling handles
+#     the equivalent via the awk allocs/op-line-only grep.
+#   - Offline / no `gh`: prints a "no rolling data available" message
+#     and exits 0 (not a failure — the offline operator just gets the
+#     fallback view). Sibling bench-cv-rolling.sh falls back to
+#     baselines.json; this script has no equivalent single-sample
+#     source, so the fallback is informational.
+#
+# Failure-mode debug recipe (when CI flips this script red):
+#   1. Pull last 10 runs locally: `make helm-install-rolling-report`.
+#   2. If median is borderline (~290-310s), inspect per-run samples
+#      printed in the report — flake noise vs sustained regression.
+#   3. If a single run jumped to 400-500s, download its kind-up logs
+#      via `gh run view <id> --log` and look for image-pull / probe-
+#      misconfig stalls.
+#   4. If every run jumped, suspect a chart template edit — `git
+#      bisect` between the last-green run sha and the first-red run
+#      sha against `install/kubernetes/tracecore/`.
+#
+# Usage:
+#   scripts/helm-install-rolling.sh                    # last 10 runs
+#   N=20 scripts/helm-install-rolling.sh               # last 20 runs
+#   scripts/helm-install-rolling.sh --dir /path/to/dir # offline, parse
+#                                                      # local dir of
+#                                                      # install_to_
+#                                                      # ready_seconds
+#                                                      # .txt files
+#
+# Portability: bash 3.2 (macOS stock) — no associative arrays, no
+# mapfile, no readarray.
+set -euo pipefail
+
+N="${N:-10}"
+WORKFLOW="${WORKFLOW:-chart.yml}"
+CACHE_DIR="${TC_HELM_INSTALL_CACHE_DIR:-/tmp/tracecore-helm-install-artifacts}"
+mode="ci"
+local_dir=""
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --dir)
+            mode="local"
+            local_dir="$2"
+            shift 2
+            ;;
+        --help|-h)
+            sed -n '2,72p' "$0"
+            exit 0
+            ;;
+        *)
+            echo "helm-install-rolling: unknown flag $1" >&2
+            exit 2
+            ;;
+    esac
+done
+
+mkdir -p "$CACHE_DIR"
+runs_seen=0
+
+if [[ "$mode" == "local" ]]; then
+    if [[ ! -d "$local_dir" ]]; then
+        echo "helm-install-rolling: --dir path '$local_dir' does not exist" >&2
+        exit 2
+    fi
+    # Each *.txt file under --dir is one "run". Treat its single-line
+    # integer as the install-to-Ready measurement.
+    i=0
+    while IFS= read -r f; do
+        i=$((i + 1))
+        run_dir="$CACHE_DIR/local-$i"
+        mkdir -p "$run_dir"
+        cp "$f" "$run_dir/install_to_ready_seconds.txt"
+        runs_seen=$((runs_seen + 1))
+    done < <(find "$local_dir" -type f -name '*.txt' | sort)
+else
+    if ! command -v gh >/dev/null 2>&1; then
+        echo "helm-install-rolling: gh CLI not in PATH; no rolling data available" >&2
+        echo "  (Sibling bench-cv-rolling.sh falls back to baselines.json;" >&2
+        echo "  no equivalent single-sample source exists for install duration.)" >&2
+        exit 0
+    fi
+fi
+
+if [[ "$mode" == "ci" ]]; then
+    runs_json=$(gh run list \
+        --workflow="$WORKFLOW" \
+        --status=success \
+        --branch=main \
+        --limit="$N" \
+        --json=databaseId,headSha,createdAt 2>/dev/null || echo '[]')
+
+    run_ids=$(echo "$runs_json" | jq -r '.[].databaseId' 2>/dev/null || true)
+    if [[ -z "$run_ids" ]]; then
+        echo "helm-install-rolling: no successful main-branch runs found for $WORKFLOW" >&2
+        echo "  (artifact pipeline likely not landed on main yet — check #444-style follow-up)" >&2
+        exit 0
+    fi
+
+    for run_id in $run_ids; do
+        run_dir="$CACHE_DIR/run-$run_id"
+        if [[ -f "$run_dir/install_to_ready_seconds.txt" ]]; then
+            runs_seen=$((runs_seen + 1))
+            continue
+        fi
+        mkdir -p "$run_dir"
+        if gh run download "$run_id" \
+            --name="helm-install-duration-$run_id" \
+            --dir="$run_dir" 2>/dev/null; then
+            if [[ -f "$run_dir/install_to_ready_seconds.txt" ]]; then
+                runs_seen=$((runs_seen + 1))
+            else
+                echo "  skip run $run_id (artifact present but empty)" >&2
+            fi
+        else
+            echo "  skip run $run_id (no helm-install artifact — pre-#445 or expired)" >&2
+        fi
+    done
+
+    if [[ "$runs_seen" -eq 0 ]]; then
+        echo "helm-install-rolling: 0 runs had artifacts (gate not yet primed)" >&2
+        exit 0
+    fi
+fi
+
+# Collect every parseable sample into a sorted file. Garbage-tolerant:
+# non-integer content is dropped (and the run is silently skipped — the
+# operator already saw the per-run breakdown above).
+samples=$(mktemp)
+trap 'rm -f "$samples"' EXIT
+
+valid_runs=0
+for d in "$CACHE_DIR"/*/; do
+    f="$d/install_to_ready_seconds.txt"
+    if [[ -f "$f" ]]; then
+        # Read the single-line integer. Tolerate trailing whitespace.
+        val=$(head -1 "$f" | tr -d '[:space:]')
+        if [[ "$val" =~ ^[0-9]+$ ]]; then
+            echo "$val" >> "$samples"
+            valid_runs=$((valid_runs + 1))
+        else
+            echo "  skip $f (non-integer content: '$val')" >&2
+        fi
+    fi
+done
+
+if [[ "$valid_runs" -eq 0 ]]; then
+    echo "helm-install-rolling: collected $runs_seen runs but 0 parsed (bad artifacts?)" >&2
+    exit 2
+fi
+
+# Median of the parsed samples. Odd count → the middle sample; even
+# count → mean of the two middle samples, which is fractional (x.5) only
+# when their sum is odd. `sorted` is reused by the report further down.
+sorted=$(sort -n "$samples")
+median=$(printf '%s\n' "$sorted" | awk '
+    { v[NR] = $1 }
+    END {
+        if (NR == 0) { exit 1 }
+        # v[] is 1-based; half + 1 is the middle (odd) or upper-middle (even).
+        half = int(NR / 2)
+        if (NR % 2) {
+            mid = v[half + 1]
+        } else {
+            mid = (v[half] + v[half + 1]) / 2
+        }
+        # Integral medians print bare (145, not 145.000000); a true
+        # half-step keeps one decimal (145.5).
+        if (mid != int(mid)) {
+            printf "%.1f\n", mid
+        } else {
+            printf "%d\n", mid
+        }
+    }
+')
+
+echo "==> helm install + DaemonSet Ready: rolling median (rubric: median ≤ 300s, M3 #209)"
+echo
+echo "n_runs=$valid_runs"
+echo "median_seconds=$median"
+echo "samples_sorted=$(echo "$sorted" | tr '\n' ' ' | sed 's/ $//')"
+echo
+
+if [[ "$valid_runs" -lt 10 ]]; then
+    echo "NOTE: need ≥10 runs to flip M3 #209 carry-forward ⧗ → ☑;"
+    echo "      currently $valid_runs run(s) in window."
+fi
+
+# Gate: exit 1 iff median strictly above the rubric.
+if awk -v m="$median" 'BEGIN { exit (m > 300) ? 0 : 1 }'; then
+    echo "::error::install-to-Ready rolling median ${median}s exceeds 300s rubric (M3 #209)"
+    exit 1
+fi
+
+echo "ok: rolling median ${median}s within 300s rubric"