diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index f888a762..63a3c1db 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -18,11 +18,19 @@ env:
   GO_VERSION_FILE: go.mod
   BINARY_BASENAME: tracecore_linux_amd64
   CYCLONEDX_GOMOD_VERSION: v1.10.0
+  # Image publish destination. Chart's values.yaml `image.repository`
+  # default points here; keeping the two aligned (no drift) is the
+  # contract this job upholds. Lowercase per OCI distribution spec.
+  IMAGE_REPO: ghcr.io/tracecoreai/tracecore
 
 jobs:
   build:
     name: build (reproducible verify)
     runs-on: ubuntu-latest
+    # Hard ceiling on a stuck job. The two cold-cache Go builds + diffoscope
+    # comparison fit comfortably under 10 minutes on ubuntu-latest; 20 leaves
+    # headroom for apt mirror weather without letting a hang burn billing.
+    timeout-minutes: 20
     permissions:
       contents: read
     outputs:
@@ -214,6 +222,7 @@ jobs:
     name: sbom (cyclonedx)
     runs-on: ubuntu-latest
     needs: build
+    timeout-minutes: 15
     permissions:
       contents: read
     steps:
@@ -285,6 +294,9 @@ jobs:
     name: sign (cosign keyless)
     runs-on: ubuntu-latest
     needs: build
+    # OIDC token + Fulcio + Rekor round-trip; usually <2min. Tighter cap so a
+    # Sigstore service degradation fails fast instead of stalling the release.
+    timeout-minutes: 10
     permissions:
       id-token: write
       contents: read
@@ -347,6 +359,7 @@ jobs:
     name: provenance (SLSA v1.0)
     runs-on: ubuntu-latest
     needs: build
+    timeout-minutes: 10
     permissions:
       id-token: write
       attestations: write
@@ -443,10 +456,173 @@ jobs:
           if-no-files-found: error
           retention-days: 7
 
+  image:
+    name: image (build + push + sign + attest)
+    runs-on: ubuntu-latest
+    needs: build
+    # Buildkit cold pull of distroless + COPY layer + push to ghcr + 2× cosign
+    # OIDC round-trips. Real-world ~3-5min; 20 leaves slack for ghcr.io weather
+    # without letting a Sigstore stall block the release indefinitely.
+    timeout-minutes: 20
+    permissions:
+      contents: read
+      packages: write # push to ghcr.io/tracecoreai/tracecore
+      id-token: write # cosign keyless + attest-build-provenance OIDC
+      attestations: write # attest-build-provenance writes a bundle
+    outputs:
+      digest: ${{ steps.push.outputs.digest }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          # Pin by commit SHA so the Dockerfile + workflow this job
+          # reads match the commit that ran. The binary downloaded
+          # below has its own digest guard (see "Verify binary digest
+          # matches build job"), which catches the case of a binary
+          # built from a different tree than the Dockerfile read here.
+          # Together these close the force-push window: a force-push
+          # to the tag between `build` and `image` cannot smuggle in
+          # either a different binary or a different Dockerfile.
+          ref: ${{ github.sha }}
+          persist-credentials: false
+
+      - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          name: binary
+          path: release
+
+      - name: Verify binary digest matches build job
+        env:
+          EXPECTED: ${{ needs.build.outputs.digest }}
+        run: |
+          set -euo pipefail
+          actual=$(sha256sum "release/$BINARY_BASENAME" | awk '{print $1}')
+          if [ "$actual" != "$EXPECTED" ]; then
+            echo "::error::artifact digest drift: build=$EXPECTED downloaded=$actual"
+            exit 1
+          fi
+
+      - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
+
+      - uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Compute image tags
+        id: meta
+        env:
+          TAG: ${{ needs.build.outputs.tag }}
+          IS_PRERELEASE: ${{ contains(needs.build.outputs.tag, '-') }}
+        run: |
+          set -euo pipefail
+          # Always tag with the release version. Only float `:latest`
+          # for stable releases (no `-` in the SemVer pre-release
+          # field); a pre-release that takes `:latest` would silently
+          # promote alpha bits to the chart's default-pull surface.
+          tags="$IMAGE_REPO:$TAG"
+          if [ "$IS_PRERELEASE" != "true" ]; then
+            tags="$tags"$'\n'"$IMAGE_REPO:latest"
+          fi
+          {
+            echo "tags<<EOF"
+            echo "$tags"
+            echo "EOF"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Compute SOURCE_DATE_EPOCH for image layer
+        id: sde
+        env:
+          COMMIT: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+          # Same epoch as the binary build (committer time of the
+          # release commit) so the image is not stamped with `now()`.
+          # Resolve it from the checked-out SHA: the checkout above
+          # fetches that commit only, so the tag name is not a local ref.
+          epoch=$(git log -1 --pretty=%ct "$COMMIT")
+          echo "epoch=$epoch" >> "$GITHUB_OUTPUT"
+
+      - uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2
+        id: push
+        env:
+          # SOURCE_DATE_EPOCH must reach buildkit through the build
+          # environment (not just as a --build-arg). The Dockerfile also
+          # declares `ARG SOURCE_DATE_EPOCH` so the contract is visible
+          # there and a local `docker buildx build` can reproduce CI.
+          # NOTE(review): rewriting file mtimes inside layers may also need
+          # the `rewrite-timestamp=true` exporter option - TODO confirm.
+          SOURCE_DATE_EPOCH: ${{ steps.sde.outputs.epoch }}
+        with:
+          context: .
+          file: Dockerfile
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          build-args: |
+            BINARY_PATH=release/${{ env.BINARY_BASENAME }}
+            SOURCE_DATE_EPOCH=${{ steps.sde.outputs.epoch }}
+          provenance: false # buildx provenance off; a later step attests the pushed digest instead
+          sbom: false # SBOM ships as a release artifact, not a manifest sub-attestation
+
+      - uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6 # v4.1.2
+
+      - name: Sign image (keyless)
+        env:
+          DIGEST: ${{ steps.push.outputs.digest }}
+        run: |
+          set -euo pipefail
+          # Sign the manifest by digest, not tag: tags are mutable, so
+          # a re-pushed `:latest` would otherwise change what
+          # `cosign verify` resolves. The digest is the trust root.
+          cosign sign --yes "${IMAGE_REPO}@${DIGEST}"
+
+      - name: Verify image signature smoke check
+        env:
+          DIGEST: ${{ steps.push.outputs.digest }}
+          IDENTITY_REGEXP: "^https://github.com/${{ github.repository }}/\\.github/workflows/release\\.yml@refs/tags/"
+          TAG: ${{ needs.build.outputs.tag }}
+        run: |
+          set -euo pipefail
+          # Same identity-binding pattern as the binary blob verify:
+          # pin to release.yml on a tag ref. Mismatch fails closed.
+          cosign verify "${IMAGE_REPO}@${DIGEST}" \
+            --certificate-identity-regexp "$IDENTITY_REGEXP" \
+            --certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
+            --certificate-github-workflow-ref "refs/tags/$TAG" \
+            --certificate-github-workflow-trigger 'push'
+
+      - uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0
+        with:
+          subject-name: ${{ env.IMAGE_REPO }}
+          subject-digest: ${{ steps.push.outputs.digest }}
+          push-to-registry: true # store the attestation in the registry next to the image
+
+      - name: Verify image attestation smoke check
+        env:
+          DIGEST: ${{ steps.push.outputs.digest }}
+          IDENTITY_REGEXP: "^https://github.com/${{ github.repository }}/\\.github/workflows/release\\.yml@refs/tags/"
+          TAG: ${{ needs.build.outputs.tag }}
+        run: |
+          set -euo pipefail
+          # `cosign sign` covered the manifest signature. `attest-build-provenance`
+          # pushed a SLSA v1 provenance attestation alongside the manifest in the
+          # registry (push-to-registry: true). Verify that attestation now, by
+          # digest + by predicate type + by the same identity binding, so a
+          # third-party verifier replaying the docs/reproducibility.md walkthrough
+          # won't be the first to discover a broken or missing attestation.
+          cosign verify-attestation "${IMAGE_REPO}@${DIGEST}" \
+            --type slsaprovenance1 \
+            --certificate-identity-regexp "$IDENTITY_REGEXP" \
+            --certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
+            --certificate-github-workflow-ref "refs/tags/$TAG" \
+            --certificate-github-workflow-trigger 'push'
+
   release:
     name: release
     runs-on: ubuntu-latest
-    needs: [build, sbom, sign, provenance]
+    needs: [build, sbom, sign, provenance, image] # the release now also waits for the image job
+    timeout-minutes: 10
     permissions:
       contents: write
     steps:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ad98348..f221446f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Pre-alpha.
The CLI runs the M1 pipeline runtime end-to-end via factory-based ass ### Added +- **M3: container image publish to `ghcr.io/tracecoreai/tracecore`.** Every release tag now pushes a signed and attested container image alongside the existing binary + SBOM + cosign-bundle + provenance artifacts. The image is built from the same byte-reproducible binary the binary-side jobs sign (no in-image recompile), the manifest is cosigned by digest using the existing keyless OIDC identity binding, and a SLSA v1.0 provenance attestation is pushed to the registry via `attest-build-provenance push-to-registry=true`. Stable releases (no `-` in the SemVer pre-release field) also tag `:latest`; pre-releases do not. The Dockerfile pins `gcr.io/distroless/static-debian12:nonroot` by digest and runs as the distroless `nonroot` user (UID 65532) matching the chart's `runAsUser`. Verification walkthrough at [`docs/reproducibility.md`](docs/reproducibility.md) steps 8 and 9. Closes the long-standing chart-default-image gap (chart's `image.repository: ghcr.io/tracecoreai/tracecore` is now a live pull path, not a future one). - **M6 - integration recipes scaffold.** `docs/integrations/{otel-backend,honeycomb,datadog,clickhouse-direct}.md` ship the first four backend recipes plus matching `docs/integrations/examples/*.yaml`. Each recipe carries `<!-- tested-against: ... -->` and `<!-- last-verified: ... -->` HTML comments. 
`scripts/doc-check.sh` adds eight new gates: (a) every recipe contains >=1 fenced `yaml` block whose first non-blank line names a file under `docs/integrations/examples/`; (b) tested-against marker present; (c) last-verified marker present and <=180 days old; (d) `docs/README.md` indexes every recipe; (e) `docs/nps.md` carries the three canonical H3 survey headings (Recommend / Biggest change / Best part); (f) `docs/FAILURE-MODES.md` enumerates rows for `vendor SDK failure` / `exporter unreachable` / `config invalid` each citing a real `Test*` identifier; (g) `docs/getting-started.md` has <=5 fenced bash/sh blocks; (h) every `docs/integrations/examples/*.yaml` uses `REPLACE_WITH_*` placeholders (rejects `${VAR}` interpolation, since tracecore does not expand env vars in YAML). `scripts/validator-recipe.sh` runs `./tracecore validate --config=` against `tracecore`-tagged examples and `otelcol-contrib validate --config=` against contrib-tagged examples (pinned to v0.152.0 by SHA-256 against the upstream `opentelemetry-collector-releases` checksums file). New `validator-recipe` CI job feeds the `verify` aggregator. All eight new doc-check gates mutation-verified; all four example YAMLs empirically validated against their target binary (in-tree `tracecore` for the two tracecore-tagged recipes, `otelcol-contrib 0.152.0` for the two contrib-tagged recipes). Follow-ups (smoke.sh exercising getting-started, sandbox smoke test, per-recipe operator checklist, additional backends, CI contrib-binary cache, deprecation lint for `tested-against`) tracked in [`docs/followups/M6.md`](docs/followups/M6.md). See [`MILESTONES.md`](MILESTONES.md) §M6. - **M13 — pyspy receiver Phase 2 (alpha; wire protocol + Python helper)** — Closes the Phase 1 scaffold with an actively-emitting receiver. 
New Go units handle the UDS handshake, faulthandler parsing, fnv128a `stack.id` hashing, `plog.LogRecord` emission with RFC-0009's prescribed attribute set, and a single-goroutine trigger driving both full and main cadences through the half-duplex wire. Workload-side helper ships as `pip install tracecore-pyspy` (stdlib-only; seven-step shutdown per RFC §Helper lifecycle) with PyPI trusted-publisher OIDC + PEP 740 attestation publishing and typosquat-reservation stubs. End-to-end integration test spawns a real `python3` subprocess and round-trips a dump request to catch wire-protocol drift. Rank-in-hello-frame and per-window LRU dedup deferred to Phase 3 (see [`docs/FOLLOWUPS.md`](docs/FOLLOWUPS.md)). See [RFC-0009 §Phase deliverables](docs/rfcs/0009-pyspy-receiver-scope.md#phase-deliverables) and [`components/receivers/pyspy/README.md`](components/receivers/pyspy/README.md) for the operator-facing degraded-mode table. - **M13 — pyspy receiver scaffold (alpha; Phase 1)** — `components/receivers/pyspy/` ships the package skeleton, factory wired through `tools/components-gen`, config + Validate (operator-facing fields documented in [RFC-0009 §Config schema](docs/rfcs/0009-pyspy-receiver-scope.md#config-schema)), all 12 `IncError` kinds from RFC-0009 §Degraded modes declared in `kinds.go`, and the `target_not_attached` posture as the first observable Phase 1 behavior (empty `uds_dir` → single warning + `disabled_reason="target_not_attached"` self-metric + idle scan loop on a 30s retry cadence derived from kubelet liveness-probe grace window). 
CI gates landing alongside: `tools/pyspy-lint/` reads `go tool nm` output for the linked binary and rejects symbols whose names match `(?:^|[._/])Ptrace(?:$|[A-Z_])` / `(?:^|[._/])ptrace(?:$|[_])` / `process_vm_readv` (mutation-verify fixtures at `tools/pyspy-lint/testdata/{clean,violating}` so the gate is falsifiable on stdlib-only Go binaries where it would otherwise be vacuously true); chart-render `yq` step in `.github/workflows/chart.yml` asserts `capabilities.drop: [ALL]` + `capabilities.add: []` when `receivers.pyspy.enabled=true` (the receiver requires no capability addition because the helper walks frames in-target via faulthandler); frame-length-ceiling fuzz harness in `fuzz_test.go` plus a deliberate oversize-input mutation-verify test asserts `readFrame` returns `ErrFrameTooLarge` without allocating the 2³¹-1-byte payload; Linux-only `strace`-asserted integration test in `integration_linux_test.go` plus a falsifier counterpart under build tag `pyspy_strace_falsifier` asserts `kill/ptrace/process_vm_readv` are not invoked while the receiver idles. Phase 1 receiver does NOT yet emit OTLP records — the trigger loop, UDS connect, parser, and `stack.id` hash are Phase 2 deliverables. See [RFC-0009](docs/rfcs/0009-pyspy-receiver-scope.md) and [`components/receivers/pyspy/README.md`](components/receivers/pyspy/README.md). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ab8b901a..8e211b6e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,6 +51,7 @@ Bypass any hook with `--no-verify` if you must (`git commit --no-verify`, `git p - **Sign off your commits**: `git commit -s`. We require [DCO](https://developercertificate.org/). - **Sign your commits cryptographically.** `main` requires signed commits. The fastest path is SSH signing — run `scripts/setup-signing.sh` once and it configures `gpg.format=ssh`, `user.signingkey`, `commit.gpgsign=true`, `tag.gpgsign=true`. 
Register the same SSH key as a "Signing Key" in your GitHub account (Settings → SSH and GPG keys). Sigstore `gitsign` (keyless OIDC) is an accepted alternative; see [sigstore.dev/gitsign](https://docs.sigstore.dev/cosign/signing/git_support/). - **Reviewing a PR — optional rigorous-review skill.** `.claude/skills/pr-review-loop/` ships a Claude Code skill that drives a multi-phase structured review (author self-review, stakeholder lenses, adversarial deep read, A+ aspiration, simplification). Prereq: `ralph-loop@claude-plugins-official` plugin installed. Invoke via `/pr-review-loop` from inside a Claude Code session rooted in the PR's worktree. Optional — single-pass review remains the default. +- **Syncing a feature branch with `main` - use `git merge`, not `git rebase`.** When `main` advances during a long-lived feature branch, catch up with `git merge origin/main` followed by a plain `git push`. Do NOT `git rebase origin/main`. Rationale: `main` is squash-merged from PRs, so its linearity is preserved regardless of the feature branch's shape (the squash collapses any merge commits on the branch too). Rebasing a pushed feature branch rewrites every commit SHA and forces reviewers to lose their "show changes since last review" position; a merge keeps SHAs stable and skips the force-push ceremony. `required_linear_history` on `main` only governs how PRs land; it does *not* require feature branches to rebase. Reserve `git rebase` for branches that have never been pushed and have <=2 commits. ## Commit message format diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..2c613c05 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,42 @@ +# syntax=docker/dockerfile:1.7 +# +# Container image for the tracecore collector. This Dockerfile consumes +# a pre-built reproducible binary from `release/$BINARY_BASENAME`; it +# does NOT recompile. 
That is load-bearing: image reproducibility
+# reduces to binary reproducibility (already gated in release.yml) plus
+# the base layer pinned by digest below.
+#
+# Local build (CI calls the same):
+#   make build BIN=release/tracecore_linux_amd64
+#   docker buildx build --platform=linux/amd64 \
+#     --build-arg BINARY_PATH=release/tracecore_linux_amd64 \
+#     -t tracecore:dev .
+#
+# The chart's pod spec assumes:
+#   * Non-root UID 65532 (distroless `nonroot`)
+#   * readOnlyRootFilesystem: true (tracecore writes nothing outside
+#     mounted volumes)
+#   * No shell, no package manager (distroless guarantees both)
+
+# Pinned by digest, not tag, so a registry rebuild of `:nonroot` cannot
+# silently change what the chart's default install pulls. Refresh the
+# digest when bumping; never use the floating tag.
+FROM gcr.io/distroless/static-debian12:nonroot@sha256:d093aa3e30dbadd3efe1310db061a14da60299baff8450a17fe0ccc514a16639
+
+ARG BINARY_PATH=release/tracecore_linux_amd64
+
+# Declared so buildkit's SOURCE_DATE_EPOCH layer-rewrite picks it up;
+# buildkit >= 0.11 (shipped with docker/build-push-action@v6) rewrites
+# COPY layer timestamps to this epoch when the env var is set on the
+# build invocation. Declaring the ARG here makes the determinism
+# contract visible to readers of the Dockerfile alone and lets a local
+# `docker buildx build --build-arg SOURCE_DATE_EPOCH=...` reproduce
+# the CI image bit-for-bit.
+ARG SOURCE_DATE_EPOCH
+# --chmod: CI artifacts come back 0644 (upload-artifact drops the exec bit).
+COPY --chmod=0755 --chown=nonroot:nonroot ${BINARY_PATH} /usr/local/bin/tracecore
+
+USER nonroot:nonroot
+
+ENTRYPOINT ["/usr/local/bin/tracecore"]
+CMD ["collect", "--config=/etc/tracecore/config.yaml"]
diff --git a/Makefile b/Makefile
index 0f44b58f..b5f8f3ce 100644
--- a/Makefile
+++ b/Makefile
@@ -224,12 +224,17 @@ test-extras: ## Run all test-suite extras NOT in `make ci`. Independent sub-tar
 	done; \
 	exit $$status
 
-doc-check: ## Verify test identifiers referenced in rot-prone docs exist in the source tree, AND alert names in component RUNBOOKs match the alerts.yaml, AND release.yml + docs/reproducibility.md share the same `gh attestation verify` flag set, AND Chart.yaml appVersion tracks internal/version.Version.
+doc-check: ## Verify test identifiers referenced in rot-prone docs exist in the source tree, AND alert names in component RUNBOOKs match the alerts.yaml, AND release.yml + docs/reproducibility.md share the same `gh attestation verify` flag set, AND each `gh attestation verify` flag we use is still recognised by the installed `gh` CLI, AND Chart.yaml appVersion tracks internal/version.Version.
 	@scripts/doc-check.sh
 	@scripts/alert-check.sh
 	@scripts/release-doc-parity.sh
+	@scripts/test-release-doc-parity.sh
+	@scripts/gh-attestation-flag-lint.sh
 	@scripts/chart-appversion-check.sh
 
+base-digest-check: ## Compare the Dockerfile pin for gcr.io/distroless/static-debian12:nonroot against the live registry digest. Strict mode (exits non-zero on drift) is for the M21 release-prep checklist; warn mode is for periodic invocation.
+	@scripts/base-digest-check.sh --strict
+
 register-lint: ## Verify `func Register*` symbols live only under components/** (or an explicit allowlist). Enforces STRATEGY.md §"Each component owns its own Factory var".
 	@scripts/register-lint.sh
 
diff --git a/docs/followups/M3.md b/docs/followups/M3.md
index 740c94f5..38768e49 100644
--- a/docs/followups/M3.md
+++ b/docs/followups/M3.md
@@ -10,16 +10,21 @@ natural boundary (per MEMORY.md `feedback_narrow_pr_scope`). Mostly
 M21-trigger or post-M3 follow-up cadence.
 
 - [ ] **`chart-appversion` CI gate enforces only non-empty, not drift-against-binary-tag.** *Partially shipped via PR #133:* `scripts/chart-appversion-check.sh` now compares `Chart.yaml` `appVersion` against `internal/version/version.go` `Version` and is wired into `make doc-check`.
Remaining gap: the original ask was drift-against-the-actual-binary-release-tag (parse `gh release view` and assert `Chart.yaml.appVersion == binary_tag`). The shipped gate covers in-tree drift; the binary-tag check still depends on M21 publishing a real tag. *Trigger:* M21 release-tag prep (the binary-tag check becomes load-bearing when a real tag ships). -- [ ] **Container-image publish to `ghcr.io/tracecoreai/tracecore`.** - The chart's default `image.repository` - (`install/kubernetes/tracecore/values.yaml`) points at - `ghcr.io/tracecoreai/tracecore`, but `release.yml` today - publishes only the binary + SBOM + cosign bundle + SLSA - provenance as GitHub Release artifacts — no container image push. - Operators following the chart's defaults cannot pull yet. - RFC-0008 names this as the target operator-pull path; this - item closes the gap. *Trigger:* M21 v0.1.0 (release-tag-time - requirement) or first operator request, whichever comes first. + +- *Closed (see comment above): `ghcr.io/tracecoreai/tracecore:<tag>` + publishes on every release tag, signed and attested. The chart's + default `image.repository` is now a live pull path, not a future + one.* - [ ] **SLSA Build L3 via the reusable-workflow generator.** Replace the user-defined `release.yml` sign + attest steps with a call @@ -38,6 +43,117 @@ M21-trigger or post-M3 follow-up cadence. is otherwise exercised only at tag time, which means a toolchain-drift regression goes unnoticed until the next release. *Trigger:* first published v0.1.x. +- [ ] **Distroless base digest rotation cadence.** `scripts/base-digest-check.sh` + lands with this PR (compares the Dockerfile pin for + `gcr.io/distroless/static-debian12:nonroot` against the live + registry digest). `make base-digest-check` invokes it in + `--strict` mode; M21 release-prep should run that target before + cutting `v0.1.0` and re-pin if drift is reported. 
Past v0.1.0, + wire the same script into a nightly cron alongside the drift + cron above so a stale base layer surfaces between releases, not + at release-prep time. *Trigger:* M21 release-prep checklist + lands as a documented sequence. + +- *Closed (see comment above): all six jobs in `release.yml` carry + `timeout-minutes` caps; default 6h ceiling no longer applies.* + +- *Closed (see comment above): `cosign verify-attestation` smoke check + shipped in `image` job; image provenance attestation is now CI-verified + by predicate type + by identity binding, not just pushed and trusted.* + +## Out of scope for M3, tracked here for future milestones + +Each item below is an explicit non-goal of M3 (not a "we forgot"), held +out because the deliverable for M3 was *signed and attested image +publish on every release tag*, not *every supply-chain hardening that +references the published image*. Rowed here so a future audit can find +them without reading commit archaeology. + +- [ ] **Multi-arch image build (`linux/arm64` alongside `linux/amd64`).** + Today `release.yml` builds `linux/amd64` only; the binary half of + the pipeline matches (`GOARCH: amd64`). A multi-arch image would + need a buildx matrix + a manifest list, *and* a matching arm64 + binary build in the `build` job (same `SOURCE_DATE_EPOCH` / + reproducible-build guarantees on both architectures). The chart + defaults to amd64 nodes; demand-driven, not foundational. + *Trigger:* first user request for arm64 (Apple-silicon dev cluster + or AWS Graviton deploy) OR M5 cross-arch certification. +- [ ] **Container vulnerability scan gate (trivy / grype) in `release.yml`.** + A distroless base + a single Go binary minimizes the CVE surface, + but "minimal" is not "zero" — a future Go toolchain CVE or a + glibc/libssl CVE in the base layer would land silently today. 
+ Wire `aquasecurity/trivy-action` or `anchore/scan-action` into + the `image` job at `--severity=CRITICAL,HIGH` failing-closed, with + `.trivyignore` for documented-and-accepted findings. *Trigger:* + first reported CVE against a published image OR M21 supply-chain + audit asks for it. +- [ ] **Image SBOM (syft / cyclonedx) attached as a manifest sub-attestation.** + Today the binary CycloneDX SBOM (`cyclonedx-gomod mod`) ships as + a release artifact but is not attached to the image manifest. A + pull-time verifier (admission controller, signature-aware + registry) can't cross-reference image-digest → SBOM without an + out-of-band lookup. Wire `anchore/sbom-action` or `cyclonedx` + with `--upload` to push an SBOM attestation by digest alongside + the provenance attestation. *Trigger:* same as the vuln-scan + gate; usually requested together. + +## Items impossible to accomplish locally — only verifiable on a real tag push + +These are not "deferred" in the sense of "we could but chose not to": +the tracecore project today has no infrastructure to exercise them +short of a real `vX.Y.Z` tag push. Listed here so M21 release-prep +includes them in its dry-run pass and so a future contributor does +not file a "missing test" issue assuming the gap is oversight. + +- [ ] **End-to-end image push smoke against `ghcr.io/tracecoreai/tracecore`.** + The `image` job in `release.yml` only fires on a `vX.Y.Z` tag + push, so the push + tag-compute + cosign sign + cosign verify + + attest-build-provenance chain is unexercised locally. The + mitigations in place: (a) `actionlint` on `release.yml`, + (b) `release-doc-parity.sh` image block (mutation-verified), + (c) `gh-attestation-flag-lint.sh` (catches gh CLI flag rename), + (d) the binary-digest guard in the `image` job. None of those + mitigations cover the registry-side semantics. *Trigger:* first + `vX.Y.Z` tag push (M21 v0.1.0 or any pre-release tag). 
+- [ ] **`gh attestation verify "oci://$DIGEST"` against a real + attestation in shape this pipeline emits.** No public OCI image + carries a GitHub Actions provenance attestation in the exact + shape `attest-build-provenance push-to-registry=true` writes + against `release.yml`, so the verifier walkthrough in + `docs/reproducibility.md` step 9 cannot be smoke-tested + end-to-end before M21. `gh-attestation-flag-lint.sh` partially + covers this by asserting each flag is still recognised by the + CLI; the residual risk is a *semantic* change to an existing + flag (e.g., `--source-ref` accepting a different format) that + no flag-name lint would catch. *Trigger:* same as above — + first tag push. +- [ ] **Two-build digest equality for the image.** The + `SOURCE_DATE_EPOCH` plumbing through buildkit's layer-rewrite + claims image reproducibility, but the claim is only verifiable + by building the image twice at the same SHA and diff'ing the + resulting manifest digests. Doing that locally requires a + working `docker buildx` (which the local dev environment + currently lacks; see "needs prod data" cross-shard for the + broader build-env gap). The CI runner has buildx — a second + build step in `release.yml` would close this gap inline, but + doubling the runner time at every tag push is a tradeoff worth + revisiting after the first real release confirms image stability + under churn. *Trigger:* M21 v0.1.0 ships and image-rebuild + drift becomes the next-most-likely supply-chain regression + class.