Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 177 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,19 @@ env:
GO_VERSION_FILE: go.mod
BINARY_BASENAME: tracecore_linux_amd64
CYCLONEDX_GOMOD_VERSION: v1.10.0
# Image publish destination. Chart's values.yaml `image.repository`
# default points here; drift between the two is the contract this
# job upholds. Lowercase per OCI distribution spec.
IMAGE_REPO: ghcr.io/tracecoreai/tracecore

jobs:
build:
name: build (reproducible verify)
runs-on: ubuntu-latest
# Hard ceiling on a stuck job. The two cold-cache Go builds + diffoscope
# comparison fit comfortably under 10 minutes on ubuntu-latest; 20 leaves
# headroom for apt mirror weather without letting a hang burn billing.
timeout-minutes: 20
permissions:
contents: read
outputs:
Expand Down Expand Up @@ -214,6 +222,7 @@ jobs:
name: sbom (cyclonedx)
runs-on: ubuntu-latest
needs: build
timeout-minutes: 15
permissions:
contents: read
steps:
Expand Down Expand Up @@ -285,6 +294,9 @@ jobs:
name: sign (cosign keyless)
runs-on: ubuntu-latest
needs: build
# OIDC token + Fulcio + Rekor round-trip; usually <2min. Tighter cap so a
# Sigstore service degradation fails fast instead of stalling the release.
timeout-minutes: 10
permissions:
id-token: write
contents: read
Expand Down Expand Up @@ -347,6 +359,7 @@ jobs:
name: provenance (SLSA v1.0)
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
permissions:
id-token: write
attestations: write
Expand Down Expand Up @@ -443,10 +456,173 @@ jobs:
if-no-files-found: error
retention-days: 7

image:
name: image (build + push + sign + attest)
runs-on: ubuntu-latest
needs: build
# Buildkit cold pull of distroless + COPY layer + push to ghcr + 2× cosign
# OIDC round-trips. Real-world ~3-5min; 20 leaves slack for ghcr.io weather
# without letting a Sigstore stall block the release indefinitely.
timeout-minutes: 20
permissions:
contents: read
packages: write # push to ghcr.io/tracecoreai/tracecore
id-token: write # cosign keyless + attest-build-provenance OIDC
attestations: write # attest-build-provenance writes a bundle
outputs:
digest: ${{ steps.push.outputs.digest }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Pin by commit SHA so the Dockerfile + workflow this job
# reads match the commit that ran. The binary downloaded
# below has its own digest guard (see "Verify binary digest
# matches build job"), which catches the case of a binary
# built from a different tree than the Dockerfile read here.
# Together these close the force-push window: a force-push
# to the tag between `build` and `image` cannot smuggle in
# either a different binary or a different Dockerfile.
ref: ${{ github.sha }}
persist-credentials: false

- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
name: binary
path: release

- name: Verify binary digest matches build job
env:
EXPECTED: ${{ needs.build.outputs.digest }}
run: |
set -euo pipefail
actual=$(sha256sum "release/$BINARY_BASENAME" | awk '{print $1}')
if [ "$actual" != "$EXPECTED" ]; then
echo "::error::artifact digest drift: build=$EXPECTED downloaded=$actual"
exit 1
fi

- uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0

- uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Compute image tags
id: meta
env:
TAG: ${{ needs.build.outputs.tag }}
IS_PRERELEASE: ${{ contains(needs.build.outputs.tag, '-') }}
run: |
set -euo pipefail
# Always tag with the release version. Only float `:latest`
# for stable releases (no `-` in the SemVer pre-release
# field); a pre-release that takes `:latest` would silently
# promote alpha bits to the chart's default-pull surface.
tags="$IMAGE_REPO:$TAG"
if [ "$IS_PRERELEASE" != "true" ]; then
tags="$tags"$'\n'"$IMAGE_REPO:latest"
fi
{
echo "tags<<EOF"
echo "$tags"
echo "EOF"
} >> "$GITHUB_OUTPUT"

- name: Compute SOURCE_DATE_EPOCH for image layer
id: sde
env:
TAG: ${{ needs.build.outputs.tag }}
run: |
set -euo pipefail
# Same epoch as the binary build so the image layer carrying
# the binary has a deterministic mtime. Without this, two
# builds at the same SHA produce different image digests
# purely from `now()` in the COPY layer.
epoch=$(git log -1 --pretty=%ct "$TAG")
echo "epoch=$epoch" >> "$GITHUB_OUTPUT"

- uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2
id: push
env:
# SOURCE_DATE_EPOCH must reach buildkit through the build
# environment (not just as a --build-arg) so buildkit's
# layer-timestamp rewrite kicks in. The Dockerfile also
# declares `ARG SOURCE_DATE_EPOCH` so the value is visible
# to a reader of the Dockerfile alone, and so a local
# `docker buildx build` reproduces the CI image bit-for-bit.
SOURCE_DATE_EPOCH: ${{ steps.sde.outputs.epoch }}
with:
context: .
file: Dockerfile
platforms: linux/amd64
push: true
tags: ${{ steps.meta.outputs.tags }}
build-args: |
BINARY_PATH=release/${{ env.BINARY_BASENAME }}
SOURCE_DATE_EPOCH=${{ steps.sde.outputs.epoch }}
provenance: false # we attest below with GitHub's reusable attester
sbom: false # SBOM ships as a release artifact, not a manifest sub-attestation

- uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6 # v4.1.2

- name: Sign image (keyless)
env:
DIGEST: ${{ steps.push.outputs.digest }}
run: |
set -euo pipefail
# Sign the manifest by digest, not tag — a registry rebuild
# of `:latest` would otherwise let an attacker replace what
# `cosign verify` resolves. The digest is the trust root.
cosign sign --yes "${IMAGE_REPO}@${DIGEST}"

- name: Verify image signature smoke check
env:
DIGEST: ${{ steps.push.outputs.digest }}
IDENTITY_REGEXP: "^https://github.com/${{ github.repository }}/\\.github/workflows/release\\.yml@refs/tags/"
TAG: ${{ needs.build.outputs.tag }}
run: |
set -euo pipefail
# Same identity-binding pattern as the binary blob verify:
# pin to release.yml on a tag ref. Mismatch fails closed.
cosign verify "${IMAGE_REPO}@${DIGEST}" \
--certificate-identity-regexp "$IDENTITY_REGEXP" \
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
--certificate-github-workflow-ref "refs/tags/$TAG" \
--certificate-github-workflow-trigger 'push'

- uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0
with:
subject-name: ${{ env.IMAGE_REPO }}
subject-digest: ${{ steps.push.outputs.digest }}
push-to-registry: true

- name: Verify image attestation smoke check
env:
DIGEST: ${{ steps.push.outputs.digest }}
IDENTITY_REGEXP: "^https://github.com/${{ github.repository }}/\\.github/workflows/release\\.yml@refs/tags/"
TAG: ${{ needs.build.outputs.tag }}
run: |
set -euo pipefail
# `cosign sign` covered the manifest signature. `attest-build-provenance`
# pushed a SLSA v1 provenance attestation alongside the manifest in the
# registry (push-to-registry: true). Verify that attestation now, by
# digest + by predicate type + by the same identity binding, so a
# third-party verifier replaying the docs/reproducibility.md walkthrough
# won't be the first to discover a broken or missing attestation.
cosign verify-attestation "${IMAGE_REPO}@${DIGEST}" \
--type slsaprovenance1 \
--certificate-identity-regexp "$IDENTITY_REGEXP" \
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
--certificate-github-workflow-ref "refs/tags/$TAG" \
--certificate-github-workflow-trigger 'push'

release:
name: release
runs-on: ubuntu-latest
needs: [build, sbom, sign, provenance]
needs: [build, sbom, sign, provenance, image]
timeout-minutes: 10
permissions:
contents: write
steps:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Pre-alpha. The CLI runs the M1 pipeline runtime end-to-end via factory-based ass

### Added

- **M3: container image publish to `ghcr.io/tracecoreai/tracecore`.** Every release tag now pushes a signed and attested container image alongside the existing binary + SBOM + cosign-bundle + provenance artifacts. The image is built from the same byte-reproducible binary the binary-side jobs sign (no in-image recompile), the manifest is cosigned by digest using the existing keyless OIDC identity binding, and a SLSA v1.0 provenance attestation is pushed to the registry via `attest-build-provenance push-to-registry=true`. Stable releases (no `-` in the SemVer pre-release field) also tag `:latest`; pre-releases do not. The Dockerfile pins `gcr.io/distroless/static-debian12:nonroot` by digest and runs as the distroless `nonroot` user (UID 65532) matching the chart's `runAsUser`. Verification walkthrough at [`docs/reproducibility.md`](docs/reproducibility.md) steps 8 and 9. Closes the long-standing chart-default-image gap (chart's `image.repository: ghcr.io/tracecoreai/tracecore` is now a live pull path, not a future one).
- **M6 - integration recipes scaffold.** `docs/integrations/{otel-backend,honeycomb,datadog,clickhouse-direct}.md` ship the first four backend recipes plus matching `docs/integrations/examples/*.yaml`. Each recipe carries `<!-- tested-against: ... -->` and `<!-- last-verified: YYYY-MM-DD -->` HTML comments. `scripts/doc-check.sh` adds eight new gates: (a) every recipe contains >=1 fenced `yaml` block whose first non-blank line names a file under `docs/integrations/examples/`; (b) tested-against marker present; (c) last-verified marker present and <=180 days old; (d) `docs/README.md` indexes every recipe; (e) `docs/nps.md` carries the three canonical H3 survey headings (Recommend / Biggest change / Best part); (f) `docs/FAILURE-MODES.md` enumerates rows for `vendor SDK failure` / `exporter unreachable` / `config invalid` each citing a real `Test*` identifier; (g) `docs/getting-started.md` has <=5 fenced bash/sh blocks; (h) every `docs/integrations/examples/*.yaml` uses `REPLACE_WITH_*` placeholders (rejects `${VAR}` interpolation, since tracecore does not expand env vars in YAML). `scripts/validator-recipe.sh` runs `./tracecore validate --config=` against `tracecore`-tagged examples and `otelcol-contrib validate --config=` against contrib-tagged examples (pinned to v0.152.0 by SHA-256 against the upstream `opentelemetry-collector-releases` checksums file). New `validator-recipe` CI job feeds the `verify` aggregator. All eight new doc-check gates mutation-verified; all four example YAMLs empirically validated against their target binary (in-tree `tracecore` for the two tracecore-tagged recipes, `otelcol-contrib 0.152.0` for the two contrib-tagged recipes). Follow-ups (smoke.sh exercising getting-started, sandbox smoke test, per-recipe operator checklist, additional backends, CI contrib-binary cache, deprecation lint for `tested-against`) tracked in [`docs/followups/M6.md`](docs/followups/M6.md). See [`MILESTONES.md`](MILESTONES.md) §M6.
- **M13 — pyspy receiver Phase 2 (alpha; wire protocol + Python helper)** — Closes the Phase 1 scaffold with an actively-emitting receiver. New Go units handle the UDS handshake, faulthandler parsing, fnv128a `stack.id` hashing, `plog.LogRecord` emission with RFC-0009's prescribed attribute set, and a single-goroutine trigger driving both full and main cadences through the half-duplex wire. Workload-side helper ships as `pip install tracecore-pyspy` (stdlib-only; seven-step shutdown per RFC §Helper lifecycle) with PyPI trusted-publisher OIDC + PEP 740 attestation publishing and typosquat-reservation stubs. End-to-end integration test spawns a real `python3` subprocess and round-trips a dump request to catch wire-protocol drift. Rank-in-hello-frame and per-window LRU dedup deferred to Phase 3 (see [`docs/FOLLOWUPS.md`](docs/FOLLOWUPS.md)). See [RFC-0009 §Phase deliverables](docs/rfcs/0009-pyspy-receiver-scope.md#phase-deliverables) and [`components/receivers/pyspy/README.md`](components/receivers/pyspy/README.md) for the operator-facing degraded-mode table.
- **M13 — pyspy receiver scaffold (alpha; Phase 1)** — `components/receivers/pyspy/` ships the package skeleton, factory wired through `tools/components-gen`, config + Validate (operator-facing fields documented in [RFC-0009 §Config schema](docs/rfcs/0009-pyspy-receiver-scope.md#config-schema)), all 12 `IncError` kinds from RFC-0009 §Degraded modes declared in `kinds.go`, and the `target_not_attached` posture as the first observable Phase 1 behavior (empty `uds_dir` → single warning + `disabled_reason="target_not_attached"` self-metric + idle scan loop on a 30s retry cadence derived from kubelet liveness-probe grace window). CI gates landing alongside: `tools/pyspy-lint/` reads `go tool nm` output for the linked binary and rejects symbols whose names match `(?:^|[._/])Ptrace(?:$|[A-Z_])` / `(?:^|[._/])ptrace(?:$|[_])` / `process_vm_readv` (mutation-verify fixtures at `tools/pyspy-lint/testdata/{clean,violating}` so the gate is falsifiable on stdlib-only Go binaries where it would otherwise be vacuously true); chart-render `yq` step in `.github/workflows/chart.yml` asserts `capabilities.drop: [ALL]` + `capabilities.add: []` when `receivers.pyspy.enabled=true` (the receiver requires no capability addition because the helper walks frames in-target via faulthandler); frame-length-ceiling fuzz harness in `fuzz_test.go` plus a deliberate oversize-input mutation-verify test asserts `readFrame` returns `ErrFrameTooLarge` without allocating the 2³¹-1-byte payload; Linux-only `strace`-asserted integration test in `integration_linux_test.go` plus a falsifier counterpart under build tag `pyspy_strace_falsifier` asserts `kill/ptrace/process_vm_readv` are not invoked while the receiver idles. Phase 1 receiver does NOT yet emit OTLP records — the trigger loop, UDS connect, parser, and `stack.id` hash are Phase 2 deliverables. See [RFC-0009](docs/rfcs/0009-pyspy-receiver-scope.md) and [`components/receivers/pyspy/README.md`](components/receivers/pyspy/README.md).
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Bypass any hook with `--no-verify` if you must (`git commit --no-verify`, `git p
- **Sign off your commits**: `git commit -s`. We require [DCO](https://developercertificate.org/).
- **Sign your commits cryptographically.** `main` requires signed commits. The fastest path is SSH signing — run `scripts/setup-signing.sh` once and it configures `gpg.format=ssh`, `user.signingkey`, `commit.gpgsign=true`, `tag.gpgsign=true`. Register the same SSH key as a "Signing Key" in your GitHub account (Settings → SSH and GPG keys). Sigstore `gitsign` (keyless OIDC) is an accepted alternative; see [sigstore.dev/gitsign](https://docs.sigstore.dev/cosign/signing/git_support/).
- **Reviewing a PR — optional rigorous-review skill.** `.claude/skills/pr-review-loop/` ships a Claude Code skill that drives a multi-phase structured review (author self-review, stakeholder lenses, adversarial deep read, A+ aspiration, simplification). Prereq: `ralph-loop@claude-plugins-official` plugin installed. Invoke via `/pr-review-loop` from inside a Claude Code session rooted in the PR's worktree. Optional — single-pass review remains the default.
- **Syncing a feature branch with `main` - use `git merge`, not `git rebase`.** When `main` advances during a long-lived feature branch, catch up with `git merge origin/main` followed by a plain `git push`. Do NOT `git rebase origin/main`. Rationale: `main` is squash-merged from PRs, so its linearity is preserved regardless of the feature branch's shape (the squash collapses any merge commits on the branch too). Rebasing a pushed feature branch rewrites every commit SHA and forces reviewers to lose their "show changes since last review" position; a merge keeps SHAs stable and skips the force-push ceremony. `required_linear_history` on `main` only governs how PRs land; it does *not* require feature branches to rebase. Reserve `git rebase` for branches that have never been pushed and have <=2 commits.

## Commit message format

Expand Down
42 changes: 42 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# syntax=docker/dockerfile:1.7
#
# Container image for the tracecore collector. This Dockerfile consumes
# a pre-built reproducible binary from `release/$BINARY_BASENAME`; it
# does NOT recompile. That is load-bearing: image reproducibility
# reduces to binary reproducibility (already gated in release.yml) plus
# the base layer pinned by digest below.
#
# Local build (CI calls the same):
# make build BIN=release/tracecore_linux_amd64
# docker buildx build --platform=linux/amd64 \
# --build-arg BINARY_PATH=release/tracecore_linux_amd64 \
# -t tracecore:dev .
#
# The chart's pod spec assumes:
# * Non-root UID 65532 (distroless `nonroot`)
# * readOnlyRootFilesystem: true (tracecore writes nothing outside
# mounted volumes)
# * No shell, no package manager (distroless guarantees both)

# Pinned by digest, not tag, so a registry rebuild of `:nonroot` cannot
# silently change what the chart's default install pulls. Refresh the
# digest when bumping; never use the floating tag.
FROM gcr.io/distroless/static-debian12:nonroot@sha256:d093aa3e30dbadd3efe1310db061a14da60299baff8450a17fe0ccc514a16639

ARG BINARY_PATH=release/tracecore_linux_amd64

# Declared so buildkit's SOURCE_DATE_EPOCH layer-rewrite picks it up;
# buildkit >= 0.11 (shipped with docker/build-push-action@v6) rewrites
# COPY layer timestamps to this epoch when the env var is set on the
# build invocation. Declaring the ARG here makes the determinism
# contract visible to readers of the Dockerfile alone and lets a local
# `docker buildx build --build-arg SOURCE_DATE_EPOCH=...` reproduce
# the CI image bit-for-bit.
ARG SOURCE_DATE_EPOCH

COPY --chown=nonroot:nonroot ${BINARY_PATH} /usr/local/bin/tracecore

USER nonroot:nonroot

ENTRYPOINT ["/usr/local/bin/tracecore"]
CMD ["collect", "--config=/etc/tracecore/config.yaml"]
Loading
Loading