From 2fd28ec29e4314b3f4424cee6d6e44c4b17632b1 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Sun, 26 Apr 2026 23:40:31 +0800 Subject: [PATCH 1/2] docs: dev-env bootstrap script + Linux/Mac quick-path setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New scripts/setup-dev-env.sh — idempotent bootstrap for macOS (Homebrew) and Linux (apt/dnf/pacman). Installs rustup+stable, Node 20+, jj (with required identity), jq, AWS CLI v2; builds the workspace; runs smoke tests. Skips Google Chrome and AWS infra by design. - docs/dev-setup.md §1 gets a "Quick path" subsection pointing at the script; manual matrix preserved as fallback. §2 notes that bootstrap users can skip ahead. Verified: bash -n syntax check passes; detection logic dry-traced on the operator's Mac (every prerequisite resolves so a re-run would skip every installer). Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/dev-setup.md | 29 +++++ scripts/setup-dev-env.sh | 227 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+) create mode 100755 scripts/setup-dev-env.sh diff --git a/docs/dev-setup.md b/docs/dev-setup.md index 3c629c52..194ed806 100644 --- a/docs/dev-setup.md +++ b/docs/dev-setup.md @@ -7,6 +7,33 @@ The CDP demo path is **the only supported path** — earlier Gmail-backed varian ## 1. Prerequisites +### Quick path: one-shot bootstrap (macOS + Linux) + +Fresh machine? 
Run the bootstrap script — it installs every prerequisite below, builds the workspace, and runs the smoke tests: + +```bash +bash scripts/setup-dev-env.sh +``` + +The script is idempotent (safe to re-run), detects macOS vs Linux (apt / dnf / pacman), and handles: + +- Homebrew (macOS) or the system package manager (Linux) +- `rustup` + stable toolchain +- Node 20+ (Homebrew `node@20`, NodeSource on apt/dnf, distro package on Arch) +- `jj` (Homebrew / pacman, or `cargo install jj-cli` as fallback) — also seeds the required `Hanwen Cheng <heawen.cheng@gmail.com>` jj identity if unset +- `jq` +- AWS CLI v2 (Homebrew on macOS, official zip on Linux) +- `cargo build --workspace --release` +- `npm install --prefix provisioner-scripts` + `playwright install chromium` +- `cargo test --workspace` and `npm test --prefix provisioner-scripts` as a smoke gate + +Two things the script intentionally does **not** do: + +1. **Install Google Chrome.** The CDP scrapers attach to real Chrome at `localhost:9222`; install it from <https://www.google.com/chrome/>. +2. **Touch AWS infra.** That's the one-time Stage 6 setup in §3. + +### Manual matrix (if you'd rather pick tools yourself) + | Tool | Why | Install | |---|---|---| | Rust (stable, edition 2021+) | Workspace crates | `rustup toolchain install stable && rustup default stable` | @@ -23,6 +50,8 @@ Optional but recommended: ## 2. Build everything +If you ran `scripts/setup-dev-env.sh` in §1, the workspace is already built and tested — skip ahead to §3. Otherwise: + ```bash cd ~/Projects/agentkeys # or wherever your checkout lives cargo build --workspace --release diff --git a/scripts/setup-dev-env.sh b/scripts/setup-dev-env.sh new file mode 100755 index 00000000..c9f3104e --- /dev/null +++ b/scripts/setup-dev-env.sh @@ -0,0 +1,227 @@ +#!/usr/bin/env bash +# AgentKeys dev environment bootstrap for fresh macOS or Linux machines. +# +# Installs: rustup + stable toolchain, Node 20+, jj, jq, AWS CLI v2, then +# builds the Cargo workspace and the provisioner-scripts npm project. 
+# +# bash scripts/setup-dev-env.sh +# +# Idempotent: re-run safely. Skips anything already installed at a usable +# version. Does NOT install Google Chrome (needed by the CDP demo) — install +# that manually from https://www.google.com/chrome/. + +set -euo pipefail + +REPO_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$REPO_ROOT" + +log() { printf '\033[1;36m==>\033[0m %s\n' "$*"; } +warn() { printf '\033[1;33m!!\033[0m %s\n' "$*" >&2; } +die() { printf '\033[1;31mxx\033[0m %s\n' "$*" >&2; exit 1; } + +have() { command -v "$1" >/dev/null 2>&1; } + +OS="$(uname -s)" +case "$OS" in + Darwin) PLATFORM=mac ;; + Linux) PLATFORM=linux ;; + *) die "Unsupported OS: $OS (this script handles macOS + Linux only)" ;; +esac +log "Platform detected: $PLATFORM" + +############################################################################### +# Package manager +############################################################################### +install_brew() { + if ! have brew; then + log "Installing Homebrew" + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + if [[ -x /opt/homebrew/bin/brew ]]; then + eval "$(/opt/homebrew/bin/brew shellenv)" + elif [[ -x /usr/local/bin/brew ]]; then + eval "$(/usr/local/bin/brew shellenv)" + fi + fi +} + +linux_pm() { + if have apt-get; then echo apt + elif have dnf; then echo dnf + elif have pacman; then echo pacman + else die "No supported Linux package manager (apt/dnf/pacman)" + fi +} + +if [[ "$PLATFORM" == mac ]]; then + install_brew + PM=brew +else + PM="$(linux_pm)" +fi +log "Package manager: $PM" + +pm_install() { + case "$PM" in + brew) brew install "$@" ;; + apt) sudo apt-get update -y && sudo apt-get install -y "$@" ;; + dnf) sudo dnf install -y "$@" ;; + pacman) sudo pacman -S --needed --noconfirm "$@" ;; + esac +} + +############################################################################### +# Core tools: curl, build essentials, jq 
+############################################################################### +log "Ensuring base build tools" +case "$PM" in + apt) pm_install curl build-essential pkg-config libssl-dev ca-certificates ;; + dnf) pm_install curl gcc gcc-c++ make pkgconf-pkg-config openssl-devel ca-certificates ;; + pacman) pm_install curl base-devel openssl ca-certificates ;; + brew) : ;; +esac + +if ! have jq; then + log "Installing jq" + pm_install jq +fi + +############################################################################### +# Rust (rustup + stable) +############################################################################### +if ! have rustup; then + log "Installing rustup + stable toolchain" + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal + # shellcheck disable=SC1091 + source "$HOME/.cargo/env" +else + log "rustup already installed -- ensuring stable toolchain" + rustup toolchain install stable >/dev/null + rustup default stable >/dev/null +fi +have cargo || { source "$HOME/.cargo/env"; } +log "Rust: $(rustc --version)" + +############################################################################### +# Node 20+ +############################################################################### +node_major() { node -v 2>/dev/null | sed -E 's/^v([0-9]+).*/\1/'; } + +needs_node=true +if have node; then + v="$(node_major)" + if [[ -n "$v" && "$v" -ge 20 ]]; then + needs_node=false + fi +fi + +if $needs_node; then + log "Installing Node 20+" + case "$PM" in + brew) + brew install node@20 + brew link --overwrite --force node@20 || true + ;; + apt) + curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - + sudo apt-get install -y nodejs + ;; + dnf) + curl -fsSL https://rpm.nodesource.com/setup_20.x | sudo -E bash - + sudo dnf install -y nodejs + ;; + pacman) + pm_install nodejs npm + ;; + esac +fi +log "Node: $(node -v) npm: $(npm -v)" + 
+############################################################################### +# jj (Jujutsu) +############################################################################### +if ! have jj; then + log "Installing jj (Jujutsu)" + case "$PM" in + brew) brew install jj ;; + pacman) pm_install jujutsu ;; + apt|dnf) + # No first-party packages on apt/dnf yet -- install via cargo. + cargo install --locked jj-cli + ;; + esac +fi +log "jj: $(jj --version)" + +# Identity required by CLAUDE.md global rules. +if ! jj config get user.name >/dev/null 2>&1; then + log "Setting jj identity (Hanwen Cheng )" + jj config set --user user.name "Hanwen Cheng" + jj config set --user user.email "heawen.cheng@gmail.com" +fi + +############################################################################### +# AWS CLI v2 +############################################################################### +if ! have aws; then + log "Installing AWS CLI v2" + case "$PLATFORM" in + mac) + brew install awscli + ;; + linux) + tmp="$(mktemp -d)" + arch="$(uname -m)" + case "$arch" in + x86_64) awsurl="https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" ;; + aarch64|arm64) awsurl="https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" ;; + *) die "Unsupported Linux arch for AWS CLI: $arch" ;; + esac + have unzip || pm_install unzip + curl -sSL "$awsurl" -o "$tmp/aws.zip" + unzip -q "$tmp/aws.zip" -d "$tmp" + sudo "$tmp/aws/install" --update + rm -rf "$tmp" + ;; + esac +fi +log "AWS CLI: $(aws --version 2>&1)" + +############################################################################### +# Build Rust workspace + provisioner-scripts +############################################################################### +log "Building Cargo workspace (release)" +cargo build --workspace --release + +log "Installing provisioner-scripts npm deps" +npm install --prefix provisioner-scripts + +log "Installing Playwright Chromium (browser only -- system deps may need sudo)" +if [[ "$PLATFORM" == linux 
]]; then + npx --prefix provisioner-scripts playwright install chromium --with-deps +else + npx --prefix provisioner-scripts playwright install chromium +fi + +############################################################################### +# Smoke tests +############################################################################### +log "Smoke-testing: cargo test --workspace" +cargo test --workspace --quiet + +log "Smoke-testing: npm test --prefix provisioner-scripts" +npm test --prefix provisioner-scripts --silent + +cat <<'EOF' + +================================================================================ + AgentKeys dev environment ready. +================================================================================ +Next steps: + 1. Install Google Chrome if missing (CDP demo needs a real Chrome): + https://www.google.com/chrome/ + 2. One-time AWS infra: docs/stage6-aws-setup.md + 3. Run the demo: docs/dev-setup.md (sections 4 + 5) + +If you opened a fresh shell, source your cargo env: + source "$HOME/.cargo/env" +EOF From 7cc270ef21d39c8fbad989ae8700b4ce8f43e3e1 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Sun, 26 Apr 2026 23:41:02 +0800 Subject: [PATCH 2/2] =?UTF-8?q?docs(security):=20off-chain=20encrypted=20v?= =?UTF-8?q?ault=20=E2=80=94=20threat=20model=20+=20Stage=208=20+=20renumbe?= =?UTF-8?q?r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per security review: on-chain encrypted credential storage (pallet-secrets-vault) creates an unbounded harvest-now-decrypt-later window. Public + immutable + permanent ciphertext means any future TEE-key compromise leaks all historical credentials. Splitting the TEE into two enclaves does not address the consequence axis. The fix requires two architectural moves that compose: 1. Off-chain ciphertext (S3) + on-chain hash + audit 2. Forward-secret per-epoch DEK rotation with deletion of old ciphertext After K epochs, total TEE compromise leaks at most one epoch. 
Changes: - New docs/spec/threat-model-key-custody.md — canonical security position - New docs/stage8-wip.md — operational design (S3 layout, pallet-vault-pointers, rotation runbook, TEE-B encryption-center responsibilities) - docs/spec/plans/development-stages.md — inserted new Stage 8 (off-chain vault); renumbered old Stage 8 (memory hygiene) → Stage 9; old Stage 9 (Heima holding pen) → Stage 10. Parallelization table + change log updated. - docs/stage7-wip.md — scope-boundary note: Stage 7 ships isolation primitive only; vault deferred to Stage 8. - docs/spec/credential-backend-interface.md — Mapping table superseded banner; store_credential / read_credential / teardown_agent rows updated to pallet-vault-pointers + S3. - docs/spec/ses-email-architecture.md §16 — cross-reference (email pipeline is the precedent that Stage 8 generalizes). - wiki/blockchain-tee-architecture.md §1 — superseded banner; row rewritten to "vault pointers, not blobs"; new EpochDek row; new audit extrinsics. - wiki/data-classification.md §1 — credential-blob row updated; doc-level banner. - wiki/key-security.md §1 — v0.1 storage column updated; doc-level banner. - wiki/Home.md — added link to threat model; rules 1+2 wording aligned. - docs/contradictions.md §7.1 — resolved entry documenting the decision. 
Tracks: #57 (security finding), #58 (Stage 7 broker server) Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/contradictions.md | 39 ++++ docs/spec/credential-backend-interface.md | 10 +- docs/spec/plans/development-stages.md | 36 +++- docs/spec/ses-email-architecture.md | 2 + docs/spec/threat-model-key-custody.md | 252 ++++++++++++++++++++++ docs/stage7-wip.md | 3 + docs/stage8-wip.md | 234 ++++++++++++++++++++ wiki/Home.md | 5 +- wiki/blockchain-tee-architecture.md | 7 +- wiki/data-classification.md | 7 +- wiki/key-security.md | 4 +- 11 files changed, 582 insertions(+), 17 deletions(-) create mode 100644 docs/spec/threat-model-key-custody.md create mode 100644 docs/stage8-wip.md diff --git a/docs/contradictions.md b/docs/contradictions.md index 8e68bc5a..5e173e7f 100644 --- a/docs/contradictions.md +++ b/docs/contradictions.md @@ -387,6 +387,45 @@ Grouped by status and what the user probably wants to hit before Stage 8. --- +## 7.1 Where sensitive ciphertext lives — on chain vs off chain (RESOLVED 2026-04-26) + +This was Part 4 / Part 7 of the 2026-04-19 inventory's "key contradictions" list — the docs were silent or inconsistent on whether credential ciphertext lives in `pallet-secrets-vault` on chain (per `wiki/blockchain-tee-architecture.md` §1, `wiki/key-security.md` §1, `docs/spec/credential-backend-interface.md` "Mapping to Heima Primitives") or off-chain in S3. The closest-analogous existing pattern is the Stage 6 email pipeline, which puts raw MIME in S3 and only metadata on chain. 
+ +| Source | Pre-2026-04-26 claim | +|---|---| +| `wiki/blockchain-tee-architecture.md` §1 row "Credential blobs" | "encrypted ciphertext, on chain in `pallet-secrets-vault`" | +| `wiki/data-classification.md` §1 row "Credential blobs" | "On chain: encrypted ciphertext" | +| `wiki/key-security.md` §1 v0.1 column | "Encrypted blob in Heima TEE (`pallet-secrets-vault`)" | +| `docs/spec/credential-backend-interface.md` Mapping table | `store_credential` → `pallet-secrets-vault::write_secret` | +| `docs/spec/ses-email-architecture.md` §4 + §6 | Email blobs already in S3 (precedent for off-chain) — not extended to credentials | + +**Threat-model finding (2026-04-26):** on-chain encrypted-blob storage creates an unbounded harvest-now-decrypt-later window — public + immutable + permanent ciphertext means any future TEE-key compromise leaks all historical data. Splitting the TEE into two enclaves does not fix the consequence axis. The fix has to be (a) move ciphertext off-chain so it isn't publicly observable forever, and (b) rotate per-epoch DEKs with deletion of old ciphertext. Both moves are required; they multiply rather than add. Full argument: [`docs/spec/threat-model-key-custody.md`](./spec/threat-model-key-custody.md). + +**Decision (2026-04-26):** Sensitive ciphertext lives **off-chain** in S3 under per-epoch DEKs. Chain holds `(blob_pointer, ciphertext_hash, epoch)` via new `pallet-vault-pointers`. The deprecated `pallet-secrets-vault` design is no longer a target. Forward-secret epoch rotation is the property the previous design did not have. + +**Applied to:** +- `docs/spec/threat-model-key-custody.md` — new doc; canonical position. +- `docs/stage8-wip.md` — new Stage 8 operational design (off-chain vault + rotation runbook). +- `docs/spec/plans/development-stages.md` — inserted new Stage 8; renumbered old Stage 8 (memory hygiene) → Stage 9 and old Stage 9 (Heima holding pen) → Stage 10. Parallelization table + change log updated. 
+- `docs/stage7-wip.md` — added scope-boundary note: Stage 7 ships the isolation primitive only; vault question deferred to Stage 8. +- `docs/spec/credential-backend-interface.md` — superseded banner on the Mapping table; rows for `store_credential` / `read_credential` / `teardown_agent` updated to point at `pallet-vault-pointers` + S3. +- `wiki/blockchain-tee-architecture.md` §1 — superseded banner; on-chain row rewritten to "vault pointers, not blobs"; new `EpochDek` row; new Stage 8 audit extrinsics added. +- `wiki/data-classification.md` §1 — credential-blob row updated to off-chain + per-epoch DEK; doc-level banner. +- `wiki/key-security.md` §1 — v0.1 storage column updated; doc-level banner. +- `wiki/Home.md` — reading-order link to the new threat-model doc; "four rules" wording softened on rules 1 + 2 to align with the new position. +- `docs/spec/ses-email-architecture.md` §16 — cross-reference added; framing the email pipeline as the precedent that Stage 8 generalizes. + +**Tracking:** [issue #57](https://github.com/litentry/agentKeys/issues/57) — security finding + remediation roadmap. + +**Why this resolution closes Part 7's gap (architectural commitments NOT yet made).** +1. ✅ "Is `pallet-secrets-vault` the final design, or is S3 off-chain fallback planned?" — **No**, the final design is off-chain S3 with chain pointers. Decided. +2. ✅ "Per-request ephemeral key material rotation (forward secrecy at read level)" — addressed via per-epoch DEK rotation; lazy-rotation variant chosen as default. +3. ❓ "TEE-side credential TTL / eviction policy" — partially addressed (DEK destroyed on epoch boundary; blobs lifecycle-deleted). Remaining tunables (epoch cadence, lifecycle TTL) tracked in [`docs/stage8-wip.md`](./stage8-wip.md) §5 open questions. +4. ❓ "MRSIGNER succession pallet spec" — orthogonal; out of scope here. +5. ❓ "TEE-hosted OIDC endpoint (fully sealed)" — orthogonal Stage 7b/future item. + +--- + ## 8. 
What this doc does NOT cover - Code-level bugs not cited in an issue or a wiki/plan doc. (Let code review handle those.) diff --git a/docs/spec/credential-backend-interface.md b/docs/spec/credential-backend-interface.md index a05961cb..9a428b8b 100644 --- a/docs/spec/credential-backend-interface.md +++ b/docs/spec/credential-backend-interface.md @@ -273,18 +273,20 @@ All `request_details` values MUST be serialized with **deterministic CBOR** (RFC ### Mapping to Heima Primitives +> **Superseded 2026-04-26 — vault rows.** The `store_credential` / `read_credential` rows below originally pointed at `pallet-secrets-vault` (on-chain encrypted blob store). Per [`./threat-model-key-custody.md`](./threat-model-key-custody.md) and [`../stage8-wip.md`](../stage8-wip.md), the canonical v0.1 design moves ciphertext **off-chain** into S3 under per-epoch DEKs. The chain holds only `(blob_pointer, ciphertext_hash, epoch)` via `pallet-vault-pointers`. Mapping rows updated below; the on-chain encrypted vault is no longer a target. 
+ For the Heima backend implementation: | Trait Method | Heima Primitive | Notes | |-------------|----------------|-------| | `create_session` | Google OAuth → `pallet-identity-management` → `RegisterUserByOmniAccount` | Existing flow, reuse | | `create_child_session` | New: scoped session key minting in TEE worker (Kai Q1) | Needs to be built | -| `store_credential` | New: `pallet-secrets-vault::write_secret` or TEE worker storage (Kai Q2) | Needs to be built | -| `read_credential` | New: `pallet-secrets-vault::read_secret_intent` with scope enforcement (Kai Q3) | Needs to be built | +| `store_credential` | S3 PUT under `s3://agentkeys-vault////.enc` + new `pallet-vault-pointers::register_blob` extrinsic | Stage 8; replaces former `pallet-secrets-vault::write_secret` | +| `read_credential` | `pallet-vault-pointers::lookup` → S3 GET → TEE unwraps DEK + decrypts; scope check on chain | Stage 8; replaces former `pallet-secrets-vault::read_secret_intent` | | `query_audit` | Chain events + Subsquid/Subquery indexer | Standard Substrate dev | | `revoke_session` | Policy table update in TEE worker, propagates in ~1 block (~6s) (Kai Q9) | Verify with Kai | -| `teardown_agent` | Batch: revoke sessions + delete credential blobs | Composition of above | -| `shielding_key` | `pallet-teebag` shielding key (already public on chain) | Reuse | +| `teardown_agent` | Batch: revoke sessions + S3 lifecycle-delete blobs + epoch-rotate user DEK | Composition of above | +| `shielding_key` | `pallet-teebag` shielding key (already public on chain) | Reuse — used to wrap epoch DEKs, not to encrypt bulk data | ## 3. Payment Rail Abstraction diff --git a/docs/spec/plans/development-stages.md b/docs/spec/plans/development-stages.md index 04efed0d..0ac29efa 100644 --- a/docs/spec/plans/development-stages.md +++ b/docs/spec/plans/development-stages.md @@ -63,16 +63,38 @@ Today's Stage 6 still lists "interim" AWS-managed DKIM + static IAM user. 
To cal Expose `oidc.agentkeys.dev` as a conforming OIDC Identity Provider. Any cloud that accepts external OIDC federation (AWS, GCP, Azure, Snowflake, K8s) trusts AgentKeys once and gets per-user-wallet-tagged temp creds via standard federation. Unlocks bring-your-own-domain + per-user cloud-enforced isolation via `PrincipalTag`. Scratch notes: [`../../stage7-wip.md`](../../stage7-wip.md). Blocked on: public TLS for `oidc.agentkeys.dev`, TEE-held ES256 signer at `oidc/issuer/v1` (`heima-gaps §3`). -### Stage 8 — Production hardening (Priority A only for v0.1) +Stage 7 stops at the isolation primitive. **It does not commit a position on where credential ciphertext lives** — the previously-assumed `pallet-secrets-vault` (on-chain encrypted blob store) is superseded by Stage 8 below, per [`../threat-model-key-custody.md`](../threat-model-key-custody.md). + +### Stage 8 — Off-chain encrypted vault (NEW, 2026-04-26) + +Move credential ciphertext off the chain into S3 (initial) under per-epoch DEKs that rotate on a fixed cadence. Chain holds ownership records, audit, revocation, and ciphertext hashes — never the bytes. Composes with Stage 7 (the per-user S3 prefix is already PrincipalTag-gated). The forward-secret rotation is the property the previous design did not have: **total TEE compromise at any future point leaks at most one epoch of data, not all history.** + +Architectural rationale: [`../threat-model-key-custody.md`](../threat-model-key-custody.md). Operational design + runbook: [`../../stage8-wip.md`](../../stage8-wip.md). + +Key deliverables: +- New on-chain pallet `pallet-vault-pointers` (replaces the deprecated `pallet-secrets-vault` design) — `(user_wallet, service, agent, epoch, blob_id, ciphertext_hash)`. +- New `EpochDek` on-chain state — per-user wrapped DEK + epoch lifecycle. +- New TEE-B "rotation enclave" responsibility, separate code surface from the auth/decrypt enclave (TEE-A). +- S3 layout `s3://agentkeys-vault-////.enc`. 
+- Rotation runbook: weekly cadence + on-revocation; lazy re-encryption; lifecycle-driven deletion of old epochs. +- New audit extrinsics: `BlobWritten`, `EpochRotated`, `EpochDestroyed`. + +Blocked on: Heima-side review of `pallet-vault-pointers` shape; decision on TEE-A/TEE-B separation (merged for v0.1, split for v0.2 acceptable). Threshold-across-heterogeneous-platforms variant is v0.2+. + +### Stage 9 — Production hardening (Priority A only for v0.1) — was Stage 8 + +Renumbered 2026-04-26 to make room for Stage 8 above. Memory-hygiene work; independent of the vault refactor. - Daemon: `memfd_secret` via `SCM_RIGHTS` fd-passing for managed runtimes; credential zeroize on delivery; idle eviction. - CLI: `agentkeys whoami`, idempotent `init`, `zeroize` wrapping, `PR_SET_DUMPABLE=0`. - Optional: Touch-ID-gate master session on macOS; DEK + encrypted-file storage as cross-platform alternative. - Priority B/C (core pages, ptrace checks, CI checksec) deferred to post-v0.1. -### Stage 9 — Heima migration holding pen +### Stage 10 — Heima migration holding pen — was Stage 9 + +Renumbered 2026-04-26. -Design notes, not executable work. Pattern 4 (TEE-as-paymaster sponsored audit) chosen for v0.1 audit submission. Rate-limit gate (100 reads/min/session) is a Stage 8 prereq. Tracked in [issues #3, #4, #5](https://github.com/litentry/agentKeys/issues/3). +Design notes, not executable work. Pattern 4 (TEE-as-paymaster sponsored audit) chosen for v0.1 audit submission. Rate-limit gate (100 reads/min/session) is a Stage 9 prereq. Tracked in [issues #3, #4, #5](https://github.com/litentry/agentKeys/issues/3). 
### npm package + DX polish @@ -98,15 +120,17 @@ New stages must extend `init.sh`, add a `stage-N-done.sh`, update `features.json |---|---|---| | Stage 5b (drift + fallback) | Telemetry from live Stage 5a usage | Stage 6 finalization, Stage 7 prep | | Stage 6 finalization (BYODKIM, auto-AssumeRole) | `heima-gaps §3` | Stage 7 OIDC (shared TEE signing substrate) | -| Stage 7 (OIDC provider) | Public TLS + TEE ES256 | Stage 8 Priority A | -| Stage 8 Priority A | Stage 4 complete (already shipped) | Anything — independent from the above | +| Stage 7 (OIDC provider) | Public TLS + TEE ES256 | Stage 8 vault design + Stage 9 Priority A | +| Stage 8 (off-chain vault) | Stage 7 PrincipalTag isolation; Heima-side `pallet-vault-pointers` review | Stage 9 Priority A (independent code surfaces) | +| Stage 9 Priority A (memory hygiene) | Stage 4 complete (already shipped) | Anything | -Critical path to v0.1 ship: Stage 5b telemetry → Stage 6 finalization → Stage 7 → Stage 8 Priority A. Two devs can split 5b from 6+7+8 cleanly. +Critical path to v0.1 ship: Stage 5b telemetry → Stage 6 finalization → Stage 7 → Stage 8 vault → Stage 9 Priority A. Stage 8 and Stage 9 can run in parallel; Stage 9 has no upstream dependency on Stage 8. --- ## Change log +- **2026-04-26:** Inserted new **Stage 8 — Off-chain encrypted vault** between current Stage 7 and the existing Stage 8 (now Stage 9). Renumbered Stage 9 → Stage 10. Driven by [`../threat-model-key-custody.md`](../threat-model-key-custody.md): on-chain encrypted-blob store creates an unbounded harvest-now-decrypt-later window. Operational design in [`../../stage8-wip.md`](../../stage8-wip.md). - **2026-04-23 (v2):** collapsed full stage-by-stage contracts into Shipped/Active/Planned; moved v1 to `docs/archived/`. - **2026-04-19:** Stage 5-7 reorder (old Stage 6/7/8 postponed to v0.1; hosted email + OIDC promoted). - **2026-04-16:** Stage 5 split into 5a (ships v0) and 5b (v0.1); Stage 6 (npm) postponed. 
diff --git a/docs/spec/ses-email-architecture.md b/docs/spec/ses-email-architecture.md index f80d77ea..22ab8e77 100644 --- a/docs/spec/ses-email-architecture.md +++ b/docs/spec/ses-email-architecture.md @@ -461,6 +461,8 @@ Total: ~2 weeks. No Lambda, no DynamoDB, no server-side MIME parsing — the bro ## 16. Cross-references - **[`wiki/oidc-federation.md`](../../wiki/oidc-federation.md)** — the generalized OIDC-provider design that §10.5 references; explains how the same ES256 key federates into AWS, GCP, Azure, Snowflake, K8s +- **[`docs/spec/threat-model-key-custody.md`](./threat-model-key-custody.md)** — generalizes this spec's "raw MIME in S3, metadata on chain" pattern to credential ciphertext too. The email pipeline is the precedent; Stage 8 generalizes it. +- **[`docs/stage8-wip.md`](../stage8-wip.md)** — the off-chain encrypted vault. Reuses this spec's S3 bucket pattern under a different prefix (`agentkeys-vault//...`). - `docs/spec/email-signing-backends.md` — the generalized trait (needs an SES section added; this spec supplies the content) - `docs/spec/credential-backend-interface.md` — the parent trait this extends - `docs/stage5-workspace-email-setup.md` — alternative: Google DWD operator runbook (preserved for enterprise deployments) diff --git a/docs/spec/threat-model-key-custody.md b/docs/spec/threat-model-key-custody.md new file mode 100644 index 00000000..a8d70889 --- /dev/null +++ b/docs/spec/threat-model-key-custody.md @@ -0,0 +1,252 @@ +# Threat Model: Key Custody and Sensitive-Data Storage + +**Date:** 2026-04-26 +**Status:** Design — supersedes the on-chain encrypted-vault assumption that runs through wiki/blockchain-tee-architecture.md, wiki/data-classification.md, wiki/key-security.md, and docs/spec/credential-backend-interface.md. 
+**Related issues and docs:** [#57](https://github.com/litentry/agentKeys/issues/57) (this doc — security finding), [#9](https://github.com/litentry/agentKeys/issues/9) (master-seed HDKD), [`docs/spec/heima-gaps-vs-desired-architecture.md`](./heima-gaps-vs-desired-architecture.md), [`docs/stage8-wip.md`](../stage8-wip.md) + +This doc defines the canonical security position for **where sensitive ciphertext lives** and **how decryption keys are managed**. Earlier docs assume an on-chain encrypted vault (`pallet-secrets-vault`); this doc replaces that assumption with off-chain ciphertext + on-chain hash + forward-secret epoch rotation, and explains why. + +If you read only two sections, read §3 (the threat that drove this) and §6 (the resulting position). + +--- + +## 1. The four properties this doc optimizes + +Every storage and key-custody decision in AgentKeys lives or dies on four security properties. Different threat surfaces map to different properties; conflating them is what produces "secure-sounding" architectures that fail in practice. + +| Property | Question it answers | Failure mode | +|---|---|---| +| **Authorization integrity** | Can an attacker mint a credential they were not authorized to mint? | Forward-only forgery; bounded by key rotation. | +| **Confidentiality (live)** | Can an attacker read sensitive data while it is in transit or at rest, today? | Time-bounded by detection + rotation. | +| **Retroactive confidentiality** | If the decryption key leaks at any future point, can an attacker decrypt data captured today? | **Unbounded in time. Permanent.** | +| **Metadata leak** | Can an attacker observe access patterns, ownership, or activity even without decrypting anything? | Side-channel; usually permanent. | + +The asymmetry across these properties is the whole point. An authorization-integrity breach is **recoverable** — you rotate keys, force re-pair, revoke on chain. 
A retroactive-confidentiality breach is **unrecoverable** — anyone who captured ciphertext during the vulnerable window decrypts forever. + +The current AgentKeys spec is strong on (1) and (2). It is silent on (3) and (4). This doc commits to a position on (3) and (4) — and shows that the position requires architectural changes, not just key-rotation policy. + +--- + +## 2. Restating the current Stage 7 stance (what we are revising) + +Stage 7 as currently specified ([`wiki/blockchain-tee-architecture.md`](../../wiki/blockchain-tee-architecture.md), [`wiki/key-security.md`](../../wiki/key-security.md), [`docs/spec/credential-backend-interface.md`](./credential-backend-interface.md)) takes these positions: + +1. **Credential ciphertext lives on chain** in a new `pallet-secrets-vault`, encrypted to the TEE shielding key. +2. **Shielding key sealed in TEE**, derived from the master seed via SLIP-0010 at path `shielding/v1`. +3. **Bearer tokens are short-lived** (≤30 d, AgentKeys policy) and revocable on chain (~6 s). +4. **Per-user isolation on shared cloud resources** (S3, GCP, etc.) via OIDC JWT → PrincipalTag → resource policy. +5. **Audit events on chain** as extrinsics, async via paymaster. + +Honest grade against the four properties: + +| Property | Stage 7 grade | Why | +|---|---|---| +| Authorization integrity | **A** | Bearer tokens revocable, audit on chain, per-user PrincipalTag isolation. | +| Confidentiality (live) | **B+** | Ciphertext encrypted to TEE shielding key; plaintext exists only in TEE during decrypt. Daemon and agent windows handled by Stage 9 (memory hygiene, formerly Stage 8). | +| Retroactive confidentiality | **F** | **Public ciphertext on an immutable ledger + single long-lived shielding key = unbounded harvest-now-decrypt-later window.** | +| Metadata leak | **C** | Chain storage keys reveal "user X stored Y at block N"; activity patterns are public even when contents are encrypted. 
| + +Properties (3) and (4) are the gap this doc closes. + +--- + +## 3. The threat: harvest-now-decrypt-later, on chain, forever + +Three properties of an immutable public ledger combine into a worst-case substrate for encrypted secrets: + +1. **Public.** Every node, every block explorer, every archival service can fetch every encrypted blob. +2. **Immutable.** "Delete" is a marker; the bytes remain in every archival node. +3. **Forever.** No expiry mechanism. Block N's ciphertext is still bit-identical at block N + 10 million. + +Combine those with a single long-lived shielding key (derived once from master seed at `shielding/v1`, never rotated in the current spec) and the consequence is: + +> **An attacker who copies the chain today and waits — for 1 year, 10 years, 30 years — wins everything if the shielding key ever leaks. Including credentials that have long since been "revoked" or "deleted" at the application layer.** + +This is the same "store now, decrypt later" model that motivates post-quantum migration, but it does not require a quantum break. It only requires: + +- Side-channel extraction of the sealed master seed at any future point (a real risk on commodity TEEs over decade timescales). +- A vendor-side compromise (Intel SGX has had several published microarchitectural breaks; future hardware will too). +- A successor enclave operator who is curious about the past. +- A single insider with sealed-storage extraction capability. + +None of these requires breaking AES, breaking the curve, or quantum cryptography. They only require that the *key* eventually leaves the TEE — once, ever, in any future timeframe. And the ciphertext we wrote today is still sitting in every chain node waiting to be decrypted. + +**Splitting the TEE into two enclaves does not fix this.** Splitting addresses the probability of joint compromise; the consequence on retroactive confidentiality is the same — the ciphertext is still public and still permanent. 
Same-platform splits are the worst case (single vulnerability takes both); heterogeneous threshold across SGX + TDX + Nitro reduces probability but not consequence. The fix has to be on the *consequence* axis, not the *probability* axis. + +--- + +## 4. The fix: two architectural moves that compose + +### Move 1 — off-chain ciphertext, on-chain hash + audit + +Move the ciphertext to S3 (or any off-chain content-addressed store; see §9 on alternatives). Keep on chain only what consensus is genuinely load-bearing for: ownership records, grants, audit, revocation, and the **hash of the ciphertext** (so tamper of the off-chain blob is detectable). + +The chain remains the source of authority. The off-chain layer is the source of bytes. + +**What changes structurally:** + +| | Old (Stage 7 stance) | New (this doc) | +|---|---|---| +| Ciphertext storage | `pallet-secrets-vault` on chain | S3 object `s3://agentkeys-vault/<user_wallet>/<service>/<epoch>/<blob_id>.enc` | +| Pointer / integrity | (implicit — chain is the bytes) | On-chain `(user_wallet, slot) → {blob_pointer, ciphertext_hash, epoch}` | +| Public ciphertext | **Yes — every node has it** | **No — bucket is private, AWS-IAM-gated** | +| Deletion | Marker only; bytes persist | Real — S3 lifecycle drops bytes | +| Metadata leak | Chain access patterns are public | S3 access patterns are private to the operator | +| Tamper evidence | Consensus | Hash on chain — detectable on read | +| Censorship resistance | High (permissionless reads) | Lower (AWS can pull the plug) — mitigated by content-addressed multi-backend (§9) | + +The ciphertext-hash on chain is the load-bearing primitive. It gives chain-level integrity guarantees for off-chain-stored data. A reader fetches `(blob_pointer, ciphertext_hash)` from chain, retrieves bytes from S3, recomputes hash, rejects if mismatched. The audit log records the access regardless of where bytes live.
+ +### Move 2 — forward-secret epoch rotation + +Encrypt with a per-epoch DEK (data encryption key) that is **rotated on a fixed cadence** and **destroyed after rotation**. Re-encrypt active blobs under the new DEK. Drop old blobs from S3 via lifecycle. Older epochs are no longer decryptable, even by the TEE that originally wrote them. + +``` +Epoch 0: DEK_0 encrypts blobs B0,0 ... B0,N +Epoch 1: DEK_1 encrypts blobs B1,0 ... B1,M (B0,* re-encrypted as B1,*' if still active) +Epoch 2: DEK_2 encrypts blobs B2,0 ... B2,K (DEK_0 destroyed; B0,* bytes deleted) +``` + +After rotation + deletion of epoch K, even total compromise of the TEE leaks at most epochs `K..current`. The earlier DEKs are gone; the earlier blobs are gone. Forward secrecy holds. + +**Critical:** forward secrecy is meaningful only if the old ciphertext also disappears. Rotating keys while the old ciphertext sits forever in chain archive nodes is cosmetic. Move 1 (off-chain storage) is what makes Move 2 (key rotation) deliver real forward secrecy. **The two moves multiply, they don't add.** + +### What total TEE compromise leaks under the combined design + +| Compromise | Old design (Stage 7 stance) | New design (this doc, after K epochs) | +|---|---|---| +| TEE master seed leaks today | All historical credentials, all users, forever | Only credentials encrypted under DEK_current; older epochs are irrecoverable | +| TEE master seed leaks 10 years from now | All credentials ever stored — chain still has the ciphertext | Same — only the then-current epoch | +| Shielding key only (not master seed) | All historical credentials | Only currently active blobs | +| Single user's blob leaks (e.g., S3 misconfiguration) | N/A — chain leaked or didn't | One blob, one user, one epoch | + +The blast radius collapses from "all data ever" to "one epoch's data." That is the entire point of forward secrecy, and it is achievable only when the two moves compose. + +--- + +## 5. 
What stays on chain — and why + +This doc does not propose abandoning the chain. The chain earns its keep doing things consensus is genuinely needed for. Storing bulk encrypted bytes is not one of those things; storing the structural facts about ownership, access, and audit is. + +| On chain (small, high-leverage) | Off chain (S3 / IPFS / ...) | +|---|---| +| `Ownership { user_wallet, slot, agent_wallet, blob_pointer, ciphertext_hash, epoch }` | The encrypted blob bytes themselves | +| `Grant { issuer_wallet, child_wallet, scope, expires_at }` | Encrypted user-data payloads (email blobs, vault entries, etc.) | +| Audit extrinsics: `BlobWritten { blob_pointer, hash, epoch }`, `CredsRead { child_wallet, blob_pointer, ts }`, `EpochRotated { from, to, ts }` | Old epochs' DEK-encrypted blobs (lifecycle-deleted) | +| Revocation list (≤ 6 s propagation) | | +| Per-domain DKIM trust anchor pubkey hashes | | +| OIDC issuer key pubkey hashes (JWKS authority) | | + +The chain footprint per user remains small (kilobytes, not megabytes), which is what makes the chain economics actually work. Bulk ciphertext on chain breaks the cost story whether or not it breaks the threat model. + +--- + +## 6. The resulting position (canonical, supersedes earlier docs) + +> AgentKeys does not store sensitive payloads on the blockchain or persistently inside the TEE. The blockchain holds ownership, grants, audit, revocation, and ciphertext hashes — never the ciphertext itself. The TEE holds key-derivation roots and per-request decryption capability — never bulk plaintext, never persistent per-user material beyond what the master seed reproduces. Sensitive ciphertext lives off-chain in content-addressed storage (S3 today; multi-backend later), under per-epoch DEKs that rotate on a fixed cadence with old ciphertext deleted at lifecycle. Total TEE compromise at any future point leaks at most the currently active epoch. + +Five concrete invariants, derived from that position: + +1. 
**No `pallet-secrets-vault`-style on-chain encrypted blob store.** Earlier doc claims to this effect are superseded. The chain stores `(blob_pointer, ciphertext_hash, epoch)`, not bytes. +2. **DEKs are per-epoch, not per-key-lifetime.** The shielding key derived from `shielding/v1` is used to **wrap** epoch DEKs, not to directly encrypt bulk data. Wrapping happens in the TEE; the wrapped DEK is committed on chain alongside the ownership record. +3. **Old DEKs are destroyed at rotation.** Once epoch K+1 begins and active blobs have been re-encrypted, the TEE no longer holds DEK_K. It is unrecoverable even by the TEE itself. +4. **S3 (or successor off-chain store) is authoritative for bytes; chain is authoritative for hash.** A retrieval that fails hash check is treated as a tamper event. +5. **Per-user isolation is cloud-enforced via PrincipalTag** (Stage 7) regardless of how this doc evolves. The two systems compose; this doc does not change Stage 7's isolation primitive. + +--- + +## 7. The encryption-center question — who holds the rotation authority + +Forward-secret rotation requires a clearly identified component that: + +- Decides when an epoch ends (cadence policy) +- Generates the new DEK (CSPRNG inside a trust boundary) +- Re-encrypts active blobs under the new DEK (or marks them stale) +- Destroys the old DEK (zeroize + drop) +- Emits the `EpochRotated` audit extrinsic +- Publishes the new wrapped DEK on chain + +Three candidates, ordered by attack-surface footprint: + +| Candidate | Attack surface | Comments | +|---|---|---| +| **TEE itself** (single enclave handling auth + decrypt + rotation) | Largest | Concentrates roles; rotation code adds bytes to the trust-critical surface. | +| **Dedicated rotation enclave** (TEE-B, separate from the auth/decrypt TEE-A) | Smaller | Can be small, network-isolated, no untrusted input parsing. Coordinates with TEE-A via attested channels. 
| +| **Threshold across heterogeneous enclaves** (SGX + TDX + Nitro k-of-n) | Smallest joint compromise probability | Highest implementation cost. Reasonable for v0.2+; out of scope for Stage 8. | + +This doc commits to the **dedicated rotation enclave** path for Stage 8, with the threshold variant as a v0.2+ consideration. Stage 8 design and operational runbook live in [`docs/stage8-wip.md`](../stage8-wip.md). + +Reducing TEE-B's attack surface is more important than splitting it from TEE-A. Specifically: + +- **No network I/O** (input via attested channel from TEE-A only; output to S3 + chain via signed write tokens). +- **No untrusted input parsing** (binary protocol, fixed-size messages, exhaustively typed). +- **No general-purpose host shared memory** (only the sealed master seed and per-rotation working set). +- **Code surface small enough to formally verify or at least exhaustively review** (rotation logic should fit in a few hundred lines). + +--- + +## 8. Composition with existing Stage 7 primitives + +Stage 7 (OIDC federation, PrincipalTag, per-user isolation) is unchanged by this doc. The OIDC-issuer key still lives at `oidc/issuer/v1`. The JWT mint still emits `agentkeys_user_wallet` claims. AWS PrincipalTag still gates access to per-user S3 prefixes. + +The only adjustment to the Stage 7 picture is **what gets gated**: the per-user prefix now contains the encrypted vault blobs (Stage 8) in addition to the email blobs (Stage 6). Same isolation primitive, broader scope. + +``` + s3://agentkeys-vault/<user_wallet>/... + ↑ + PrincipalTag-gated read (Stage 7) + ↑ + OIDC JWT carries user_wallet claim (Stage 7) + ↑ + TEE mints JWT with claim from authenticated session (Stage 7) + ↑ + Daemon presents bearer token + scope (Stage 4) +``` + +The encrypted blob inside that gated prefix is wrapped under DEK_current; DEK_current is wrapped under shielding key; shielding key is sealed in TEE; sealed via master seed.
Three layers of wrapping, but the operational model stays simple: chain holds pointers, S3 holds bytes, TEE holds keys, and rotation cleans up after itself. + +--- + +## 9. Open questions + +These do not block adopting the position in §6 but need decisions before Stage 8 implementation lands. + +1. **Storage backend portfolio.** S3 first (operator already runs the SES + S3 stack). Multi-backend (S3 + IPFS + Filecoin + Arweave content-addressed) is the censorship-resistance answer to §4 Move 1's main concession. When does multi-backend land? v0.1 with S3-only? v0.2 with IPFS pinning? + +2. **Epoch cadence.** Daily? Weekly? Monthly? Per-credential-on-revoke? Tradeoff: shorter epoch = smaller leak window but more rotation cost; longer = the opposite. Default proposal: **weekly**, with on-demand rotation triggered by revocation events. + +3. **Re-encryption strategy at rotation.** Two options: + - **Eager**: re-encrypt all active blobs at epoch boundary. Predictable cost spike per rotation. + - **Lazy**: re-encrypt on next read; old blobs marked stale, removed at lifecycle TTL. Smoother cost; longer effective leak window for unread blobs. + Default proposal: **lazy with TTL**. Read-rate is predictable; idle blobs naturally expire. + +4. **What about Heima's existing pallet design?** [`heima-gaps-vs-desired-architecture.md`](./heima-gaps-vs-desired-architecture.md) discusses the upstream parachain's pattern of on-chain encrypted state. We need a follow-up gap entry: "off-chain ciphertext + on-chain hash, not on-chain encrypted blob." The Heima conversation moves from "build `pallet-secrets-vault`" to "build `pallet-vault-pointers` + `pallet-vault-audit`." + +5. **Threshold rotation** (TEE-B as k-of-n across heterogeneous platforms). Out of scope for Stage 8; flag as v0.2+ candidate when threat-modeling matures and second-platform enclave costs become acceptable. + +6. 
**Recovery from accidental DEK loss.** A bug or operational mistake destroys DEK_K before active blobs are re-encrypted under DEK_K+1. Affected blobs are unrecoverable by design — this is the cost of forward secrecy. Mitigation: instrumented rotation runs with audited preconditions; never destroy DEK_K until `EpochRotated` extrinsic confirms re-encryption complete. Operationally identical to a backup-aware key-rotation runbook. + +7. **Rotation under partial chain availability.** If the chain is wedged when an epoch boundary hits, the rotator cannot emit `EpochRotated`. Strategy: rotation is delayed (not skipped); the rotator's runbook covers chain-unavailable graceful degradation. + +--- + +## 10. Migration from current claims + +| Doc / claim | Current text says | After this doc | +|---|---|---| +| [`wiki/blockchain-tee-architecture.md`](../../wiki/blockchain-tee-architecture.md) §1 table row "Credential blobs" | "Encrypted ciphertext, on chain in `pallet-secrets-vault`" | Banner pointing here; row updated to "Pointer + ciphertext hash on chain; ciphertext off-chain (S3)" | +| [`wiki/data-classification.md`](../../wiki/data-classification.md) §1 row "Credential blobs" | "On chain: Encrypted (ciphertext)" | "On chain: Hash + pointer; In TEE: per-request decrypt only; Off-chain S3: ciphertext under per-epoch DEK" | +| [`wiki/key-security.md`](../../wiki/key-security.md) §1 table | "v0.1 Heima: Encrypted blob in Heima TEE (`pallet-secrets-vault`)" | "v0.1 (Stage 8): off-chain S3 ciphertext under per-epoch DEK; chain holds pointer + hash" | +| [`docs/spec/credential-backend-interface.md`](./credential-backend-interface.md) §"Mapping to Heima Primitives" | `store_credential` → `pallet-secrets-vault::write_secret` | `store_credential` → S3 write + on-chain `pallet-vault-pointers` extrinsic | +| [`docs/spec/plans/development-stages.md`](./plans/development-stages.md) Stage 8 (current) | "Production hardening — memory hygiene" | Renumbered to **Stage 9**; new **Stage 8 = 
off-chain encrypted vault** (this doc's position) | +| [`docs/spec/plans/development-stages.md`](./plans/development-stages.md) Stage 9 (current) | "Heima migration holding pen" | Renumbered to **Stage 10** | + +--- + +## 11. Cross-references + +- [`docs/stage8-wip.md`](../stage8-wip.md) — operational design for the off-chain vault (storage layout, rotation runbook, encryption-center responsibilities). +- [`docs/spec/heima-gaps-vs-desired-architecture.md`](./heima-gaps-vs-desired-architecture.md) — needs a new §5 "Off-chain ciphertext / `pallet-vault-pointers`" gap entry mirroring this doc's position. +- [`docs/spec/ses-email-architecture.md`](./ses-email-architecture.md) §4 — the email pipeline already uses the off-chain pattern; this doc generalizes it. +- [`wiki/tag-based-access.md`](../../wiki/tag-based-access.md) — Stage 7 PrincipalTag isolation, unchanged by this doc; gates the per-user S3 vault prefix. +- [`docs/contradictions.md`](../contradictions.md) — entry resolving "where does sensitive ciphertext live" added alongside this doc. diff --git a/docs/stage7-wip.md b/docs/stage7-wip.md index 8e91c3fe..fc2acb62 100644 --- a/docs/stage7-wip.md +++ b/docs/stage7-wip.md @@ -6,6 +6,8 @@ Expose our TEE (or interim ES256 signer) as a conforming OIDC Identity Provider at a stable public URL. Any cloud that trusts the issuer can exchange our JWTs for scoped temp creds via standard federation. Per [`docs/spec/plans/development-stages.md`](./spec/plans/development-stages.md), this is the "Generalized OIDC Provider" stage after Stage 6 (Federated Own Email). +> **Scope boundary (added 2026-04-26).** Stage 7 ships the per-user isolation primitive — JWT claim → PrincipalTag → resource-policy gate. 
**It does not commit a position on where credential ciphertext lives.** The previously-assumed `pallet-secrets-vault` (on-chain encrypted blob store) is superseded by [`stage8-wip.md`](./stage8-wip.md), which moves ciphertext off-chain into the same PrincipalTag-gated S3 prefixes. See [`docs/spec/threat-model-key-custody.md`](./spec/threat-model-key-custody.md) for the architectural rationale. + ## Why it's not running yet - Needs `oidc.agentkeys.dev` (or equivalent) hosted publicly with a public-CA TLS cert so AWS IAM accepts `create-open-id-connect-provider`. @@ -121,3 +123,4 @@ When [`heima-gaps §3`](./spec/heima-gaps-vs-desired-architecture.md) closes, re - Host `services/oidc-stub/` publicly (CloudFront+S3 for static discovery + Lambda for sign) - Promote to `docs/manual-test-stage7.md` once the test passes live - Add the equivalent GCP Workload Identity Federation + Ali Cloud RAM recipes (Stage 7 target is generalized, not AWS-only) +- Hand off the credential-vault question to Stage 8 — the bucket prefix `s3://agentkeys-vault/<user_wallet>/` is the reuse point; ciphertext + per-epoch DEK rotation live in [`stage8-wip.md`](./stage8-wip.md), not here. diff --git a/docs/stage8-wip.md b/docs/stage8-wip.md new file mode 100644 index 00000000..9e0e52dc --- /dev/null +++ b/docs/stage8-wip.md @@ -0,0 +1,234 @@ +# Stage 8 — Off-Chain Encrypted Vault (WIP) + +> **WIP / scratchpad.** Operational design for the off-chain encrypted vault. The architectural position lives in [`docs/spec/threat-model-key-custody.md`](./spec/threat-model-key-custody.md); this doc translates that position into runbook material. Revise as the design lands. + +## What Stage 8 is + +Move credential ciphertext (and any other bulk encrypted user data) **off the chain**, into S3 (initially) under per-epoch DEKs that rotate on a fixed cadence. Chain retains ownership records, audit, revocation, and ciphertext hashes — the structural facts that need consensus — but holds no encrypted bytes itself.
+ +Two architectural moves, both required, both delivered together: + +1. **Off-chain ciphertext, on-chain hash + audit.** +2. **Forward-secret per-epoch DEK rotation, with deletion of old ciphertext at lifecycle.** + +The composition gives the property the project did not have before: **total TEE compromise at any future point leaks at most one epoch of data, not all history.** Rationale and threat model in [`threat-model-key-custody.md`](./spec/threat-model-key-custody.md). + +## Why it's not Stage 7 + +Stage 7 ships the OIDC + PrincipalTag isolation primitive. Stage 8 reuses that primitive but adds new work: + +- New on-chain pallet (`pallet-vault-pointers`) — replaces the `pallet-secrets-vault` design previously slated for v0.1. +- New TEE-B "rotation enclave" responsibility — separate code surface from the auth/decrypt enclave (TEE-A). +- New S3 layout under `s3://agentkeys-vault/<user_wallet>/<service>/<epoch>/...`. +- New rotation runbook and audit invariants. + +Stage 7's bucket-policy + JWT contracts are unchanged. Stage 8 is additive on top. + +## Scope of this doc + +| In scope | Out of scope | +|---|---| +| Storage layout for off-chain ciphertext | Threshold cryptography across heterogeneous TEE platforms (v0.2+) | +| Per-epoch DEK rotation cadence + runbook | Migration from existing on-chain encrypted state (no users yet) | +| `pallet-vault-pointers` data shape (high-level) | Pallet rust-source — landed when Heima conversation matures | +| Rotation-enclave (TEE-B) responsibilities + attack surface | TEE-B hardware platform decision (assumed: same SGX as TEE-A for v0.1) | +| Audit invariants + extrinsic shape | Frontend / CLI UX for vault management | + +## 1. Storage layout + +### S3 object key shape + +``` +s3://agentkeys-vault-<account_id>/<user_wallet>/<service>/<epoch>/<blob_id>.enc +``` + +- `<account_id>`: AWS account ID — bucket name uniqueness. +- `<user_wallet>`: 0x-prefixed user wallet (lowercase, 42 chars). Matches the PrincipalTag claim. +- `<service>`: canonical service name (`openrouter`, `openai`, …) for sharding readability.
+- `<epoch>`: monotonic integer epoch number (e.g., `00042`). New writes go to current epoch. +- `<blob_id>`: opaque random ID (`UUIDv7` suggested for sortable timestamps). + +### What's inside the `.enc` file + +The serialized payload is structurally similar to a JWE Compact form, but the design treats the construction as opaque — we control both ends: + +``` +| 4 bytes magic "AKv1" | 1 byte version | 32 bytes wrapped-DEK ID hash | +| 12 bytes nonce | N bytes ciphertext (AES-256-GCM) | 16 bytes tag | +``` + +The `wrapped-DEK ID hash` ties the blob to a specific epoch DEK. Ciphertext is AES-256-GCM with the DEK; AAD is `(user_wallet || service || epoch || blob_id)` so the encryption is bound to its identity at the storage layer. + +### What's on chain + +```rust +// pallet-vault-pointers (replaces the old pallet-secrets-vault design) +pub struct VaultPointer { + pub user_wallet: WalletAddress, + pub service: ServiceName, + pub agent_wallet: WalletAddress, // who can read + pub epoch: u32, + pub blob_id: [u8; 16], // UUID + pub ciphertext_hash: [u8; 32], // SHA-256 of the .enc payload + pub created_at: BlockNumber, + pub last_rotated_at: Option<BlockNumber>, + pub deleted_at: Option<BlockNumber>, +} + +// Per-epoch DEK metadata (the wrapped DEK itself is small enough to live on chain) +pub struct EpochDek { + pub epoch: u32, + pub wrapped_dek: [u8; 64], // DEK wrapped under TEE shielding key + pub created_at: BlockNumber, + pub destroyed_at: Option<BlockNumber>, +} +``` + +The wrapped DEK on chain is what makes the TEE stateless across restarts: any TEE that can derive the shielding key from master seed can unwrap any not-yet-destroyed DEK. After `destroyed_at` is set, the wrapped DEK is removed from chain state and unrecoverable. + +### What's in TEE memory (per-request, never persistent beyond request) + +- Unwrapped DEK for the epoch being read (held only for the duration of one decrypt; zeroed before return). +- Shielding key (sealed via master seed; standard for Stage 7 already).
+ +The TEE never holds the unwrapped DEK across requests. Each decrypt call re-unwraps from chain. + +## 2. Rotation runbook + +### Cadence + +**Default: weekly rotation, plus on-demand rotation triggered by:** + +- An on-chain `RevokeAgent` extrinsic for any user (rotates that user's epoch only — see "Per-user vs global rotation" below). +- An out-of-band security event flagged by the operator (compromise suspected). + +Weekly is the v0.1 default. Tunable per-user via grant policy in v0.2+. + +### Per-user vs global rotation + +Two strategies. We pick **per-user** for v0.1: + +| Strategy | Pros | Cons | +|---|---|---| +| Global epoch (one DEK for everyone, rotated weekly) | Cheap; one rotation event. | Rotation requires re-encrypting all users' blobs on the same cadence; cross-user blast radius if a single DEK leaks. | +| **Per-user epoch (one DEK per user, rotated on user-specific cadence)** | Tight blast radius; revocation of user X rotates only user X's DEK. | More on-chain state; per-user rotation runs in parallel — operationally noisier. | + +Per-user rotation is what makes the §3 §10 forward-secrecy argument tight. A user's whole vault re-encrypts on their epoch boundary; no other user is affected. + +### Rotation flow (per user) + +``` +PRE: TEE-B has authenticated against TEE-A via attested channel. +PRE: Chain shows EpochDek { epoch: K, wrapped_dek_K, destroyed_at: None } for user U. + +Step 1. TEE-B reads chain: list all VaultPointer rows for user U at epoch K. +Step 2. TEE-B unwraps DEK_K from wrapped_dek_K via shielding key. +Step 3. TEE-B generates DEK_{K+1} (256-bit CSPRNG inside enclave). +Step 4. For each blob B at epoch K: + a. GET s3://agentkeys-vault/<U>/<service>/K/<blob_id>.enc + b. Decrypt under DEK_K, validate AAD. + c. Re-encrypt under DEK_{K+1}, new AAD = (U || service || K+1 || blob_id). + d. PUT s3://agentkeys-vault/<U>/<service>/K+1/<blob_id>.enc + e. Compute new ciphertext_hash; emit pallet-vault-pointers::Update extrinsic. +Step 5.
TEE-B emits EpochRotated { user: U, from: K, to: K+1 } extrinsic. +Step 6. After confirmation: TEE-B emits EpochDestroyed { user: U, epoch: K }. + - On-chain: wrapped_dek_K is removed; destroyed_at set. + - S3 lifecycle policy on prefix ///K/ now eligible for deletion. +Step 7. TEE-B zeroizes its in-memory DEK_K and DEK_{K+1}; returns control. +``` + +The audit trail (`EpochRotated`, `EpochDestroyed`, plus the per-blob `Update` extrinsics) is what makes "rotation actually happened" verifiable from the chain alone. + +### Lazy variant (preferred for v0.1) + +Eager re-encryption at rotation time has predictable cost spikes. Lazy re-encryption defers the work until the next read of each blob; idle blobs simply expire under the S3 lifecycle policy. + +``` +On rotation cadence: + - Generate DEK_{K+1}, publish wrapped_dek_{K+1}. + - Mark epoch K as "rotating". + +On next read of any blob at epoch K: + - TEE-A unwraps DEK_K, decrypts, returns plaintext to caller. + - TEE-A re-encrypts under DEK_{K+1}, writes new blob, updates pointer. + - (Caller-invisible.) + +After lifecycle TTL (e.g., 30 days): + - Any blob still at epoch K is deleted by S3 lifecycle. + - When the last K-blob is gone, TEE-B emits EpochDestroyed { K }. +``` + +Cost: smoother. Worst-case forward-secrecy window: lifecycle TTL (idle blobs persist that long). Both variants are operationally fine; lazy is the default for Stage 8. + +## 3. The encryption center — TEE-B + +### Responsibilities + +1. Generate fresh DEK on rotation events. +2. Wrap DEK under shielding key; publish on chain. +3. Re-encrypt active blobs (eager) or attest to lifecycle deletion (lazy). +4. Destroy old DEKs on `EpochDestroyed`. +5. Emit rotation audit extrinsics (`EpochRotated`, `EpochDestroyed`). + +### Attack-surface minimization (the real lever) + +Splitting TEE-B from TEE-A only matters if TEE-B has a strictly smaller attack surface. Otherwise the split is theater. 
+ +**Hard rules for TEE-B:** + +- **No general network I/O.** TEE-B speaks only to (a) TEE-A via attested channel, (b) S3 via signed-only PUT/GET tokens minted in TEE-A, (c) the chain via paymaster-funded signed extrinsics. +- **No untrusted-input parsing.** All inputs are typed binary protocol with fixed-shape messages; no JSON, no XML, no MIME, no cookie strings. +- **No host shared memory beyond the sealed master seed and the in-flight DEK.** +- **Code surface ≤ ~500 lines of trust-critical Rust** (excluding crypto primitives, which come from a vetted library). Aim for human-reviewable end-to-end. +- **Stateless across rotations.** Every rotation reads its inputs from chain + S3, writes outputs, exits. No persistent runtime state. + +### What TEE-B does not do + +- **Does not authorize.** Authorization decisions are TEE-A's job (session validation, scope enforcement, JWT minting). TEE-B trusts only attested calls from TEE-A. +- **Does not retain or expose plaintext credentials.** Re-encryption is ciphertext-in, ciphertext-out; plaintext exists only transiently inside the enclave — TEE-B unwraps DEK_K, AES-decrypts to recover the user-data plaintext, immediately AES-encrypts under DEK_{K+1}, and drops the plaintext. The plaintext window is tens of microseconds per blob; it is never logged and never stored. +- **Does not respond to user-facing requests.** The user-facing decrypt path is TEE-A; TEE-B only runs on rotation cadence + revocation triggers. + +### Failure modes and recovery + +| Failure | Behavior | Recovery | +|---|---|---| +| TEE-B crashes mid-rotation | Some blobs at K+1, some still at K. Both DEKs still unwrappable from chain. | Resume from chain state on next rotation cycle. | +| Chain is wedged when rotation fires | Rotation is delayed; new DEK not published; old DEK not destroyed. | Operator runbook: wait for chain, re-trigger rotation. No data loss.
| +| `EpochDestroyed` emitted before all blobs re-encrypted | **Data loss for not-yet-rotated blobs.** | Pre-condition check: never emit `EpochDestroyed` until lifecycle confirms zero K-objects remain. | +| TEE-B compromise (no other compromise) | Currently active DEK leaks → epoch K plaintext potentially recoverable. Older epochs unaffected (DEKs already destroyed). | Force-rotate all users; revoke the compromised TEE attestation; provision new TEE-B. | +| TEE-A compromise (no other compromise) | Authorization bypass + JWT minting forgery → attacker can ask TEE-A to decrypt anything. **Historical data still leaks if attacker has captured ciphertext.** Forward secrecy still holds for *deleted* epochs. | Rotate all keys; force re-pair; revoke JWTs on chain. | +| Both TEE-A and TEE-B compromised | Worst case. Currently active epoch leaks; older epochs (DEKs destroyed, blobs lifecycle-deleted) are still gone. | Forward-secrecy property still holds for the destroyed-epoch window. | + +The combined-compromise case is what motivates the heterogeneous-threshold variant in v0.2+ ([threat-model-key-custody.md §9](./spec/threat-model-key-custody.md)). For Stage 8, single-platform TEE-A + TEE-B is acceptable given the forward-secrecy bound. + +## 4. Migration from current claims + +There are no users today, so no live data to migrate. 
The migration is doc-and-design only: + +| Doc | Action | +|---|---| +| `wiki/blockchain-tee-architecture.md` §1 | Banner + table row update; cross-ref this doc + threat-model | +| `wiki/data-classification.md` §1 | Update credential-blob row to "off-chain S3 + on-chain hash" | +| `wiki/key-security.md` §1 | Update v0.1 storage column | +| `docs/spec/credential-backend-interface.md` "Mapping to Heima Primitives" | Replace `pallet-secrets-vault::write_secret` with S3 PUT + `pallet-vault-pointers::register_blob` | +| `docs/spec/heima-gaps-vs-desired-architecture.md` | New gap entry: "off-chain ciphertext + on-chain pointers, not on-chain encrypted state" | +| `docs/spec/plans/development-stages.md` | Renumber: new Stage 8 = this doc; old Stage 8 (memory hygiene) → Stage 9; old Stage 9 (Heima holding pen) → Stage 10 | + +## 5. Open questions / TODO pickups + +1. **Pallet-level work in Heima.** `pallet-vault-pointers` shape above is approximate; needs Kai-side review against Substrate idioms. +2. **Wrapped-DEK size on chain.** 64 bytes per epoch per user; weekly rotation × N users = small but non-zero. Ballpark check at expected user-count: 100k users × 52 weeks/year × 64 bytes ≈ 333 MB/year of pointer state. Acceptable; plan for archive-and-prune of `EpochDek` after `destroyed_at + retention_window`. +3. **Per-user vs per-(user, service) DEK granularity.** Current design: one DEK per user per epoch. Alternative: one DEK per (user, service) pair. Tighter blast radius; more rotation cost. v0.1 default = per-user. +4. **S3 lifecycle TTL for old epochs.** 30 d? 7 d? Tradeoff: shorter = sharper forward-secrecy guarantee; longer = safety margin against rotation bugs. Default proposal: 14 d. +5. **Attested channel TEE-A ↔ TEE-B.** Same enclave today (single SGX); tomorrow split. Need a clear protocol shape so the same code works in both deployments — or explicit "merged for v0.1, split for v0.2." +6. **Rotation-enclave deployment cadence.** Standalone scheduled job? 
In-process inside the TEE-A worker? Both work; pick after the Heima conversation lands. +7. **Cross-region failover.** Stage 6 ships `us-east-1` only. When does the vault go multi-region? Tied to chain-availability strategy; out of Stage 8 scope. + +## 6. Cross-references + +- [`docs/spec/threat-model-key-custody.md`](./spec/threat-model-key-custody.md) — the architectural position this doc implements. +- [`docs/stage7-wip.md`](./stage7-wip.md) — OIDC + PrincipalTag, the isolation primitive Stage 8 reuses. +- [`docs/stage6-aws-setup.md`](./stage6-aws-setup.md) — AWS infra for SES + S3 (singleton); the same AWS account hosts the vault bucket. +- [`docs/spec/heima-gaps-vs-desired-architecture.md`](./spec/heima-gaps-vs-desired-architecture.md) — needs new gap entry for `pallet-vault-pointers`. +- [`docs/spec/credential-backend-interface.md`](./spec/credential-backend-interface.md) — `store_credential` / `read_credential` semantics translate cleanly; mapping table updated. +- [`docs/spec/plans/development-stages.md`](./spec/plans/development-stages.md) — Stage 8 entry, post-renumber. diff --git a/wiki/Home.md b/wiki/Home.md index c854e1ea..02b64946 100644 --- a/wiki/Home.md +++ b/wiki/Home.md @@ -10,8 +10,8 @@ AgentKeys is a credential custody service: a TEE-backed vault that issues long-l Every spec and every service on top of AgentKeys preserves these four invariants (details in [Blockchain TEE Architecture §6](blockchain-tee-architecture#6-summary-the-four-rules)): -1. **Chain stores everything persistent** — single source of truth. -2. **TEE holds all private keys and does all computation** — no key leaves the enclave. +1. **Chain stores everything persistent** — single source of truth for ownership, grants, audit, revocation, and ciphertext hashes. **Not bulk encrypted bytes** ([threat-model-key-custody](https://github.com/litentry/agentKeys/blob/main/docs/spec/threat-model-key-custody.md)). +2. 
**TEE holds key-derivation roots and per-request decryption capability** — never bulk plaintext, never persistent per-user material beyond what the master seed reproduces. 3. **Clients hold only a JWT, not private keys** — bearer tokens, short blast radius. 4. **AgentKeys brokers credentials, not operations** — daemons call remote services directly; our compute scales with user count, not operation frequency. @@ -25,6 +25,7 @@ Every spec and every service on top of AgentKeys preserves these four invariants - **[Session Token](session-token)** — 30-day JWT bearer; issuance, storage, revocation - **[Key Security](key-security)** — TEE keys, master session key, storage tiers, threat model - **[Data Classification](data-classification)** — data classes, where each lives, retention policy +- **[Threat Model: Key Custody](https://github.com/litentry/agentKeys/blob/main/docs/spec/threat-model-key-custody.md)** *(spec)* — why nothing sensitive lives on chain or persistently in TEE; off-chain ciphertext + forward-secret epoch rotation (Stage 8) ### Credential lifecycle (canonical, published wiki) diff --git a/wiki/blockchain-tee-architecture.md b/wiki/blockchain-tee-architecture.md index 8281e2f1..8a8db7d4 100644 --- a/wiki/blockchain-tee-architecture.md +++ b/wiki/blockchain-tee-architecture.md @@ -13,6 +13,8 @@ Companion docs: ### Blockchain (Heima parachain) +> **Superseded 2026-04-26.** The "Credential blobs … `pallet-secrets-vault`" row below was the v0.1 design until the threat-model review found that on-chain ciphertext creates an unbounded harvest-now-decrypt-later window. The canonical position is now **off-chain ciphertext + on-chain hash**, delivered in Stage 8. See [`docs/spec/threat-model-key-custody.md`](../docs/spec/threat-model-key-custody.md) and [`docs/stage8-wip.md`](../docs/stage8-wip.md). The row is preserved for historical context; the new design uses `pallet-vault-pointers` instead.
+ The blockchain is the **single source of truth** for all persistent state. It is an append-only, publicly verifiable, tamper-evident ledger that every participant can read and no single party can rewrite. **What it stores (on-chain state):** @@ -22,10 +24,11 @@ The blockchain is the **single source of truth** for all persistent state. It is | --------------------------------------------------------------------- | ---------------------------- | ------------------------------ | --------------------------------------------------- | | OmniAccount records (wallet address, linked identities) | `pallet-omni-account` | TEE (on account creation) | TEE, CLI, block explorer | | Session records (pubkey, scope, TTL, parent, revocation status) | New AgentKeys pallet | TEE (on session mint / revoke) | TEE (on every credential read) | -| Credential blobs (encrypted ciphertext, keyed by owner/agent/service) | `pallet-secrets-vault` (new) | TEE (on `store_credential`) | TEE (on `read_credential`) | +| ~~Credential blobs (encrypted ciphertext, keyed by owner/agent/service)~~ Vault pointers `(user_wallet, service, epoch, blob_id, ciphertext_hash)` | ~~`pallet-secrets-vault` (deprecated)~~ → `pallet-vault-pointers` (Stage 8) | TEE (on `store_credential`) | TEE (on `read_credential`); chain holds **no ciphertext**; bytes live in S3 | +| Per-epoch wrapped DEK metadata (Stage 8) | `pallet-vault-pointers::EpochDek` | TEE-B rotation enclave | TEE-A on decrypt | | Pair requests (daemon_pubkey, scope, alias, valid_until) | New AgentKeys pallet | TEE (on pair request open) | TEE (on master fetch / approve) | | Pair approvals (encrypted child session, master signature) | New AgentKeys pallet | TEE (on approval) | TEE (daemon reads approval) | -| Audit events (credential reads, stores, revocations, pair events) | New AgentKeys pallet | TEE (async, paymaster-funded) | Block explorer, Subsquid indexer, `agentkeys usage` | +| Audit events (credential reads, stores, revocations, pair events, 
**Stage 8: `BlobWritten`, `EpochRotated`, `EpochDestroyed`**) | New AgentKeys pallet | TEE (async, paymaster-funded) | Block explorer, Subsquid indexer, `agentkeys usage` | | Wallet USDC balances (x402 payment rail) | EVM / `pallet-evm` | x402 protocol | Agents, billing system | diff --git a/wiki/data-classification.md b/wiki/data-classification.md index aaab4c81..afeb1061 100644 --- a/wiki/data-classification.md +++ b/wiki/data-classification.md @@ -1,12 +1,15 @@ # Data Classification: what is encrypted, what is plaintext, where -Every piece of data in AgentKeys exists in one or more of three locations: the blockchain, the TEE, and the client (CLI or daemon). This document maps each data item to its encryption status at each location. +> **Updated 2026-04-26 — credential storage row.** The "Credential blobs" row in §1 used to read "On chain: encrypted ciphertext." That position is superseded — sensitive ciphertext now lives **off-chain** (S3) under per-epoch DEKs that rotate; chain holds only `(blob_pointer, ciphertext_hash, epoch)`. Architectural rationale: [`docs/spec/threat-model-key-custody.md`](../docs/spec/threat-model-key-custody.md). Operational design: [`docs/stage8-wip.md`](../docs/stage8-wip.md). The change is structural, not cosmetic — it closes the harvest-now-decrypt-later gap that on-chain ciphertext could not. + +Every piece of data in AgentKeys exists in one or more of four locations: the blockchain, the TEE, **off-chain content-addressed storage (S3 today)**, and the client (CLI or daemon). This document maps each data item to its encryption status at each location. 
Companion docs: - `[wiki/blockchain-tee-architecture.md](./blockchain-tee-architecture.md)` — how the chain and TEE split responsibilities - `[wiki/key-security.md](./key-security.md)` — session vs credential security, hardening layers - `[wiki/serve-and-audit.md](./serve-and-audit.md)` — audit submission, Pattern 4, fee funding +- [`docs/spec/threat-model-key-custody.md`](../docs/spec/threat-model-key-custody.md) — why nothing sensitive lives on chain or persistently in TEE; forward-secret epoch rotation --- @@ -15,7 +18,7 @@ Companion docs: | Data | On chain | In TEE | On client | | ----------------------------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------------- | ----------------------------------------------------------------------- | -| **Credential blobs** (API keys — the actual secrets) | Encrypted (ciphertext, encrypted to TEE shielding key) | Plaintext in memory during decrypt, then wiped | Plaintext in memory during MCP delivery, then wiped (Stage 8 hardening) | +| **Credential blobs** (API keys — the actual secrets) | **Pointer + ciphertext hash only** (`pallet-vault-pointers`); ciphertext lives off-chain in S3 under per-epoch DEK | Plaintext in memory during decrypt, then wiped; DEK unwrapped per-request, never persistent in TEE memory across calls | Plaintext in memory during MCP delivery, then wiped (Stage 9 hardening, formerly Stage 8) | | **Shielding private key** | Public key only (registered via `register_enclave()`) | Sealed storage (SGX encrypted at rest) | Never | | **RSA JWT signing key** | Never | Sealed storage (PKCS#1 DER file) | Never | | **User wallet private keys** (current model: per-user) | Never | Sealed storage (per `pallet-bitacross`) | Never | diff --git a/wiki/key-security.md b/wiki/key-security.md index d134175d..9f96d5fc 100644 --- a/wiki/key-security.md +++ b/wiki/key-security.md @@ -1,5 +1,7 @@ # Key 
Security in AgentKeys +> **Updated 2026-04-26 — v0.1 storage column.** §1 used to say "v0.1 Heima: encrypted blob in `pallet-secrets-vault` (on chain)." That target is superseded. The canonical v0.1 design moves ciphertext **off-chain** (S3) under per-epoch DEKs that rotate; chain holds only pointer + hash. See [`docs/spec/threat-model-key-custody.md`](../docs/spec/threat-model-key-custody.md) and [`docs/stage8-wip.md`](../docs/stage8-wip.md). Stage 9 (memory hygiene; renumbered from Stage 8 in the same change) is unaffected. + Reference notes on how AgentKeys stores session tokens and user credentials, what the macOS Keychain prompt behavior actually means, and why our architecture looks different from 1Password-style local vaults. These notes were compiled from a Stage 4 manual-test debugging session and are meant to answer the questions real testers and reviewers ask when they first see prompts pop up. @@ -14,7 +16,7 @@ AgentKeys splits secrets across two tiers with different security properties. Ev | Tier | What it is | Where it lives (v0 mock) | Where it lives (v0.1 Heima) | | --------------------------- | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | | **Master session key** | The CLI's own bearer token, used in `Authorization: Bearer ...` to authenticate to the backend. One per user-device pair. | OS keychain via `keyring-rs` (macOS Keychain / Windows Credential Manager / Linux libsecret) | Same | -| **User-stored credentials** | The API keys the user's agents actually consume — `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, etc. 
| Encrypted blob in backend SQLite (axum + rusqlite) | Encrypted blob in Heima TEE (`pallet-secrets-vault`), client-encrypted to the TEE shielding key before transit | +| **User-stored credentials** | The API keys the user's agents actually consume — `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, etc. | Encrypted blob in backend SQLite (axum + rusqlite) | **Off-chain ciphertext in S3** under per-epoch DEK; chain holds `(blob_pointer, ciphertext_hash, epoch)` via `pallet-vault-pointers` (Stage 8). DEK wrapped under TEE shielding key, unwrapped per-request, destroyed on epoch rotation. | Reference spec lines: