diff --git a/.github/workflows/chart.yml b/.github/workflows/chart.yml index 3d98cd25..fbf6b111 100644 --- a/.github/workflows/chart.yml +++ b/.github/workflows/chart.yml @@ -260,6 +260,100 @@ jobs: grep -q "Does not match pattern '\^/'" /tmp/tls-schema.out \ || { echo "::error::schema error did not name the absolute-path violation"; cat /tmp/tls-schema.out; exit 1; } echo "ok: schema rejects non-absolute tls.mountPath" + - name: AppArmor profile version-gating (M5b follow-up) + # Falsifier for the M5b appArmor follow-up. Restricted PSS + # permits an undefined AppArmor profile, so the chart was + # compliant without this knob; explicit RuntimeDefault hardens + # against clusters whose local policy is stricter. The + # structured `pod.securityContext.appArmorProfile` field exists + # in K8s 1.30+; on 1.28 / 1.29 the legacy + # `container.apparmor.security.beta.kubernetes.io/<container-name>` + # pod annotation carries the same intent. The chart auto- + # selects via semverCompare against + # .Capabilities.KubeVersion.Version — operators do not pick. + # + # Six mutation checks bound the contract: + # 1. K8s 1.30 + default values -> structured field rendered, + # legacy annotation absent. + # 2. K8s 1.28 + default values -> legacy annotation rendered, + # structured field absent. + # 3. K8s 1.30 + toggle off -> neither rendered. + # 4. K8s 1.28 + toggle off -> neither rendered. + # 5. type=Localhost without + # localhostProfile -> render fails closed with + # operator-visible error. + # 6. type=Localhost with + # localhostProfile -> structured field carries the + # custom profile name. + run: | + set -eo pipefail + # 1. K8s 1.30 default: structured field, no annotation.
+ r130=$(helm template demo install/kubernetes/tracecore \ + --kube-version 1.30.0 \ + --show-only templates/daemonset.yaml) + aa_type=$(echo "$r130" | yq '.spec.template.spec.securityContext.appArmorProfile.type') + test "$aa_type" = "RuntimeDefault" \ + || { echo "::error::K8s 1.30 default: appArmorProfile.type = $aa_type (expected RuntimeDefault)"; exit 1; } + legacy=$(echo "$r130" | yq '.spec.template.metadata.annotations | with_entries(select(.key | test("apparmor"))) | length') + test "$legacy" = "0" \ + || { echo "::error::K8s 1.30 default: legacy apparmor annotation rendered (expected absent)"; exit 1; } + echo "ok: K8s 1.30 default — structured field RuntimeDefault, no legacy annotation" + # 2. K8s 1.28 default: legacy annotation, no structured field. + r128=$(helm template demo install/kubernetes/tracecore \ + --kube-version 1.28.0 \ + --show-only templates/daemonset.yaml) + ann=$(echo "$r128" | yq '.spec.template.metadata.annotations["container.apparmor.security.beta.kubernetes.io/tracecore"]') + test "$ann" = "runtime/default" \ + || { echo "::error::K8s 1.28 default: legacy annotation = $ann (expected runtime/default)"; exit 1; } + structured=$(echo "$r128" | yq '.spec.template.spec.securityContext | has("appArmorProfile")') + test "$structured" = "false" \ + || { echo "::error::K8s 1.28 default: structured appArmorProfile rendered (expected absent — field exists only on 1.30+)"; exit 1; } + echo "ok: K8s 1.28 default — legacy annotation runtime/default, no structured field" + # 3. K8s 1.30 toggle off: neither rendered. + r130_off=$(helm template demo install/kubernetes/tracecore \ + --kube-version 1.30.0 \ + --set securityHardening.appArmorProfile.enabled=false \ + --show-only templates/daemonset.yaml) + structured_off=$(echo "$r130_off" | yq '.spec.template.spec.securityContext | has("appArmorProfile")') + test "$structured_off" = "false" \ + || { echo "::error::K8s 1.30 toggle off: structured appArmorProfile rendered (expected absent)"; exit 1; } + legacy_130_off=$(echo "$r130_off" | yq '.spec.template.metadata.annotations | with_entries(select(.key | test("apparmor"))) | length') + test "$legacy_130_off" = "0" \ + || { echo "::error::K8s 1.30 toggle off: legacy apparmor annotation rendered (expected absent)"; exit 1; } + # 4.
K8s 1.28 toggle off: neither rendered. + r128_off=$(helm template demo install/kubernetes/tracecore \ + --kube-version 1.28.0 \ + --set securityHardening.appArmorProfile.enabled=false \ + --show-only templates/daemonset.yaml) + legacy_off=$(echo "$r128_off" | yq '.spec.template.metadata.annotations | with_entries(select(.key | test("apparmor"))) | length') + test "$legacy_off" = "0" \ + || { echo "::error::K8s 1.28 toggle off: legacy apparmor annotation rendered (expected absent)"; exit 1; } + structured_128_off=$(echo "$r128_off" | yq '.spec.template.spec.securityContext | has("appArmorProfile")') + test "$structured_128_off" = "false" \ + || { echo "::error::K8s 1.28 toggle off: structured appArmorProfile rendered (expected absent)"; exit 1; } + echo "ok: toggle off — neither code path renders on 1.30 or 1.28" + # 5. type=Localhost without localhostProfile fails closed. + if helm template demo install/kubernetes/tracecore \ + --kube-version 1.30.0 \ + --set securityHardening.appArmorProfile.type=Localhost \ + --show-only templates/daemonset.yaml >/tmp/aa-bad.out 2>&1; then + echo "::error::type=Localhost without localhostProfile rendered successfully (expected helm failure)" + cat /tmp/aa-bad.out + exit 1 + fi + grep -q "localhostProfile to be set" /tmp/aa-bad.out \ + || { echo "::error::missing 'localhostProfile to be set' guidance in helm error"; cat /tmp/aa-bad.out; exit 1; } + echo "ok: type=Localhost without localhostProfile fails closed with operator-visible error" + # 6. type=Localhost with profile renders the path through.
+ custom=$(helm template demo install/kubernetes/tracecore \ + --kube-version 1.30.0 \ + --set securityHardening.appArmorProfile.type=Localhost \ + --set securityHardening.appArmorProfile.localhostProfile=tracecore-collector-v1 \ + --show-only templates/daemonset.yaml \ + | yq '.spec.template.spec.securityContext.appArmorProfile.localhostProfile') + test "$custom" = "tracecore-collector-v1" \ + || { echo "::error::Localhost profile = $custom (expected tracecore-collector-v1)"; exit 1; } + echo "ok: type=Localhost + localhostProfile renders structured field with custom profile path" - name: priorityClassName + telemetry-off render correctness # Two value-conditional template paths that yq cannot infer from # default-render output alone: priorityClassName must appear when @@ -415,7 +509,16 @@ jobs: drop=$(echo "$ds_render" | yq '.spec.template.spec.containers[0].securityContext.capabilities.drop[0]') test "$drop" = "ALL" \ || { echo "::error::capabilities.drop[0] = $drop (expected ALL)"; exit 1; } - echo "ok: DaemonSet hardened-field assertions all green (criterion-10)" + # AppArmor structured field (M5b follow-up). The default + # kubeVersion under `helm template` (no --kube-version) is + # the one embedded in the helm binary, so this assertion + # assumes CI's helm defaults to >=1.30 (TODO confirm the + # pinned helm version); an older default would render the + # legacy annotation instead and fail here. + aa=$(echo "$ds_render" | yq '.spec.template.spec.securityContext.appArmorProfile.type') + test "$aa" = "RuntimeDefault" \ + || { echo "::error::production preset appArmorProfile.type = $aa (expected RuntimeDefault — M5b follow-up)"; exit 1; } + echo "ok: DaemonSet hardened-field assertions all green (criterion-10 + M5b)" # Log level warn in the rendered tracecore config.
cm_render=$(yq 'select(.kind == "ConfigMap" and .metadata.name == "demo-tracecore-config")' /tmp/prod-render.yaml) loglevel=$(echo "$cm_render" | yq '.data["config.yaml"]' | yq '.service.telemetry.logs.level') diff --git a/docs/followups/M5b.md b/docs/followups/M5b.md index 5ce541d0..d7ad2a7b 100644 --- a/docs/followups/M5b.md +++ b/docs/followups/M5b.md @@ -56,12 +56,14 @@ M5b scope. Order is roughly highest-leverage first. rubric explicitly. *Trigger:* M5 install-bench harness lands (its `bench/results/*.json` already captures the shape), OR a regression is reported. -- [ ] **`appArmorProfile` on the rendered DaemonSet.** Restricted PSS - *permits* AppArmor profile to be undefined, so the chart is - compliant today. Setting `RuntimeDefault` would harden against - clusters with stricter local policy and shave one item off - adopter security checklists. K8s 1.30+ uses - `pod.securityContext.appArmorProfile`; chart targets `>=1.28` - so this needs a version-gated template or values toggle. - *Trigger:* kubeVersion floor moves to >=1.30, or first adopter - asks. +- [x] **`appArmorProfile` on the rendered DaemonSet.** Shipped + proactively (sibling to L31 production-preset hardening) via + `securityHardening.appArmorProfile.enabled` (default `true`). + The chart auto-selects per `kubeVersion`: structured + `pod.securityContext.appArmorProfile: { type: RuntimeDefault }` + on K8s 1.30+, legacy + `container.apparmor.security.beta.kubernetes.io/tracecore: runtime/default` + pod annotation on 1.28 / 1.29. Cross-linked from + [`install/kubernetes/tracecore/README.md`](../../install/kubernetes/tracecore/README.md) + §"Defense-in-depth above restricted-PSS" and + [`docs/threat-model.md`](../threat-model.md) §B1. diff --git a/docs/threat-model.md b/docs/threat-model.md index 88feb5aa..c381a370 100644 --- a/docs/threat-model.md +++ b/docs/threat-model.md @@ -127,6 +127,7 @@ that are `n/a` are omitted.
**M** = mitigated, **A** = accepted risk | **Tampering** | Adversary controlling another pod symlinks `/dev/kmsg` or `/var/log/journal/*.journal` to point at attacker-controlled data via mount-namespace tricks. | **M** — `hostPath` mounts resolve in the host namespace; pod-side mount-NS games cannot redirect the bind. RO mount on `/dev/kmsg`. | | **Information disclosure** | Tracecore reads kernel log lines from other tenants on the same node and emits them via OTLP to the operator's backend. | **A** — accepted by design. Operators running multi-tenant nodes (rare on training clusters) must scope tracecore install to single-tenant node pools. Documented in chart README §security. | | **DoS** | Adversary floods `/dev/kmsg` (high-rate printk) hoping to wedge the receiver and starve memory. | **M** — `journaldreceiver` + `filelogreceiver` use upstream rate-limited cursor reads with `file_storage` extension persistence; per-receiver overhead budget caps RSS at 10MB (O2). Self-telemetry alerts on `otelcol_receiver_refused_log_records_total > 0`. | +| **Elevation** | Compromised receiver parsing a hostile `/dev/kmsg` line exploits a process-level memory bug to broaden its syscall surface (e.g. `mount`, `unshare`, `ptrace` outside its own pod). | **M** — process-isolation defense-in-depth: restricted-PSS pod (`runAsNonRoot`, `readOnlyRootFilesystem`, `seccompProfile: RuntimeDefault`, `capabilities.drop: [ALL]`) + AppArmor `RuntimeDefault` profile (M5b chart follow-up — `securityHardening.appArmorProfile.enabled` default `true`; renders the structured `pod.securityContext.appArmorProfile` field on K8s 1.30+ and the legacy `container.apparmor.security.beta.kubernetes.io/<container-name>` annotation on 1.28 / 1.29). Each layer narrows a different facet of the kernel surface; restricted-PSS handles capabilities/UID/RO-rootfs/seccomp, AppArmor adds path-based mandatory access control (file, mount, ptrace and signal rules) that seccomp's syscall filter does not express. | ### B2.
Kube-apiserver diff --git a/install/kubernetes/tracecore/README.md b/install/kubernetes/tracecore/README.md index 50afb0e8..1df4ee68 100644 --- a/install/kubernetes/tracecore/README.md +++ b/install/kubernetes/tracecore/README.md @@ -164,6 +164,9 @@ automatically; PersistentVolumeClaims (if any are added via the | `containerSecurityContext.readOnlyRootFilesystem` | bool | `true` | tracecore writes only to `/tmp` (emptyDir). | | `containerSecurityContext.capabilities.drop` | list | `[ALL]` | restricted-PSS gate. | | `containerSecurityContext.capabilities.add` | list | `[]` | SYS_PTRACE is the only allowed addition; conftest rejects any other. | +| `securityHardening.appArmorProfile.enabled` | bool | `true` | Pin AppArmor `RuntimeDefault` on the DaemonSet pod (M5b). Version-gated: K8s 1.30+ renders `pod.securityContext.appArmorProfile`; 1.28 / 1.29 renders the legacy `container.apparmor.security.beta.kubernetes.io/tracecore` annotation. Auto-selected via `.Capabilities.KubeVersion.Version`. | +| `securityHardening.appArmorProfile.type` | string | `RuntimeDefault` | `RuntimeDefault` \| `Unconfined` \| `Localhost`. The latter requires `localhostProfile` (chart fails closed without it). | +| `securityHardening.appArmorProfile.localhostProfile` | string | `""` | Name of an AppArmor profile already loaded on the node; required when `type: Localhost`. | | `telemetry.enabled` | bool | `true` | Toggle for the chart-rendered self-metrics + healthcheck surface. With `enabled: false` the chart omits both the `service.telemetry.metrics` block and the `healthcheckextension`, and the kubelet probes drop off the rendered DaemonSet. | | `telemetry.metricsListen` | string | `0.0.0.0:8888` | `service.telemetry.metrics` Prometheus-scrape listener for the collector's own metrics (chart port `telemetry`). | | `telemetry.healthListen` | string | `0.0.0.0:13133` | `healthcheckextension` listener; kubelet liveness AND readiness probes hit this port (chart port `health`).
The extension serves both probes on the single path at `telemetry.healthPath` — there is no separate-path readiness endpoint. | @@ -231,6 +234,10 @@ and turns on: steady-state load. - **`tolerations: [{operator: Exists}]`** so tracecore lands on control-plane and tainted GPU pools by default. +- **AppArmor `RuntimeDefault`** (M5b follow-up) — pins the AppArmor + profile via the structured `pod.securityContext.appArmorProfile` field on + K8s 1.30+ and the legacy annotation on 1.28 / 1.29. Hardens the + syscall surface above what restricted-PSS requires. The preset assumes the cluster CNI honors NetworkPolicy (Calico / Cilium / kube-router / canal-flannel — NOT bare Flannel). @@ -583,6 +590,36 @@ by the bundled conftest policy and CI gate: | `hostIPC: false` | DaemonSet template (not values-tunable) + conftest deny | | `hostNetwork: false` | DaemonSet template (not values-tunable) + conftest deny | +### Defense-in-depth above restricted-PSS + +Restricted PSS *permits* an undefined AppArmor profile, so the chart +default values are compliant. The chart goes one step further by +pinning `RuntimeDefault` — the syscall-narrowing profile shipped with +every containerd / CRI-O package — under +`securityHardening.appArmorProfile.enabled` (default `true`). This +narrows the syscall surface a compromised receiver could reach +against the read-only `/dev/kmsg` + journald hostPath mounts; see +[`docs/threat-model.md`](../../../docs/threat-model.md) §B1 for the +boundary. + +The chart auto-selects the render form via `semverCompare` against +`.Capabilities.KubeVersion.Version`: + +- **Kubernetes 1.30+** — emits the structured field + `pod.securityContext.appArmorProfile: { type: RuntimeDefault }`. + Kubelet rejects pod-create on an unknown profile name (fails closed). +- **Kubernetes 1.28 / 1.29** — emits the legacy pod annotation + `container.apparmor.security.beta.kubernetes.io/tracecore: runtime/default`. + Deprecated in K8s 1.30 but still honored.
Fails open + (unknown-profile name is silently dropped) — that's the upstream + semantics, not a chart bug. The 1.30 floor closes the gap. + +Operators do not pick which form renders. Toggle +`securityHardening.appArmorProfile.enabled: false` to opt out (e.g. +on Windows-node DaemonSet targets where AppArmor is irrelevant); +override `type: Localhost` + `localhostProfile: <profile-name>` to wire a +node-preloaded custom profile. + ### Documented deviations The `restricted` profile permits the empty capability set only. The diff --git a/install/kubernetes/tracecore/templates/daemonset.yaml b/install/kubernetes/tracecore/templates/daemonset.yaml index 55cfa133..c7eb4b4b 100644 --- a/install/kubernetes/tracecore/templates/daemonset.yaml +++ b/install/kubernetes/tracecore/templates/daemonset.yaml @@ -28,6 +28,39 @@ spec: prometheus.io/port: {{ $metricsPort | quote }} prometheus.io/path: /metrics {{- end }} + {{- /* AppArmor pre-1.30 fallback (M5b follow-up). The + `pod.securityContext.appArmorProfile` field exists only in + Kubernetes 1.30+; older clusters honor the legacy + container-scoped annotation + `container.apparmor.security.beta.kubernetes.io/<container-name>` + instead. Chart `kubeVersion` floor is `>=1.28.0-0`, so + both code paths must coexist until that floor moves to + 1.30. The annotation form is silently ignored by 1.30+ + kubelets when the structured field is also set, so it's + safe to emit only on the legacy path. + The annotation value mirrors `type` so the legacy path + honors the same knob as the structured field: + RuntimeDefault -> `runtime/default`, Unconfined -> + `unconfined`, Localhost -> `localhost/<localhostProfile>` + (render fails closed when the profile name is empty). + See docs/threat-model.md §B1 (host-fs reads) — the + AppArmor `runtime/default` profile narrows the syscall + surface that a compromised receiver could reach against + the read-only `/dev/kmsg` + journald hostPath mounts. */}} + {{- $aa := .Values.securityHardening.appArmorProfile }} + {{- if and $aa.enabled (not (semverCompare ">=1.30.0-0" .Capabilities.KubeVersion.Version)) }} + {{- $aaType := $aa.type | default "RuntimeDefault" }} + {{- if eq $aaType "Localhost" }} + {{- if not $aa.localhostProfile }} + {{- fail "securityHardening.appArmorProfile.type=Localhost requires securityHardening.appArmorProfile.localhostProfile to be set" }} + {{- end }} + container.apparmor.security.beta.kubernetes.io/tracecore: {{ printf "localhost/%s" $aa.localhostProfile | quote }} + {{- else if eq $aaType "Unconfined" }} + container.apparmor.security.beta.kubernetes.io/tracecore: unconfined + {{- else }} + container.apparmor.security.beta.kubernetes.io/tracecore: runtime/default + {{- end }} + {{- end }} {{- with .Values.podAnnotations }} {{- toYaml .
| nindent 8 }} {{- end }} @@ -49,7 +82,29 @@ spec: {{- with .Values.priorityClassName }} priorityClassName: {{ . | quote }} {{- end }} - securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- /* Pod-level securityContext. + On Kubernetes 1.30+ we inject the structured + `appArmorProfile` field (added in 1.30, + https://kubernetes.io/docs/tutorials/security/apparmor/); + on 1.28 and 1.29 the legacy + `container.apparmor.security.beta.kubernetes.io/<container-name>` + pod annotation above carries the same intent. The + structured form is preferred whenever available because + kubelet rejects pod-create on unknown profiles instead of + silently dropping the annotation (the deprecated + annotation path fails open). M5b follow-up. */}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- if and .Values.securityHardening.appArmorProfile.enabled (semverCompare ">=1.30.0-0" .Capabilities.KubeVersion.Version) }} + appArmorProfile: + type: {{ .Values.securityHardening.appArmorProfile.type | default "RuntimeDefault" }} + {{- if eq (.Values.securityHardening.appArmorProfile.type | default "RuntimeDefault") "Localhost" }} + {{- if not .Values.securityHardening.appArmorProfile.localhostProfile }} + {{- fail "securityHardening.appArmorProfile.type=Localhost requires securityHardening.appArmorProfile.localhostProfile to be set" }} + {{- end }} + localhostProfile: {{ .Values.securityHardening.appArmorProfile.localhostProfile | quote }} + {{- end }} + {{- end }} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml .
| nindent 8 }} {{- end }} diff --git a/install/kubernetes/tracecore/values-production.yaml b/install/kubernetes/tracecore/values-production.yaml index 77fe7c11..772c8779 100644 --- a/install/kubernetes/tracecore/values-production.yaml +++ b/install/kubernetes/tracecore/values-production.yaml @@ -102,6 +102,29 @@ containerSecurityContext: drop: [ALL] add: [] +# Restricted PSS permits an undefined AppArmor profile, so the chart +# default values are compliant. The production preset hardens against +# clusters whose local policy is stricter than restricted-PSS by +# explicitly pinning `RuntimeDefault` — the syscall-narrowing profile +# shipped with every containerd / CRI-O package. M5b follow-up. +# +# Version-gating is automatic: on Kubernetes 1.30+ the template emits +# `pod.securityContext.appArmorProfile: { type: RuntimeDefault }` +# (the structured field); on 1.28 / 1.29 it falls back to the legacy +# `container.apparmor.security.beta.kubernetes.io/tracecore: runtime/default` +# pod annotation. Both code paths converge on identical kernel-side +# behaviour. Operators do not pick which form renders. +# +# Cross-link: `docs/threat-model.md` §B1 — narrows the syscall surface +# that a compromised receiver could reach against the read-only +# /dev/kmsg + journald hostPath mounts. + +securityHardening: + appArmorProfile: + enabled: true + type: RuntimeDefault + localhostProfile: "" + # --- network: default-deny ingress/egress (criterion-10 + #301) ----------- # # Operator MUST fill `allowedScrapers` + `allowedEgressEndpoints` diff --git a/install/kubernetes/tracecore/values.schema.json b/install/kubernetes/tracecore/values.schema.json index 70fdd29f..bdf6576f 100644 --- a/install/kubernetes/tracecore/values.schema.json +++ b/install/kubernetes/tracecore/values.schema.json @@ -99,6 +99,34 @@ } }, + "securityHardening": { + "type": "object", + "additionalProperties": false, + "description": "Defense-in-depth knobs that sit above the restricted-PSS baseline.
M5b follow-up — `appArmorProfile` hardens the syscall surface so adopter security checklists no longer flag an undefined profile.", + "properties": { + "appArmorProfile": { + "type": "object", + "additionalProperties": false, + "required": ["enabled"], + "properties": { + "enabled": { + "type": "boolean", + "description": "When true, the chart emits `pod.securityContext.appArmorProfile` on Kubernetes 1.30+ and the legacy `container.apparmor.security.beta.kubernetes.io/<container-name>` annotation on 1.28/1.29. The render auto-detects via `.Capabilities.KubeVersion.Version`." + }, + "type": { + "type": "string", + "enum": ["RuntimeDefault", "Unconfined", "Localhost"], + "description": "AppArmor profile type. Mirrors the upstream Kubernetes enum. Default RuntimeDefault." + }, + "localhostProfile": { + "type": "string", + "description": "Name of an AppArmor profile already loaded on the node. Required when `type: Localhost`; the chart fails closed with a clear error if missing." + } + } + } + } + }, + "telemetry": { "type": "object", "additionalProperties": false, diff --git a/install/kubernetes/tracecore/values.yaml b/install/kubernetes/tracecore/values.yaml index a70453af..4085e57e 100644 --- a/install/kubernetes/tracecore/values.yaml +++ b/install/kubernetes/tracecore/values.yaml @@ -117,6 +117,46 @@ containerSecurityContext: drop: [ALL] add: [] +# Optional defense-in-depth hardening knobs that sit above the +# restricted-PSS baseline. Restricted PSS permits an undefined AppArmor +# profile, so the chart is compliant today; explicitly setting +# `RuntimeDefault` hardens against clusters whose local policy is +# stricter than restricted-PSS and removes one item from adopter +# security checklists (M5b follow-up). +# +# Version-gating: the structured `pod.securityContext.appArmorProfile` +# field exists in Kubernetes 1.30+.
On 1.28 / 1.29 (the chart's +# `kubeVersion` floor is `>=1.28.0-0`) the template falls back to the +# legacy `container.apparmor.security.beta.kubernetes.io/<container-name>` +# pod annotation. Both code paths converge on `runtime/default`. The +# template auto-selects via `semverCompare` against +# `.Capabilities.KubeVersion.Version`; operators do not pick the +# annotation vs structured-field form. +# +# Toggle `securityHardening.appArmorProfile.enabled: false` only if +# the cluster runs on a Windows-node DaemonSet target (AppArmor is +# Linux-only; the field is silently accepted but has no effect, so +# disabling it is cosmetic, not functional). To wire a custom +# `Localhost` profile, keep `enabled: true` and set `type: Localhost` +# and `localhostProfile: <profile-name>` in an operator overlay. +securityHardening: + appArmorProfile: + enabled: true + # type: RuntimeDefault | Unconfined | Localhost. The chart defaults + # to RuntimeDefault — the syscall-narrowing profile shipped with + # every Linux distro's containerd / CRI-O package. Set Unconfined + # only when debugging a false-positive: it falls outside the PSS + # baseline/restricted allow-list and the conftest policy does NOT + # reject it, so the operator signs off on the regression. Localhost + # requires `localhostProfile` and a pre-loaded profile on every node. + type: RuntimeDefault + # Used only when `type: Localhost`. Name of an AppArmor profile + # already loaded on every node (profile files typically live + # under `/etc/apparmor.d/`) — e.g. `tracecore-collector-v1`. Empty + # otherwise; the chart fails closed with a clear error if + # `type: Localhost` is set without this field. + localhostProfile: "" + # tracecore self-telemetry surface. # # RFC-0013 PR-A2 (2026-05-30): the OCB-assembled binary uses upstream