Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 95 additions & 1 deletion .github/workflows/chart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,91 @@ jobs:
grep -q "Does not match pattern '\^/'" /tmp/tls-schema.out \
|| { echo "::error::schema error did not name the absolute-path violation"; cat /tmp/tls-schema.out; exit 1; }
echo "ok: schema rejects non-absolute tls.mountPath"
- name: AppArmor profile version-gating (M5b follow-up)
  # Falsifier for the M5b appArmor follow-up. Restricted PSS
  # permits an undefined AppArmor profile, so the chart was
  # compliant without this knob; explicit RuntimeDefault hardens
  # against clusters whose local policy is stricter. The
  # structured `pod.securityContext.appArmorProfile` field is GA
  # in K8s 1.30+; on 1.28 / 1.29 the legacy
  # `container.apparmor.security.beta.kubernetes.io/<container>`
  # pod annotation carries the same intent. The chart auto-
  # selects via semverCompare against
  # .Capabilities.KubeVersion.Version — operators do not pick.
  #
  # Six mutation checks bound the contract:
  #   1. K8s 1.30 + default values -> structured field rendered,
  #                                   legacy annotation absent.
  #   2. K8s 1.28 + default values -> legacy annotation rendered,
  #                                   structured field absent.
  #   3. K8s 1.30 + toggle off     -> neither rendered.
  #   4. K8s 1.28 + toggle off     -> neither rendered.
  #   5. type=Localhost without
  #      localhostProfile          -> render fails closed with
  #                                   operator-visible error.
  #   6. type=Localhost with
  #      localhostProfile          -> structured field carries the
  #                                   custom profile name.
  run: |
    set -eo pipefail
    chart=install/kubernetes/tracecore

    # render <kube-version> [extra helm args...]
    # Prints only the DaemonSet manifest for the given cluster version.
    render() {
      local kube_version=$1
      shift
      helm template demo "$chart" \
        --kube-version "$kube_version" \
        "$@" \
        --show-only templates/daemonset.yaml
    }
    # Reads a manifest on stdin; prints how many pod annotations have
    # "apparmor" in their key (0 means the legacy form is absent).
    legacy_count() {
      yq '.spec.template.metadata.annotations | with_entries(select(.key | test("apparmor"))) | length'
    }
    # Reads a manifest on stdin; prints true/false for whether the pod
    # securityContext carries the structured appArmorProfile field.
    has_structured() {
      yq '.spec.template.spec.securityContext | has("appArmorProfile")'
    }

    # 1. K8s 1.30 default: structured field, no annotation.
    r130=$(render 1.30.0)
    aa_type=$(echo "$r130" | yq '.spec.template.spec.securityContext.appArmorProfile.type')
    test "$aa_type" = "RuntimeDefault" \
      || { echo "::error::K8s 1.30 default: appArmorProfile.type = $aa_type (expected RuntimeDefault)"; exit 1; }
    legacy=$(echo "$r130" | legacy_count)
    test "$legacy" = "0" \
      || { echo "::error::K8s 1.30 default: legacy apparmor annotation rendered (expected absent)"; exit 1; }
    echo "ok: K8s 1.30 default — structured field RuntimeDefault, no legacy annotation"

    # 2. K8s 1.28 default: legacy annotation, no structured field.
    r128=$(render 1.28.0)
    ann=$(echo "$r128" | yq '.spec.template.metadata.annotations["container.apparmor.security.beta.kubernetes.io/tracecore"]')
    test "$ann" = "runtime/default" \
      || { echo "::error::K8s 1.28 default: legacy annotation = $ann (expected runtime/default)"; exit 1; }
    structured=$(echo "$r128" | has_structured)
    test "$structured" = "false" \
      || { echo "::error::K8s 1.28 default: structured appArmorProfile rendered (expected absent — GA in 1.30+)"; exit 1; }
    echo "ok: K8s 1.28 default — legacy annotation runtime/default, no structured field"

    # 3. K8s 1.30 toggle off: neither rendered. Both forms are asserted
    #    so a regression that emits the wrong form for this version is
    #    caught too.
    r130_off=$(render 1.30.0 --set securityHardening.appArmorProfile.enabled=false)
    structured_off=$(echo "$r130_off" | has_structured)
    test "$structured_off" = "false" \
      || { echo "::error::K8s 1.30 toggle off: structured appArmorProfile rendered (expected absent)"; exit 1; }
    legacy_off=$(echo "$r130_off" | legacy_count)
    test "$legacy_off" = "0" \
      || { echo "::error::K8s 1.30 toggle off: legacy apparmor annotation rendered (expected absent)"; exit 1; }

    # 4. K8s 1.28 toggle off: neither rendered (both forms asserted).
    r128_off=$(render 1.28.0 --set securityHardening.appArmorProfile.enabled=false)
    legacy_off=$(echo "$r128_off" | legacy_count)
    test "$legacy_off" = "0" \
      || { echo "::error::K8s 1.28 toggle off: legacy apparmor annotation rendered (expected absent)"; exit 1; }
    structured_off=$(echo "$r128_off" | has_structured)
    test "$structured_off" = "false" \
      || { echo "::error::K8s 1.28 toggle off: structured appArmorProfile rendered (expected absent)"; exit 1; }
    echo "ok: toggle off — neither code path renders on 1.30 or 1.28"

    # 5. type=Localhost without localhostProfile fails closed.
    if render 1.30.0 \
         --set securityHardening.appArmorProfile.type=Localhost \
         >/tmp/aa-bad.out 2>&1; then
      echo "::error::type=Localhost without localhostProfile rendered successfully (expected helm failure)"
      cat /tmp/aa-bad.out
      exit 1
    fi
    grep -q "localhostProfile to be set" /tmp/aa-bad.out \
      || { echo "::error::missing 'localhostProfile to be set' guidance in helm error"; cat /tmp/aa-bad.out; exit 1; }
    echo "ok: type=Localhost without localhostProfile fails closed with operator-visible error"

    # 6. type=Localhost with profile renders the path through.
    custom=$(render 1.30.0 \
        --set securityHardening.appArmorProfile.type=Localhost \
        --set securityHardening.appArmorProfile.localhostProfile=tracecore-collector-v1 \
      | yq '.spec.template.spec.securityContext.appArmorProfile.localhostProfile')
    test "$custom" = "tracecore-collector-v1" \
      || { echo "::error::Localhost profile = $custom (expected tracecore-collector-v1)"; exit 1; }
    echo "ok: type=Localhost + localhostProfile renders structured field with custom profile path"
- name: priorityClassName + telemetry-off render correctness
# Two value-conditional template paths that yq cannot infer from
# default-render output alone: priorityClassName must appear when
Expand Down Expand Up @@ -415,7 +500,16 @@ jobs:
drop=$(echo "$ds_render" | yq '.spec.template.spec.containers[0].securityContext.capabilities.drop[0]')
test "$drop" = "ALL" \
|| { echo "::error::capabilities.drop[0] = $drop (expected ALL)"; exit 1; }
echo "ok: DaemonSet hardened-field assertions all green (criterion-10)"
# AppArmor structured field (M5b follow-up). This render passes
# no --kube-version, so helm falls back to its built-in default
# KubeVersion. The assertion assumes that default is >=1.30
# (the structured-field path). If the helm used by CI ever
# defaults lower, the chart renders the legacy annotation
# instead and this check needs an explicit --kube-version.
aa=$(echo "$ds_render" | yq '.spec.template.spec.securityContext.appArmorProfile.type')
test "$aa" = "RuntimeDefault" \
|| { echo "::error::production preset appArmorProfile.type = $aa (expected RuntimeDefault — M5b follow-up)"; exit 1; }
echo "ok: DaemonSet hardened-field assertions all green (criterion-10 + M5b)"
# Log level warn in the rendered tracecore config.
cm_render=$(yq 'select(.kind == "ConfigMap" and .metadata.name == "demo-tracecore-config")' /tmp/prod-render.yaml)
loglevel=$(echo "$cm_render" | yq '.data["config.yaml"]' | yq '.service.telemetry.logs.level')
Expand Down
20 changes: 11 additions & 9 deletions docs/followups/M5b.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,14 @@ M5b scope. Order is roughly highest-leverage first.
rubric explicitly. *Trigger:* M5 install-bench harness lands
(its `bench/results/*.json` already captures the shape), OR a
regression is reported.
- [ ] **`appArmorProfile` on the rendered DaemonSet.** Restricted PSS
*permits* AppArmor profile to be undefined, so the chart is
compliant today. Setting `RuntimeDefault` would harden against
clusters with stricter local policy and shave one item off
adopter security checklists. K8s 1.30+ uses
`pod.securityContext.appArmorProfile`; chart targets `>=1.28`
so this needs a version-gated template or values toggle.
*Trigger:* kubeVersion floor moves to >=1.30, or first adopter
asks.
- [x] **`appArmorProfile` on the rendered DaemonSet.** Shipped
proactively (sibling to L31 production-preset hardening) via
`securityHardening.appArmorProfile.enabled` (default `true`).
The chart auto-selects per `kubeVersion`: structured
`pod.securityContext.appArmorProfile: { type: RuntimeDefault }`
on K8s 1.30+, legacy
`container.apparmor.security.beta.kubernetes.io/<container>: runtime/default`
pod annotation on 1.28 / 1.29. Cross-linked from
[`install/kubernetes/tracecore/README.md`](../../install/kubernetes/tracecore/README.md)
§"Defense-in-depth above restricted-PSS" and
[`docs/threat-model.md`](../threat-model.md) §B1.
1 change: 1 addition & 0 deletions docs/threat-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ that are `n/a` are omitted. **M** = mitigated, **A** = accepted risk
| **Tampering** | Adversary controlling another pod symlinks `/dev/kmsg` or `/var/log/journal/*.journal` to point at attacker-controlled data via mount-namespace tricks. | **M** — `hostPath` mounts resolve in the host namespace; pod-side mount-NS games cannot redirect the bind. RO mount on `/dev/kmsg`. |
| **Information disclosure** | Tracecore reads kernel log lines from other tenants on the same node and emits them via OTLP to the operator's backend. | **A** — accepted by design. Operators running multi-tenant nodes (rare on training clusters) must scope tracecore install to single-tenant node pools. Documented in chart README §security. |
| **DoS** | Adversary floods `/dev/kmsg` (high-rate printk) hoping to wedge the receiver and starve memory. | **M** — `journaldreceiver` + `filelogreceiver` use upstream rate-limited cursor reads with `file_storage` extension persistence; per-receiver overhead budget caps RSS at 10MB (O2). Self-telemetry alerts on `otelcol_receiver_refused_log_records_total > 0`. |
| **Elevation** | Compromised receiver parsing a hostile `/dev/kmsg` line exploits a process-level memory bug to broaden its syscall surface (e.g. `mount`, `unshare`, `ptrace` outside its own pod). | **M** — process-isolation defense-in-depth: restricted-PSS pod (`runAsNonRoot`, `readOnlyRootFilesystem`, `seccompProfile: RuntimeDefault`, `capabilities.drop: [ALL]`) + AppArmor `RuntimeDefault` profile (M5b chart follow-up — `securityHardening.appArmorProfile.enabled` default `true`; renders the GA `pod.securityContext.appArmorProfile` field on K8s 1.30+ and the legacy `container.apparmor.security.beta.kubernetes.io/<container>` annotation on 1.28 / 1.29). Each layer narrows a different facet of the kernel surface; restricted-PSS handles capabilities/UID/RO-rootfs/seccomp (syscall filtering), AppArmor adds path-based file access plus mount/ptrace/signal mediation that a seccomp filter cannot express. |

### B2. Kube-apiserver

Expand Down
37 changes: 37 additions & 0 deletions install/kubernetes/tracecore/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ automatically; PersistentVolumeClaims (if any are added via the
| `containerSecurityContext.readOnlyRootFilesystem` | bool | `true` | tracecore writes only to `/tmp` (emptyDir). |
| `containerSecurityContext.capabilities.drop` | list | `[ALL]` | restricted-PSS gate. |
| `containerSecurityContext.capabilities.add` | list | `[]` | SYS_PTRACE is the only allowed addition; conftest rejects any other. |
| `securityHardening.appArmorProfile.enabled` | bool | `true` | Pin AppArmor `RuntimeDefault` on the DaemonSet pod (M5b). Version-gated: K8s 1.30+ renders `pod.securityContext.appArmorProfile`; 1.28 / 1.29 renders the legacy `container.apparmor.security.beta.kubernetes.io/tracecore` annotation. Auto-selected via `.Capabilities.KubeVersion.Version`. |
| `securityHardening.appArmorProfile.type` | string | `RuntimeDefault` | `RuntimeDefault` \| `Unconfined` \| `Localhost`. `Localhost` requires `localhostProfile` (chart fails closed without it). |
| `securityHardening.appArmorProfile.localhostProfile` | string | `""` | Path within the node's AppArmor profile directory; required when `type: Localhost`. |
| `telemetry.enabled` | bool | `true` | Toggle for the chart-rendered self-metrics + healthcheck surface. With `enabled: false` the chart omits both the `service.telemetry.metrics` block and the `healthcheckextension`, and the kubelet probes drop off the rendered DaemonSet. |
| `telemetry.metricsListen` | string | `0.0.0.0:8888` | `service.telemetry.metrics` Prometheus-scrape listener for the collector's own metrics (chart port `telemetry`). |
| `telemetry.healthListen` | string | `0.0.0.0:13133` | `healthcheckextension` listener; kubelet liveness AND readiness probes hit this port (chart port `health`). The extension serves both probes on the single path at `telemetry.healthPath` — there is no separate-path readiness endpoint. |
Expand Down Expand Up @@ -231,6 +234,10 @@ and turns on:
steady-state load.
- **`tolerations: [{operator: Exists}]`** so tracecore lands on
control-plane and tainted GPU pools by default.
- **AppArmor `RuntimeDefault`** (M5b follow-up) — pins the AppArmor
profile via the GA `pod.securityContext.appArmorProfile` field on
K8s 1.30+ and the legacy annotation on 1.28 / 1.29. Hardens the
syscall surface above what restricted-PSS requires.

The preset assumes the cluster CNI honors NetworkPolicy
(Calico / Cilium / kube-router / canal-flannel — NOT bare Flannel).
Expand Down Expand Up @@ -583,6 +590,36 @@ by the bundled conftest policy and CI gate:
| `hostIPC: false` | DaemonSet template (not values-tunable) + conftest deny |
| `hostNetwork: false` | DaemonSet template (not values-tunable) + conftest deny |

### Defense-in-depth above restricted-PSS

Restricted PSS *permits* an undefined AppArmor profile, so the chart
default values are compliant. The chart goes one step further by
pinning `RuntimeDefault` — the syscall-narrowing profile shipped with
every containerd / CRI-O package — under
`securityHardening.appArmorProfile.enabled` (default `true`). This
narrows the syscall surface a compromised receiver could reach
against the read-only `/dev/kmsg` + journald hostPath mounts; see
[`docs/threat-model.md`](../../../docs/threat-model.md) §B1 for the
boundary.

The chart auto-selects the render form via `semverCompare` against
`.Capabilities.KubeVersion.Version`:

- **Kubernetes 1.30+** — emits the GA structured field
`pod.securityContext.appArmorProfile: { type: RuntimeDefault }`.
Kubelet rejects pod-create on an unknown profile name (fails closed).
- **Kubernetes 1.28 / 1.29** — emits the legacy pod annotation
`container.apparmor.security.beta.kubernetes.io/tracecore: runtime/default`.
Deprecated in K8s 1.30 but still honored. Fails open
(an unknown profile name is silently dropped) — that's the upstream
semantics, not a chart bug. Raising the chart's `kubeVersion` floor
to 1.30 will close the gap.

Operators do not pick which form renders. Toggle
`securityHardening.appArmorProfile.enabled: false` to opt out (e.g.
on Windows-node DaemonSet targets where AppArmor is irrelevant);
override `type: Localhost` + `localhostProfile: <name>` to wire a
node-preloaded custom profile.

### Documented deviations

The `restricted` profile permits the empty capability set only. The
Expand Down
41 changes: 40 additions & 1 deletion install/kubernetes/tracecore/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,23 @@ spec:
prometheus.io/port: {{ $metricsPort | quote }}
prometheus.io/path: /metrics
{{- end }}
{{- /* AppArmor pre-1.30 fallback (M5b follow-up). The
       `pod.securityContext.appArmorProfile` field is GA in
       Kubernetes 1.30+; older clusters honor the legacy
       container-scoped annotation
       `container.apparmor.security.beta.kubernetes.io/<container>`
       instead. Chart `kubeVersion` floor is `>=1.28.0-0`, so
       both code paths must coexist until that floor moves to
       1.30. Exactly one form is emitted per render: this block
       is gated on <1.30, the structured field on >=1.30.

       The annotation value mirrors
       `securityHardening.appArmorProfile.type` so the legacy
       path honors the same values as the structured field:
         RuntimeDefault -> runtime/default
         Localhost      -> localhost/<localhostProfile>
         Unconfined     -> unconfined
       `Localhost` without `localhostProfile` fails the render
       here too, with the same message as the 1.30+ path.

       See docs/threat-model.md §B1 (host-fs reads) — the
       AppArmor `runtime/default` profile narrows the surface
       that a compromised receiver could reach against the
       read-only `/dev/kmsg` + journald hostPath mounts. */}}
{{- $aaLegacyCfg := .Values.securityHardening.appArmorProfile }}
{{- if and $aaLegacyCfg.enabled (not (semverCompare ">=1.30.0-0" .Capabilities.KubeVersion.Version)) }}
{{- $aaLegacyType := $aaLegacyCfg.type | default "RuntimeDefault" }}
{{- $aaLegacyValue := "runtime/default" }}
{{- if eq $aaLegacyType "Localhost" }}
{{- if not $aaLegacyCfg.localhostProfile }}
{{- fail "securityHardening.appArmorProfile.type=Localhost requires securityHardening.appArmorProfile.localhostProfile to be set" }}
{{- end }}
{{- $aaLegacyValue = printf "localhost/%s" (toString $aaLegacyCfg.localhostProfile) }}
{{- else if eq $aaLegacyType "Unconfined" }}
{{- $aaLegacyValue = "unconfined" }}
{{- end }}
        container.apparmor.security.beta.kubernetes.io/tracecore: {{ $aaLegacyValue | quote }}
{{- end }}
{{- with .Values.podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
Expand All @@ -49,7 +66,29 @@ spec:
{{- with .Values.priorityClassName }}
priorityClassName: {{ . | quote }}
{{- end }}
securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- /* Pod-level securityContext.
       On Kubernetes 1.30+ we inject the structured
       `appArmorProfile` field (GA in 1.30,
       https://kubernetes.io/docs/tutorials/security/apparmor/);
       on 1.28 and 1.29 the legacy
       `container.apparmor.security.beta.kubernetes.io/<container>`
       pod annotation above carries the same intent. The
       structured form is preferred whenever available because
       kubelet rejects pod-create on unknown profiles instead of
       silently dropping the annotation (the deprecated
       annotation path fails open). M5b follow-up.

       The field is merged into a copy of
       `.Values.podSecurityContext` and the result is serialised
       once, rather than appended as text after `toYaml`. That
       keeps the output valid when `podSecurityContext` is empty
       (`{}` followed by a key is not valid YAML) and avoids a
       duplicate `appArmorProfile` key when the operator also set
       one under `podSecurityContext`; the securityHardening knob
       takes precedence. `deepCopy` leaves `.Values` unmodified.
       `toYaml` emits keys in sorted order, so `appArmorProfile`
       is no longer the last key of the mapping. */}}
{{- $aaCfg := .Values.securityHardening.appArmorProfile }}
{{- $podSecurityContext := deepCopy (.Values.podSecurityContext | default dict) }}
{{- if and $aaCfg.enabled (semverCompare ">=1.30.0-0" .Capabilities.KubeVersion.Version) }}
{{- $aaType := $aaCfg.type | default "RuntimeDefault" }}
{{- $aaProfile := dict "type" (toString $aaType) }}
{{- if eq $aaType "Localhost" }}
{{- if not $aaCfg.localhostProfile }}
{{- fail "securityHardening.appArmorProfile.type=Localhost requires securityHardening.appArmorProfile.localhostProfile to be set" }}
{{- end }}
{{- /* toString keeps a numeric-looking profile name a YAML string. */}}
{{- $_ := set $aaProfile "localhostProfile" (toString $aaCfg.localhostProfile) }}
{{- end }}
{{- $_ := set $podSecurityContext "appArmorProfile" $aaProfile }}
{{- end }}
      securityContext:
        {{- toYaml $podSecurityContext | nindent 8 }}
{{- with .Values.nodeSelector }}
nodeSelector: {{- toYaml . | nindent 8 }}
{{- end }}
Expand Down
23 changes: 23 additions & 0 deletions install/kubernetes/tracecore/values-production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,29 @@ containerSecurityContext:
drop: [ALL]
add: []

# Restricted PSS permits an undefined AppArmor profile, so the chart
# default values are compliant. The production preset hardens against
# clusters whose local policy is stricter than restricted-PSS by
# explicitly pinning `RuntimeDefault` — the syscall-narrowing profile
# shipped with every containerd / CRI-O package. M5b follow-up.
#
# Version-gating is automatic: on Kubernetes 1.30+ the template emits
# `pod.securityContext.appArmorProfile: { type: RuntimeDefault }`
# (the GA structured field); on 1.28 / 1.29 it falls back to the legacy
# `container.apparmor.security.beta.kubernetes.io/<container>: runtime/default`
# pod annotation. Both code paths converge on identical kernel-side
# behaviour. Operators do not pick which form renders.
#
# Cross-link: `docs/threat-model.md` §B1 — narrows the syscall surface
# that a compromised receiver could reach against the read-only
# /dev/kmsg + journald hostPath mounts.

# Chart-level AppArmor knob read by templates/daemonset.yaml.
securityHardening:
appArmorProfile:
# false omits both the structured field and the legacy annotation.
enabled: true
# Profile type written to the structured field on K8s 1.30+.
type: RuntimeDefault
# Must be non-empty when type is Localhost; the template's 1.30+
# path fails the render when it is empty.
localhostProfile: ""

# --- network: default-deny ingress/egress (criterion-10 + #301) -----------
#
# Operator MUST fill `allowedScrapers` + `allowedEgressEndpoints`
Expand Down
Loading
Loading