From ede6e2c09c6297f86dbb456bb6520b910c762ea6 Mon Sep 17 00:00:00 2001 From: Tri Lam Date: Mon, 1 Jun 2026 15:50:04 -0700 Subject: [PATCH] feat(chart): typed tls.* knob mounts mTLS Secret (#301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes A13 in horizon roadmap by adding a first-class `tls.*` chart surface so operators wire cert-manager-issued client/server material without a custom DaemonSet patch overlay. Knobs (additive; default OFF): - tls.enabled — bool, default false - tls.certificateRef — kubernetes.io/tls Secret name; required when enabled (helm-template render fails closed with a clear error otherwise) - tls.mountPath — absolute dir; schema-validated `^/`; default /etc/tracecore/tls DaemonSet projects the Secret read-only (defaultMode 0400). The chart does NOT inject `tls:` clauses into the rendered config — operators wire `cert_file` / `key_file` / `ca_file` / `client_ca_file` via the free-form `config:` block referencing the projected file literals. cert-manager mTLS recipe (docs/integrations/cert-manager-mtls.md) loses the "requires a patch overlay" workaround and gains an aggregation-side example showing `client_ca_file` placement. CI gates (.github/workflows/chart.yml): five-step falsifier suite covering default-OFF render, fail-closed without ref, mount path projection, custom mountPath, and schema rejection of relative paths. NetworkPolicy template + opt-in `networkPolicy.enabled` (also #301) shipped earlier in #338 and stays unchanged here. 
Signed-off-by: Tri Lam --- .github/workflows/chart.yml | 64 +++++++++++++++++ docs/integrations/cert-manager-mtls.md | 68 ++++++++++++++----- install/kubernetes/tracecore/README.md | 3 + .../tracecore/templates/daemonset.yaml | 20 ++++++ .../kubernetes/tracecore/values.schema.json | 19 ++++++ install/kubernetes/tracecore/values.yaml | 48 +++++++++++++ 6 files changed, 205 insertions(+), 17 deletions(-) diff --git a/.github/workflows/chart.yml b/.github/workflows/chart.yml index 2c6ee3d2..12a422da 100644 --- a/.github/workflows/chart.yml +++ b/.github/workflows/chart.yml @@ -196,6 +196,70 @@ jobs: || { echo "::error::pyspy-on render: ConfigMap missing pyspy receiver block"; exit 1; } echo "ok: pyspy-enabled chart renders with drop:[ALL], add:[], and pyspy in receivers" + - name: tls.enabled mounts Secret + fails closed without certificateRef (#301) + # Falsifier for the typed mTLS knob: enabling `tls.enabled` + # without a `tls.certificateRef` MUST fail the render with a + # clear error (fail closed — silent disablement would ship a + # production overlay missing the cert mount). Enabling with a + # ref MUST project a `tls` volume + volumeMount into the + # DaemonSet at `tls.mountPath`. Default (knob off) MUST render + # zero `tls` volumes — the chart stays first-install + # compatible for clusters without cert-manager installed. + run: | + set -eo pipefail + # 1. Default render — no tls volume. + default_vol=$(helm template demo install/kubernetes/tracecore \ + --show-only templates/daemonset.yaml \ + | yq '.spec.template.spec.volumes[] | select(.name == "tls") | .name // ""') + test -z "$default_vol" \ + || { echo "::error::default render projected a tls volume; expected none"; exit 1; } + echo "ok: default render emits no tls volume" + # 2. tls.enabled without certificateRef — must fail closed. 
+ if helm template demo install/kubernetes/tracecore \ + --set tls.enabled=true >/tmp/tls-bad.out 2>&1; then + echo "::error::tls.enabled=true without certificateRef rendered successfully (expected helm failure)" + cat /tmp/tls-bad.out + exit 1 + fi + grep -q 'tls.certificateRef is empty' /tmp/tls-bad.out \ + || { echo "::error::missing 'tls.certificateRef is empty' guidance in helm error"; cat /tmp/tls-bad.out; exit 1; } + echo "ok: tls.enabled without certificateRef fails closed with operator-visible error" + # 3. tls.enabled with certificateRef — Secret volume + mount. + ds_render=$(helm template demo install/kubernetes/tracecore \ + --set tls.enabled=true \ + --set tls.certificateRef=tracecore-source-client-tls \ + --show-only templates/daemonset.yaml) + secret_name=$(echo "$ds_render" | yq '.spec.template.spec.volumes[] | select(.name == "tls") | .secret.secretName') + test "$secret_name" = "tracecore-source-client-tls" \ + || { echo "::error::tls volume secretName = $secret_name (expected tracecore-source-client-tls)"; exit 1; } + mount_path=$(echo "$ds_render" | yq '.spec.template.spec.containers[0].volumeMounts[] | select(.name == "tls") | .mountPath') + test "$mount_path" = "/etc/tracecore/tls" \ + || { echo "::error::tls volumeMount path = $mount_path (expected /etc/tracecore/tls)"; exit 1; } + read_only=$(echo "$ds_render" | yq '.spec.template.spec.containers[0].volumeMounts[] | select(.name == "tls") | .readOnly') + test "$read_only" = "true" \ + || { echo "::error::tls volumeMount readOnly = $read_only (expected true)"; exit 1; } + echo "ok: tls.enabled mounts secretName=$secret_name at $mount_path readOnly=true" + # 4. Custom mountPath survives the schema's absolute-path + # rule and renders through to the volumeMount. 
+ custom_mount=$(helm template demo install/kubernetes/tracecore \ + --set tls.enabled=true \ + --set tls.certificateRef=tracecore-source-client-tls \ + --set tls.mountPath=/var/run/secrets/tracecore-tls \ + --show-only templates/daemonset.yaml \ + | yq '.spec.template.spec.containers[0].volumeMounts[] | select(.name == "tls") | .mountPath') + test "$custom_mount" = "/var/run/secrets/tracecore-tls" \ + || { echo "::error::custom tls.mountPath rendered as $custom_mount"; exit 1; } + # 5. Non-absolute mountPath rejected by values.schema.json. + if helm template demo install/kubernetes/tracecore \ + --set tls.enabled=true \ + --set tls.certificateRef=foo \ + --set tls.mountPath=not-absolute >/tmp/tls-schema.out 2>&1; then + echo "::error::values.schema.json accepted non-absolute tls.mountPath" + exit 1 + fi + grep -q "Does not match pattern '\^/'" /tmp/tls-schema.out \ + || { echo "::error::schema error did not name the absolute-path violation"; cat /tmp/tls-schema.out; exit 1; } + echo "ok: schema rejects non-absolute tls.mountPath" - name: priorityClassName + telemetry-off render correctness # Two value-conditional template paths that yq cannot infer from # default-render output alone: priorityClassName must appear when diff --git a/docs/integrations/cert-manager-mtls.md b/docs/integrations/cert-manager-mtls.md index a50dcbe0..b1561b80 100644 --- a/docs/integrations/cert-manager-mtls.md +++ b/docs/integrations/cert-manager-mtls.md @@ -115,39 +115,73 @@ under the names the receiver/exporter `tls:` blocks expect. ## Chart wiring -The chart's structured-values `exporters.otlphttp` / `receivers.otlp` -blocks do not (yet) expose a typed `tls:` shape — the wiring runs -through the free-form `config:` deep-merge block plus extra -`volumeMounts` on the DaemonSet, both of which the chart accepts -verbatim. +The chart exposes a typed `tls.*` knob that mounts the +cert-manager-issued Secret into the DaemonSet at a known path. 
The +collector `tls:` clauses (which the chart does NOT inject — the OTel +component menagerie is too broad to wire blindly) reference the +projected file literals from the operator's `config:` block. ```yaml # source-cluster-values.yaml +tls: + enabled: true + # cert-manager spec.secretName from the Certificate resource above. + # The Secret carries the canonical kubernetes.io/tls keys + # (tls.crt, tls.key) plus ca.crt when the issuerRef points at a + # CA-bearing issuer (selfSigned / ca / vault; ACME issuers typically leave ca.crt empty). The + # chart projects the entire Secret into `tls.mountPath` + # (default `/etc/tracecore/tls/`). + certificateRef: tracecore-source-client-tls + exporters: otlphttp: enabled: true endpoint: https://aggregation.tracecore.svc.cluster.local:4318 -# Override exporter tls.* via the free-form block; the structured -# `exporters.otlphttp` toggle alone does not expose tls fields yet. + +# Wire the exporter's tls.* block to the projected file literals. +# The chart deep-merges this INTO the rendered tracecore config. config: exporters: otlphttp: tls: insecure: false - ca_file: /etc/tracecore/tls/aggregation-ca.crt - cert_file: /etc/tracecore/tls/client.crt - key_file: /etc/tracecore/tls/client.key + ca_file: /etc/tracecore/tls/ca.crt + cert_file: /etc/tracecore/tls/tls.crt + key_file: /etc/tracecore/tls/tls.key reload_interval: 1h -# Mount the cert-manager-issued Secrets into the path the tls block -# above expects. Requires a patch overlay because the chart's -# DaemonSet template does not yet expose a typed `extraVolumes` knob. -# Tracked separately as a chart-shape ergonomics issue. ``` For the aggregation side, mirror the shape into `receivers.otlp.tls` -with the `client_ca_file` line that makes the listener REQUIRE client -auth — without it the receiver still serves TLS but does not verify -client certs, silently degrading to one-way TLS. +and add `client_ca_file` — that field is what makes the listener +REQUIRE client auth. 
Without it the receiver still serves TLS but does +not verify client certs, silently degrading to one-way TLS: + +```yaml +# aggregation-cluster-values.yaml +tls: + enabled: true + certificateRef: tracecore-aggregation-server-tls + +config: + receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + tls: + cert_file: /etc/tracecore/tls/tls.crt + key_file: /etc/tracecore/tls/tls.key + client_ca_file: /etc/tracecore/tls/ca.crt + reload_interval: 1h +``` + +### What `tls.enabled` does and does NOT do + +| Does | Does NOT | +|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------| +| Mounts `tls.certificateRef` (a Secret) read-only at `tls.mountPath` | Inject any `tls:` clause into the rendered tracecore config — operators wire those via `config:` | +| Fails the helm-template render closed if `tls.certificateRef` is empty | Create the Secret or the cert-manager `Certificate` — both are operator-managed | +| Validates `tls.mountPath` is absolute (`^/`) via `values.schema.json` | Validate the Secret's keys exist; mismatched key names surface at pod start as `no such file or directory` | ## Renewal diff --git a/install/kubernetes/tracecore/README.md b/install/kubernetes/tracecore/README.md index 942db16f..1522d269 100644 --- a/install/kubernetes/tracecore/README.md +++ b/install/kubernetes/tracecore/README.md @@ -195,6 +195,9 @@ automatically; PersistentVolumeClaims (if any are added via the | `networkPolicy.allowedEgressEndpoints` | list | `[]` | `{cidr, port, protocol, except?}` entries for OTLP-out. Operator declares so the policy is auditable. | | `networkPolicy.dnsNamespaceSelector` | map | `{kubernetes.io/metadata.name: kube-system}` | DNS resolver namespace label. Override if your DNS lives elsewhere. | | `networkPolicy.dnsPodSelector` | map | `{k8s-app: kube-dns}` | DNS resolver pod label. 
Override for non-coredns/kube-dns setups. | +| `tls.enabled` | bool | `false` | Mount a `kubernetes.io/tls` Secret (typically [cert-manager](../../../docs/integrations/cert-manager-mtls.md)-issued) into the DaemonSet at `tls.mountPath`. Operators wire `tls.cert_file` / `tls.key_file` / `tls.ca_file` (or `client_ca_file`) into the free-form `config:` block referencing the projected file literals; the chart does NOT inject `tls:` clauses (#301). | +| `tls.certificateRef` | string | `""` | Name of the `kubernetes.io/tls` Secret in `.Values.namespace`. Required when `tls.enabled` is true; the helm-template render fails closed with a clear error if empty. | +| `tls.mountPath` | string | `/etc/tracecore/tls` | Absolute directory the Secret projects into. Schema-validated `^/`. Path literals across `docs/integrations/` assume the default. | The chart's authoritative defaults live in [`values.yaml`](./values.yaml); the table above is a narrative diff --git a/install/kubernetes/tracecore/templates/daemonset.yaml b/install/kubernetes/tracecore/templates/daemonset.yaml index 8b6dbea4..55cfa133 100644 --- a/install/kubernetes/tracecore/templates/daemonset.yaml +++ b/install/kubernetes/tracecore/templates/daemonset.yaml @@ -110,9 +110,29 @@ spec: readOnly: true - name: tmp mountPath: /tmp + {{- if .Values.tls.enabled }} + {{- if not .Values.tls.certificateRef }} + {{- fail "tls.enabled is true but tls.certificateRef is empty — set tls.certificateRef to the kubernetes.io/tls Secret name (typically cert-manager's spec.secretName)" }} + {{- end }} + - name: tls + mountPath: {{ .Values.tls.mountPath | quote }} + readOnly: true + {{- end }} volumes: - name: config configMap: name: {{ include "tracecore.fullname" . }}-config - name: tmp emptyDir: {} + {{- if .Values.tls.enabled }} + - name: tls + secret: + secretName: {{ .Values.tls.certificateRef | quote }} + # cert-manager writes tls.crt, tls.key, and (when issuerRef + # points at a CA-bearing issuer) ca.crt. 
Other PKI sources + # may omit ca.crt — the OTel `tls:` block falls back to the + # OS trust store if `ca_file` is not set, so absence is + # operator-visible rather than a chart-render error. + defaultMode: 0400 + optional: false + {{- end }} diff --git a/install/kubernetes/tracecore/values.schema.json b/install/kubernetes/tracecore/values.schema.json index 671fe1ae..70fdd29f 100644 --- a/install/kubernetes/tracecore/values.schema.json +++ b/install/kubernetes/tracecore/values.schema.json @@ -244,6 +244,25 @@ "dnsNamespaceSelector": { "type": "object" }, "dnsPodSelector": { "type": "object" } } + }, + + "tls": { + "type": "object", + "additionalProperties": false, + "required": ["enabled"], + "description": "mTLS material wiring (issue #301). When enabled, mounts a kubernetes.io/tls Secret (typically cert-manager-issued) into the DaemonSet at `mountPath`. The chart does NOT inject `tls:` clauses into the rendered tracecore config — operators wire those via the free-form `config:` block referencing `<mountPath>/{tls.crt,tls.key,ca.crt}`.", + "properties": { + "enabled": { "type": "boolean" }, + "certificateRef": { + "type": "string", + "description": "Name of a kubernetes.io/tls Secret in `.Values.namespace`. Required when `enabled` is true; the chart fails the helm-template render with a clear error if missing." + }, + "mountPath": { + "type": "string", + "pattern": "^/", + "description": "Absolute directory the Secret projects into. Default `/etc/tracecore/tls` matches the path literals across `docs/integrations/`." + } + } } }, diff --git a/install/kubernetes/tracecore/values.yaml b/install/kubernetes/tracecore/values.yaml index fcea446e..a70453af 100644 --- a/install/kubernetes/tracecore/values.yaml +++ b/install/kubernetes/tracecore/values.yaml @@ -340,3 +340,51 @@ networkPolicy: kubernetes.io/metadata.name: kube-system dnsPodSelector: k8s-app: kube-dns + +# mTLS material wiring (issue #301). 
When enabled, the chart mounts a +# Kubernetes Secret (typically cert-manager-issued via a `Certificate` +# resource — see `docs/integrations/cert-manager-mtls.md`) into the +# DaemonSet at `tls.mountPath`. cert-manager writes the canonical +# Secret keys `tls.crt`, `tls.key`, and `ca.crt`; OTel collector +# `tls:` blocks reference them as shown in the worked example below. +# +# This knob ONLY mounts the cert material; the chart does NOT inject +# `tls:` clauses into the rendered tracecore config. Operators wire +# `tls.cert_file` / `tls.key_file` / `tls.ca_file` (or +# `client_ca_file` for receivers requiring client-auth) into their +# `config:` block. The path literals MUST match `tls.mountPath` +# (default `/etc/tracecore/tls/`). +# +# Worked exporter-side overlay (source cluster pushing OTLP to a +# mutually-authenticated aggregation listener): +# +# tls: +# enabled: true +# certificateRef: tracecore-source-client-tls +# config: +# exporters: +# otlphttp: +# tls: +# insecure: false +# ca_file: /etc/tracecore/tls/ca.crt +# cert_file: /etc/tracecore/tls/tls.crt +# key_file: /etc/tracecore/tls/tls.key +# reload_interval: 1h +# +# Default OFF so the chart's first-install path stays compatible with +# clusters that have no cert-manager installed. +# +# certificateRef: name of a `kubernetes.io/tls` Secret in the chart's +# namespace (`.Values.namespace`). The chart does NOT create the +# Secret; cert-manager (or another PKI source) writes it. Empty when +# `enabled: false`; required when `enabled: true` — the +# helm-template render fails closed with a clear error if missing. +# +# mountPath: directory the Secret projects into. Operators rarely +# tune this; doc literals across `docs/integrations/` assume the +# default. Change in lockstep with the `config:` block's `*_file` +# paths. +tls: + enabled: false + certificateRef: "" + mountPath: /etc/tracecore/tls