From e3bfead8e329cdfee8c0a3545c5a37e950a839ec Mon Sep 17 00:00:00 2001 From: Tri Lam Date: Sat, 30 May 2026 23:02:51 -0700 Subject: [PATCH 1/2] feat(pivot): port otlphttp off internal/selftelemetry (PR-F unblock) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sibling-port the otlphttp exporter to a package-local self-telemetry surface so PR-F can delete `internal/selftelemetry`. Mirrors the stdoutexporter precedent (PR #186): selftel.go owns `kind`, `selfExporter`, `newSelfExporter`, and `recordInitError`; the instrumentation scope name pins to the exporter's Go import path. Metric contract preserved: `tracecore.exporter.calls_total{result, kind, component_id}` with the same three kind values (marshal, io, downstream) the v0.1.x exporter emitted; dashboards / alerts keyed on the counter do not regress. `init_errors_total` ticks on register failure via the same factory fallback shape as stdoutexporter. ExporterCarrier dropped — there is no current production consumer of `SelfExporter()` in this tree and PR-F deletes the contract anyway. The runtime degrades to the documented "no per-exporter signal" mode; `tracecore.exporter.failure_rate` still appears via the SLO observable gauge. Documented at newSelfTelemetry. TDD red-then-green: 8 new sibling tests cover noop safety, nil-MP error, calls_total emission with all 3 kinds, scope-name pin, init_errors shape, factory fallback (failing meter), nil-MP factory path, and the sibling-types compile pin. classify_internal_test.go rewired to the package-local kind type. All new + updated + pre-existing tests green; make check clean. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Tri Lam --- components/exporters/otlphttp/README.md | 2 +- .../otlphttp/classify_internal_test.go | 12 +- components/exporters/otlphttp/otlphttp.go | 70 +-- components/exporters/otlphttp/selftel.go | 173 ++++++++ components/exporters/otlphttp/selftel_test.go | 407 ++++++++++++++++++ 5 files changed, 632 insertions(+), 32 deletions(-) create mode 100644 components/exporters/otlphttp/selftel.go create mode 100644 components/exporters/otlphttp/selftel_test.go diff --git a/components/exporters/otlphttp/README.md b/components/exporters/otlphttp/README.md index c8612ea6..8a1e556b 100644 --- a/components/exporters/otlphttp/README.md +++ b/components/exporters/otlphttp/README.md @@ -60,7 +60,7 @@ Operator-supplied `headers` are sent verbatim on every outgoing request; useful ## Self-telemetry labels -The exporter increments `tracecore.exporter.calls_total{result, kind}` on every Consume*. The `kind` values emitted by otlphttp are NOT in the canonical set at `internal/selftelemetry/interface.go`; they're exporter-local low-cardinality strings: +The exporter increments `tracecore.exporter.calls_total{result, kind, component_id}` on every Consume*. The `kind` values emitted by otlphttp are exporter-local low-cardinality strings declared in [`selftel.go`](selftel.go) (sibling-scoped, package-local — see RFC-0013 §migration PR-B1; the `internal/selftelemetry` canonical set is being deleted in PR-F): | `kind` | When | Operator first-step | |---|---|---| diff --git a/components/exporters/otlphttp/classify_internal_test.go b/components/exporters/otlphttp/classify_internal_test.go index 44fe5798..8e9cf167 100644 --- a/components/exporters/otlphttp/classify_internal_test.go +++ b/components/exporters/otlphttp/classify_internal_test.go @@ -13,14 +13,12 @@ import ( "testing" "github.com/stretchr/testify/require" - - "github.com/tracecoreai/tracecore/internal/selftelemetry" ) func TestClassifyKind_PermanentStatusRoutesToDownstream(t *testing.T) { t.Parallel() err := fmt.Errorf("%w 400", errPermanentStatus) - require.Equal(t, selftelemetry.Kind("downstream"), classifyKind(err)) + require.Equal(t, kindDownstream, classifyKind(err)) } func TestClassifyKind_RetryableStatusRoutesToDownstream(t *testing.T) { @@ -30,22 +28,22 @@ func TestClassifyKind_RetryableStatusRoutesToDownstream(t *testing.T) { // io because the error message was "retryable status N", not // "permanent status N". Reviewer P3-Rev1#7/Rev2-F3. err := fmt.Errorf("%w 503", errRetryableStatus) - require.Equal(t, selftelemetry.Kind("downstream"), classifyKind(err)) + require.Equal(t, kindDownstream, classifyKind(err)) } func TestClassifyKind_CtxCanceledRoutesToIO(t *testing.T) { t.Parallel() - require.Equal(t, selftelemetry.Kind("io"), classifyKind(context.Canceled)) + require.Equal(t, kindIO, classifyKind(context.Canceled)) } func TestClassifyKind_CtxDeadlineExceededRoutesToIO(t *testing.T) { t.Parallel() - require.Equal(t, selftelemetry.Kind("io"), classifyKind(context.DeadlineExceeded)) + require.Equal(t, kindIO, classifyKind(context.DeadlineExceeded)) } func TestClassifyKind_GenericErrorRoutesToIO(t *testing.T) { t.Parallel() // Network-shaped errors (DNS, TLS, dial) fall through to the // default branch and are tagged as io. README contract. - require.Equal(t, selftelemetry.Kind("io"), classifyKind(errors.New("dial tcp: no such host"))) + require.Equal(t, kindIO, classifyKind(errors.New("dial tcp: no such host"))) } diff --git a/components/exporters/otlphttp/otlphttp.go b/components/exporters/otlphttp/otlphttp.go index 3c3530bc..c2be2129 100644 --- a/components/exporters/otlphttp/otlphttp.go +++ b/components/exporters/otlphttp/otlphttp.go @@ -27,7 +27,6 @@ import ( "github.com/tracecoreai/tracecore/internal/consumer" "github.com/tracecoreai/tracecore/internal/pipeline" - "github.com/tracecoreai/tracecore/internal/selftelemetry" ) // signal disambiguates which OTLP/HTTP path the exporter is bound @@ -82,15 +81,10 @@ const ( maxResponseReadBytes = 64 * 1024 ) -// Exporter-local selftelemetry kinds. `selftelemetry.Kind` is a -// string type and the interface explicitly allows per-component -// vocabulary; we declare ours so the exported metric labels stay -// low-cardinality (just these three values). -const ( - kindMarshal selftelemetry.Kind = "marshal" - kindIO selftelemetry.Kind = "io" - kindDownstream = selftelemetry.KindDownstream -) +// Self-telemetry types (kind, selfExporter) live in `selftel.go` +// (sibling-scoped, package-local), not in `internal/selftelemetry` — +// see RFC-0013 §migration. PR-F deletes `internal/selftelemetry`; the +// sibling pattern is what survives. // otlpExporter is the per-signal exporter the factory hands out. // Goroutine-safe; net/http.Client handles concurrent calls. @@ -99,7 +93,7 @@ type otlpExporter struct { id pipeline.ID logger *slog.Logger - telemetry selftelemetry.Exporter + telemetry selfExporter transport *http.Transport client *http.Client cfg *Config @@ -141,7 +135,7 @@ var ( errRetryableStatus = errors.New("otlphttp: server returned retryable status") ) -func newExporter(_ context.Context, set pipeline.CreateSettings, cfg *Config, sig signal) (*otlpExporter, error) { +func newExporter(ctx context.Context, set pipeline.CreateSettings, cfg *Config, sig signal) (*otlpExporter, error) { endpoint, err := resolveEndpoint(cfg, sig) if err != nil { return nil, err @@ -174,11 +168,11 @@ func newExporter(_ context.Context, set pipeline.CreateSettings, cfg *Config, si transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} //nolint:gosec // operator-opted via config } - te := newSelfTelemetry(set) logger := set.Telemetry.Logger if logger == nil { logger = slog.New(slog.NewTextHandler(io.Discard, nil)) } + te := newSelfTelemetry(ctx, set, logger) // insecure_skip_verify regressions get inherited from dev YAML into // prod. WARN at startup so operators grepping pod logs catch it // without having to render the ConfigMap. @@ -282,23 +276,51 @@ func buildUserAgent(bi pipeline.BuildInfo) string { } // newSelfTelemetry wires the per-exporter self-telemetry handle. -// Returns a no-op when the MeterProvider is absent so the hot path -// never has to nil-check. -func newSelfTelemetry(set pipeline.CreateSettings) selftelemetry.Exporter { +// Returns a no-op when the MeterProvider is absent or instrument +// registration fails; the register-failure path also ticks +// `tracecore.selftelemetry.init_errors_total` via recordInitError so +// operators can alert on > 0. Mirrors the stdoutexporter sibling — +// same wire shape, no internal/selftelemetry import. +// +// NOTE on ExporterCarrier removal: +// +// v0.1.x otlphttp exposed `SelfExporter() selftelemetry.Exporter` so +// the runtime's reader-collection path could feed +// `tracecore.exporter.failure_rate`. The PR-B1 sibling port drops the +// `selftelemetry.ExporterCarrier` implementation: +// +// - The runtime path that consumed ExporterCarrier (`cmd/tracecore` +// in v0.1.x) silently skipped components that didn't implement it. +// There is no current production caller in this tree; the v0.1.x +// ConsumeCarrier was the only consumer and PR-F deletes it. +// - `tracecore_exporter_failure_rate` still appears in scrape via the +// SLO observable gauge (reports 0 with no readers registered). +// - `tracecore.exporter.calls_total{result,kind,component_id}` +// continues to surface because the sibling impl emits it on +// `set.Telemetry.MeterProvider` directly — dashboards / alerts +// keyed on the calls_total counter do not regress. +// - PR-F deletes `internal/selftelemetry` entirely, so the contract +// evaporates regardless. Removing now keeps the sibling +// import-graph clean and matches the stdoutexporter precedent. +// +// The per-exporter failure_rate gauge feed is the documented gap; the +// runtime degrades to the "no per-exporter signal" mode in line with +// the v0.1.x contract. +func newSelfTelemetry(ctx context.Context, set pipeline.CreateSettings, logger *slog.Logger) selfExporter { if set.Telemetry.MeterProvider == nil { - return selftelemetry.NewNoopExporter() + logger.Warn("otlphttp: no MeterProvider; self-telemetry using noop") + return newNoopSelfExporter() } - e, err := selftelemetry.NewExporter(set.ID, set.Telemetry.MeterProvider) + e, err := newSelfExporter(set.ID, set.Telemetry.MeterProvider) if err != nil { - return selftelemetry.NewNoopExporter() + recordInitError(ctx, set.Telemetry.MeterProvider, + "exporter", set.ID.String(), reasonInstrumentRegister) + logger.Warn("otlphttp self-telemetry init failed; using noop", "err", err) + return newNoopSelfExporter() } return e } -// SelfExporter exposes the selftelemetry handle so the runtime can -// register it for the M2 failure_rate observable gauge. -func (e *otlpExporter) SelfExporter() selftelemetry.Exporter { return e.telemetry } - // ErrShutdownSentinel exposes errShutdown for external test packages // to match via errors.Is without parsing the error string. Package- // internal callers use errShutdown directly. @@ -661,7 +683,7 @@ func (e *otlpExporter) maybeCompress(body []byte) ([]byte, string, error) { // classifyKind maps a transport error to a low-cardinality kind tag. // Retry-exhausted 5xx remains downstream, NOT io: operators triaging // by kind look at network for io and at the backend for downstream. -func classifyKind(err error) selftelemetry.Kind { +func classifyKind(err error) kind { if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { return kindIO } diff --git a/components/exporters/otlphttp/selftel.go b/components/exporters/otlphttp/selftel.go new file mode 100644 index 00000000..3cc77b05 --- /dev/null +++ b/components/exporters/otlphttp/selftel.go @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: Apache-2.0 + +// Exporter-scoped self-telemetry surface. Replaces the v0.1.x +// dependency on `internal/selftelemetry`, which is slated for deletion +// in RFC-0013 PR-F. Metric names + label shape are preserved +// (`tracecore.exporter.calls_total{result,kind,component_id}`) so +// dashboards / alerts don't regress. The instrumentation scope name is +// THIS exporter's Go import path — when the exporter moves under +// `module/` in PR-I, the scope name moves with it, matching OTel +// convention. +// +// Mirrors `components/exporters/stdoutexporter/selftel.go` (PR-B1). + +package otlphttp + +import ( + "context" + "errors" + "fmt" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + + "github.com/tracecoreai/tracecore/internal/pipeline" +) + +// kind is a low-cardinality error-class identifier for exporter failures. +// Mirrors the internal/selftelemetry.Kind type so the migration is +// mechanical; exporter-local because the canonical-Kind enforcement that +// the internal package owned moves into RFC-0013 PR-I's submodule. +type kind string + +const ( + // kindMarshal — pdata serialization or gzip compression failure. + kindMarshal kind = "marshal" + + // kindIO — network error, timeout, context cancellation, or + // shutdown abort. Transport-fault class. + kindIO kind = "io" + + // kindDownstream — server returned a permanent 4xx (not 429) or + // non-retryable 5xx OR a retryable 5xx that exhausted retries. + // Server-fault class — mirrors the v0.1.x selftelemetry.KindDownstream + // value the otlphttp package previously aliased. + kindDownstream kind = "downstream" +) + +// reasonInstrumentRegister labels init_errors_total ticks when OTel +// instrument registration failed at construction time. +const reasonInstrumentRegister = "instrument_register" + +// instrumentationScope pins the OTel scope name. Per OTel convention, +// the scope is the package's Go import path; PR-I changes this when +// the exporter moves into the module/ submodule. +const instrumentationScope = "github.com/tracecoreai/tracecore/components/exporters/otlphttp" + +// errNilMeterProvider mirrors selftelemetry.ErrNilMeterProvider — the +// factory is responsible for substituting the noop fallback + ticking +// init_errors_total. Returning a sentinel rather than a generic error +// lets the factory distinguish "wire-up bug" from "instrument register +// failure" if it ever needs to. +var errNilMeterProvider = errors.New("otlphttp: MeterProvider is nil") + +// selfExporter is the exporter-scoped self-health surface. Methods are +// non-blocking + safe for concurrent use; the noop impl discards. +// Mirrors the internal/selftelemetry.Exporter interface but trimmed to +// the exact surface otlphttp uses — no FailureRateReader (the +// per-exporter failure_rate aggregation contract is intentionally +// dropped by this port; see the package comment on SelfExporter +// removal in otlphttp.go). +// +// Why drop FailureRateReader / ExporterCarrier: +// - The runtime path that consumed ExporterCarrier (`cmd/tracecore` +// in v0.1.x) silently skipped components that didn't implement +// it — the documented "no per-exporter signal" degraded mode. The +// production-runtime caller has no current consumer in this tree. +// - `tracecore_exporter_failure_rate` still appears in scrape via the +// SLO observable gauge (reports 0 with no readers). +// - PR-F deletes `internal/selftelemetry` entirely, so any code that +// referenced ExporterCarrier here would have to be removed then +// anyway. Removing now keeps the sibling import-graph clean and +// matches the stdoutexporter precedent. +type selfExporter interface { + IncCallSuccess() + IncCallFailure(k kind) +} + +// noopSelfExporter discards every call. +type noopSelfExporter struct{} + +func newNoopSelfExporter() selfExporter { return noopSelfExporter{} } + +func (noopSelfExporter) IncCallSuccess() {} +func (noopSelfExporter) IncCallFailure(kind) {} + +var _ selfExporter = noopSelfExporter{} + +// newSelfExporter returns a real selfExporter backed by an OTel counter +// `tracecore.exporter.calls_total{result, kind, component_id}` acquired +// from mp. The component's id is attached as the `component_id` label +// on every emission. Metric name + label shape preserved from the +// v0.1.x internal selftelemetry package so dashboards / alerts don't +// regress. +func newSelfExporter(id pipeline.ID, mp metric.MeterProvider) (selfExporter, error) { + if mp == nil { + return nil, errNilMeterProvider + } + meter := mp.Meter(instrumentationScope) + + calls, err := meter.Int64Counter( + "tracecore.exporter.calls_total", + metric.WithDescription("Exporter Consume* calls partitioned by result"), + ) + if err != nil { + return nil, fmt.Errorf("exporter.calls_total counter: %w", err) + } + + return &selfExporterImpl{ + componentID: id.String(), + calls: calls, + }, nil +} + +var _ selfExporter = (*selfExporterImpl)(nil) + +type selfExporterImpl struct { + componentID string + calls metric.Int64Counter +} + +func (e *selfExporterImpl) IncCallSuccess() { + // Emit component_id + result in one WithAttributes call rather than + // merging two attribute sets — avoids relying on SDK merge semantics + // that vary across OTel versions. Mirrors the stdoutexporter sibling. + e.calls.Add(context.Background(), 1, metric.WithAttributes( + attribute.String("component_id", e.componentID), + attribute.String("result", "success"), + )) +} + +func (e *selfExporterImpl) IncCallFailure(k kind) { + e.calls.Add(context.Background(), 1, metric.WithAttributes( + attribute.String("component_id", e.componentID), + attribute.String("result", "failure"), + attribute.String("kind", string(k)), + )) +} + +// recordInitError ticks tracecore.selftelemetry.init_errors_total when +// exporter wiring falls back to noop telemetry. Operators alert on +// `> 0` to learn that self-telemetry isn't really plugged in. Panics +// from a broken MeterProvider are swallowed — recordInitError IS the +// degraded-path fallback; crashing here would turn a partial outage +// into a process kill. +func recordInitError(ctx context.Context, mp metric.MeterProvider, kindLabel, componentID, reason string) { + defer func() { _ = recover() }() + if mp == nil { + return + } + meter := mp.Meter(instrumentationScope) + c, err := meter.Int64Counter( + "tracecore.selftelemetry.init_errors_total", + metric.WithDescription("Counter of self-telemetry construction failures that fell back to the noop implementation."), + ) + if err != nil { + return + } + c.Add(ctx, 1, metric.WithAttributes( + attribute.String("kind", kindLabel), + attribute.String("component_id", componentID), + attribute.String("reason", reason), + )) +} diff --git a/components/exporters/otlphttp/selftel_test.go b/components/exporters/otlphttp/selftel_test.go new file mode 100644 index 00000000..6acea829 --- /dev/null +++ b/components/exporters/otlphttp/selftel_test.go @@ -0,0 +1,407 @@ +// SPDX-License-Identifier: Apache-2.0 + +package otlphttp + +import ( + "context" + "errors" + "fmt" + "strings" + "testing" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/embedded" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + + "github.com/tracecoreai/tracecore/internal/pipeline" + "github.com/tracecoreai/tracecore/internal/pipeline/pipelinetest" +) + +// newTestMeterProvider builds an SDK MeterProvider backed by a ManualReader +// so tests can collect metricdata.ResourceMetrics deterministically without +// the Prometheus exporter or any internal/telemetry plumbing — the exporter +// package must stay decoupled from internal/* so PR-F can delete those +// packages without touching this test file. Mirrors the PR-B1 sibling +// pattern (`components/exporters/stdoutexporter/selftel_test.go`). +func newTestMeterProvider(t *testing.T) (*sdkmetric.MeterProvider, *sdkmetric.ManualReader) { + t.Helper() + rdr := sdkmetric.NewManualReader() + mp := sdkmetric.NewMeterProvider(sdkmetric.WithReader(rdr)) + t.Cleanup(func() { _ = mp.Shutdown(context.Background()) }) + return mp, rdr +} + +func collectRM(t *testing.T, rdr *sdkmetric.ManualReader) metricdata.ResourceMetrics { + t.Helper() + var rm metricdata.ResourceMetrics + if err := rdr.Collect(context.Background(), &rm); err != nil { + t.Fatalf("collect: %v", err) + } + return rm +} + +// findInstrument returns the first metricdata.Metrics whose Name matches the +// supplied OTel-dot name (e.g. "tracecore.exporter.calls_total"). Returns +// (nil, false) if absent. Scope-agnostic: walks all scope metrics. +func findInstrument(rm metricdata.ResourceMetrics, name string) (metricdata.Metrics, bool) { + for _, sm := range rm.ScopeMetrics { + for _, m := range sm.Metrics { + if m.Name == name { + return m, true + } + } + } + return metricdata.Metrics{}, false +} + +// scopeOf returns the instrumentation scope name that emitted the supplied +// metric name. Used to pin the scope-name standard for PR-B1 (sibling +// exporter port). +func scopeOf(rm metricdata.ResourceMetrics, name string) (string, bool) { + for _, sm := range rm.ScopeMetrics { + for _, m := range sm.Metrics { + if m.Name == name { + return sm.Scope.Name, true + } + } + } + return "", false +} + +// kvMatch returns true if every want key's value matches the int64 +// datapoint's attribute set. +func kvMatch(dp metricdata.DataPoint[int64], want map[string]string) bool { + for k, v := range want { + got, ok := dp.Attributes.Value(attribute.Key(k)) + if !ok || got.AsString() != v { + return false + } + } + return true +} + +// TestSelfTelemetry_NoopAlwaysSafe pins: newNoopSelfExporter returns a +// value whose hot-path methods never panic and silently discard. Every +// Consume* path calls into the selfExporter surface; nil-checks at each +// call site are forbidden, so the noop must be a real value. +func TestSelfTelemetry_NoopAlwaysSafe(t *testing.T) { + se := newNoopSelfExporter() + defer func() { + if r := recover(); r != nil { + t.Fatalf("noop panicked: %v", r) + } + }() + se.IncCallSuccess() + se.IncCallSuccess() + se.IncCallFailure(kindMarshal) + se.IncCallFailure(kindIO) + se.IncCallFailure(kindDownstream) +} + +// TestSelfTelemetry_NewExporter_NilProviderErrors pins: newSelfExporter +// returns errNilMeterProvider when called with a nil provider rather than +// silently substituting noop — the factory is responsible for the fallback +// + the recordInitError tick. Mirrors the stdoutexporter sibling contract. +func TestSelfTelemetry_NewExporter_NilProviderErrors(t *testing.T) { + _, err := newSelfExporter(testID(), nil) + if !errors.Is(err, errNilMeterProvider) { + t.Fatalf("err = %v, want errNilMeterProvider", err) + } +} + +// TestSelfTelemetry_EmitsCallsTotal_WithResultKindAndComponentID pins the +// M2 metric contract for exporters. After IncCallSuccess() ×2 + +// IncCallFailure(kindMarshal) ×1 + IncCallFailure(kindIO) ×1 + +// IncCallFailure(kindDownstream) ×1, the ManualReader collects +// tracecore.exporter.calls_total with datapoints partitioned by result and +// (for failures) kind, labeled with the component_id. A regression that +// drops the kind label, the component_id label, the result label, or the +// metric-name prefix fails here. +func TestSelfTelemetry_EmitsCallsTotal_WithResultKindAndComponentID(t *testing.T) { + mp, rdr := newTestMeterProvider(t) + se, err := newSelfExporter(testID(), mp) + if err != nil { + t.Fatalf("newSelfExporter: %v", err) + } + se.IncCallSuccess() + se.IncCallSuccess() + se.IncCallFailure(kindMarshal) + se.IncCallFailure(kindIO) + se.IncCallFailure(kindDownstream) + + rm := collectRM(t, rdr) + m, ok := findInstrument(rm, "tracecore.exporter.calls_total") + if !ok { + t.Fatalf("metric tracecore.exporter.calls_total absent; have: %s", dumpNames(rm)) + } + sum, ok := m.Data.(metricdata.Sum[int64]) + if !ok { + t.Fatalf("calls_total data shape: got %T, want metricdata.Sum[int64]", m.Data) + } + + // Index datapoints by a (result,kind) key once; per-case assertions + // below stay readable + the function stays under gocyclo's 15-edge + // budget. component_id is asserted per-datapoint inside the loop. + got := map[string]int{} + for _, dp := range sum.DataPoints { + if !kvMatch(dp, map[string]string{"component_id": "otlphttp/test"}) { + t.Errorf("datapoint missing component_id=otlphttp/test: %v", dp.Attributes) + continue + } + result, _ := dp.Attributes.Value("result") + kindVal, _ := dp.Attributes.Value("kind") + key := result.AsString() + "/" + kindVal.AsString() + got[key] += int(dp.Value) + } + wantCounts := map[string]int{ + "success/": 2, + "failure/marshal": 1, + "failure/io": 1, + "failure/downstream": 1, + } + for key, want := range wantCounts { + if got[key] != want { + t.Errorf("calls_total[%q]: got %d, want %d (all=%v)", key, got[key], want, got) + } + } +} + +// TestSelfTelemetry_ScopeNameIsExporterImportPath pins the OTel scope-name +// standard: instrumentation scope = exporter's Go import path. This anchors +// the PR-B1-style decision (vs reusing the deleted internal/selftelemetry +// scope) so a future drift back to the internal name fails here. +func TestSelfTelemetry_ScopeNameIsExporterImportPath(t *testing.T) { + mp, rdr := newTestMeterProvider(t) + se, err := newSelfExporter(testID(), mp) + if err != nil { + t.Fatalf("newSelfExporter: %v", err) + } + se.IncCallSuccess() + rm := collectRM(t, rdr) + scope, ok := scopeOf(rm, "tracecore.exporter.calls_total") + if !ok { + t.Fatalf("calls_total absent") + } + const wantScope = "github.com/tracecoreai/tracecore/components/exporters/otlphttp" + if scope != wantScope { + t.Errorf("instrumentation scope: got %q, want %q", scope, wantScope) + } +} + +// TestRecordInitError_TicksInitErrorsCounter pins: when factory wiring +// fails (newSelfExporter returns an error), recordInitError surfaces a +// tracecore.selftelemetry.init_errors_total tick with kind="exporter", +// the component_id label, and reason="instrument_register". This is the +// only signal that an exporter fell back to noop telemetry; dropping the +// recordInitError call must fail this test. +func TestRecordInitError_TicksInitErrorsCounter(t *testing.T) { + mp, rdr := newTestMeterProvider(t) + recordInitError(context.Background(), mp, "exporter", testID().String(), reasonInstrumentRegister) + + rm := collectRM(t, rdr) + m, ok := findInstrument(rm, "tracecore.selftelemetry.init_errors_total") + if !ok { + t.Fatalf("init_errors_total absent; have: %s", dumpNames(rm)) + } + sum, ok := m.Data.(metricdata.Sum[int64]) + if !ok { + t.Fatalf("init_errors_total data shape: got %T, want metricdata.Sum[int64]", m.Data) + } + if len(sum.DataPoints) != 1 { + t.Fatalf("init_errors datapoints: got %d, want 1", len(sum.DataPoints)) + } + dp := sum.DataPoints[0] + want := map[string]string{ + "kind": "exporter", + "component_id": "otlphttp/test", + "reason": reasonInstrumentRegister, + } + if !kvMatch(dp, want) { + t.Errorf("init_errors attrs: got %v, want %v", dp.Attributes, want) + } + if dp.Value != 1 { + t.Errorf("init_errors value: got %d, want 1", dp.Value) + } +} + +// TestRecordInitError_NilProviderIsSafe pins: a nil MeterProvider must +// not panic — recordInitError IS the fallback path; crashing here would +// turn a partial degradation into a process kill. +func TestRecordInitError_NilProviderIsSafe(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Fatalf("recordInitError(nil) panicked: %v", r) + } + }() + recordInitError(context.Background(), nil, "exporter", "x/y", reasonInstrumentRegister) +} + +// TestFactory_FallsBackToNoopWhenMeterFails pins the factory +// observability contract end-to-end: when newSelfExporter returns an +// error (synthetic register failure for every tracecore.exporter.* +// instrument), the factory MUST (1) leave the exporter with a working +// noop telemetry field (no nil, no panic on hot-path calls), AND (2) +// tick tracecore.selftelemetry.init_errors_total via recordInitError. +// Mirrors the stdoutexporter sibling test seam. +func TestFactory_FallsBackToNoopWhenMeterFails(t *testing.T) { + mp, rdr := newTestMeterProvider(t) + failing := &failingExporterMP{real: mp} + + fx := pipelinetest.New(t) + fx.CreateSettings.ID = pipeline.MustNewID(componentType(), "test") + fx.CreateSettings.Telemetry.MeterProvider = failing + cfg := &Config{Endpoint: "http://127.0.0.1:1"} + e, err := Factory.CreateMetrics(context.Background(), fx.CreateSettings, cfg) + if err != nil { + t.Fatalf("CreateMetrics: %v", err) + } + exp, ok := e.(*otlpExporter) + if !ok { + t.Fatalf("exporter type: got %T, want *otlpExporter", e) + } + if exp.telemetry == nil { + t.Fatal("telemetry field nil after failed wiring; must fall back to noop") + } + // Hot-path call must not panic + must not surface (noop discards). + exp.telemetry.IncCallSuccess() + exp.telemetry.IncCallFailure(kindIO) + + rm := collectRM(t, rdr) + if m, ok := findInstrument(rm, "tracecore.exporter.calls_total"); ok { + if sum, ok := m.Data.(metricdata.Sum[int64]); ok && len(sum.DataPoints) > 0 { + t.Errorf("noop fallback leaked Inc* into calls_total datapoints: %v", sum.DataPoints) + } + } + m, ok := findInstrument(rm, "tracecore.selftelemetry.init_errors_total") + if !ok { + t.Fatalf("init_errors_total absent after factory fallback; have: %s", dumpNames(rm)) + } + sum, ok := m.Data.(metricdata.Sum[int64]) + if !ok { + t.Fatalf("init_errors_total data shape: got %T", m.Data) + } + if len(sum.DataPoints) != 1 || sum.DataPoints[0].Value != 1 { + t.Errorf("init_errors_total: want 1 datapoint value=1, got %v", sum.DataPoints) + } +} + +// TestFactory_FallsBackToNoopWhenMeterProviderIsNil pins the +// nil-MeterProvider symmetry of the register-failure fallback: when +// `set.Telemetry.MeterProvider` is nil at construction (no telemetry +// wired), the factory MUST (1) return without error, (2) leave the +// exporter with a working noop telemetry field (no nil, no panic on +// hot-path calls), and (3) emit no datapoints anywhere — there's no +// MeterProvider to scrape. The skip-tick semantic for the nil path is +// intentional: `recordInitError` is only meaningful when telemetry is +// wired but instrument registration failed; a nil provider means the +// operator opted out of telemetry entirely, so a phantom counter would +// be noise. Mirrors `TestFactory_FallsBackToNoopWhenMeterFails` minus +// the failingExporterMP wrapper. +func TestFactory_FallsBackToNoopWhenMeterProviderIsNil(t *testing.T) { + fx := pipelinetest.New(t) + fx.CreateSettings.ID = pipeline.MustNewID(componentType(), "test") + fx.CreateSettings.Telemetry.MeterProvider = nil + cfg := &Config{Endpoint: "http://127.0.0.1:1"} + + e, err := Factory.CreateMetrics(context.Background(), fx.CreateSettings, cfg) + if err != nil { + t.Fatalf("CreateMetrics: %v", err) + } + exp, ok := e.(*otlpExporter) + if !ok { + t.Fatalf("exporter type: got %T, want *otlpExporter", e) + } + if exp.telemetry == nil { + t.Fatal("telemetry field nil after nil-MeterProvider construction; must fall back to noop") + } + // Hot-path calls must not panic. No MeterProvider exists, so there + // is also nothing to scrape — the noop discards by definition. + defer func() { + if r := recover(); r != nil { + t.Fatalf("noop telemetry panicked on hot path: %v", r) + } + }() + exp.telemetry.IncCallSuccess() + exp.telemetry.IncCallFailure(kindIO) +} + +// asSelfExporter is a compile-time pin: it accepts the package-local +// selfExporter interface only. If a future refactor moves the type +// back into internal/selftelemetry (e.g. reintroduces the +// selftelemetry.Exporter alias), this function's signature breaks + +// every caller fails compile. Pairs with the kind-value asserts below +// to pin the sibling-types contract that PR-B1 established. +func asSelfExporter(se selfExporter) selfExporter { return se } + +// TestSelfExporter_SiblingTypesArePackageLocal pins the PR-B1 sibling +// contract: the otlphttp package owns its own selfExporter + kind +// types — they must NOT come from internal/selftelemetry. If a future +// refactor reintroduces that import, the asSelfExporter signature +// changes type → break compile here. The kind-value asserts pin the +// wire-format strings ("marshal", "io", "downstream") that operators +// query — the README's "Self-telemetry labels" table mandates exactly +// these three values for the otlphttp exporter. +func TestSelfExporter_SiblingTypesArePackageLocal(t *testing.T) { + iface := asSelfExporter(newNoopSelfExporter()) + iface.IncCallSuccess() + iface.IncCallFailure(kindMarshal) + iface.IncCallFailure(kindDownstream) + + if string(kindMarshal) != "marshal" { + t.Errorf("kindMarshal: got %q, want %q", string(kindMarshal), "marshal") + } + if string(kindIO) != "io" { + t.Errorf("kindIO: got %q, want %q", string(kindIO), "io") + } + if string(kindDownstream) != "downstream" { + t.Errorf("kindDownstream: got %q, want %q", string(kindDownstream), "downstream") + } +} + +func testID() pipeline.ID { + return pipeline.MustNewID(componentType(), "test") +} + +func dumpNames(rm metricdata.ResourceMetrics) string { + var b strings.Builder + for _, sm := range rm.ScopeMetrics { + for _, m := range sm.Metrics { + fmt.Fprintf(&b, " %s@%s", m.Name, sm.Scope.Name) + } + } + return b.String() +} + +// failingExporterMP wraps a real MeterProvider but fails every instrument +// registration whose name starts with "tracecore.exporter.". Mirrors the +// stdoutexporter sibling test seam so a future refactor that reorders the +// newSelfExporter constructor doesn't silently bypass coverage. +type failingExporterMP struct { + embedded.MeterProvider + real metric.MeterProvider +} + +func (p *failingExporterMP) Meter(name string, opts ...metric.MeterOption) metric.Meter { + return &failingExporterMeter{Meter: p.real.Meter(name, opts...)} +} + +type failingExporterMeter struct { + metric.Meter +} + +const exporterInstrumentPrefix = "tracecore.exporter." + +var errSyntheticExporterFailure = errors.New("synthetic: exporter instrument registration failed") + +func (m *failingExporterMeter) Int64Counter(name string, opts ...metric.Int64CounterOption) (metric.Int64Counter, error) { + if strings.HasPrefix(name, exporterInstrumentPrefix) { + return nil, errSyntheticExporterFailure + } + c, err := m.Meter.Int64Counter(name, opts...) + if err != nil { + return nil, fmt.Errorf("failingExporterMeter passthrough: %w", err) + } + return c, nil +} From d634656fcd80513414824fcf77cde389505f39bf Mon Sep 17 00:00:00 2001 From: Tri Lam Date: Sat, 30 May 2026 23:26:04 -0700 Subject: [PATCH 2/2] fix(otlphttp): receiver-prefix test names per cross-cut criterion Rename TestSelfTelemetry_* / TestFactory_* / TestSelfExporter_* / TestRecordInitError_* tests in selftel_test.go + factory_test.go to TestOtlphttp_* form so test output unambiguously identifies the package under test. Doc comments above each test updated in lockstep. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Tri Lam --- components/exporters/otlphttp/factory_test.go | 14 +++---- components/exporters/otlphttp/selftel_test.go | 38 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/components/exporters/otlphttp/factory_test.go b/components/exporters/otlphttp/factory_test.go index 636ef289..85a31eea 100644 --- a/components/exporters/otlphttp/factory_test.go +++ b/components/exporters/otlphttp/factory_test.go @@ -13,19 +13,19 @@ import ( "github.com/tracecoreai/tracecore/internal/pipeline/pipelinetest" ) -func TestFactory_TypeIsOtlphttp(t *testing.T) { +func TestOtlphttp_TypeIsOtlphttp(t *testing.T) { t.Parallel() require.Equal(t, "otlphttp", otlphttp.Factory.Type().String()) } -func TestFactory_CreateDefaultConfig_ReturnsConfigPointer(t *testing.T) { +func TestOtlphttp_CreateDefaultConfig_ReturnsConfigPointer(t *testing.T) { t.Parallel() cfg := otlphttp.Factory.CreateDefaultConfig() _, ok := cfg.(*otlphttp.Config) require.True(t, ok, "factory must produce *Config") } -func TestFactory_NewFactoryReturnsSameInstance(t *testing.T) { +func TestOtlphttp_NewFactoryReturnsSameInstance(t *testing.T) { t.Parallel() // tools/components-gen calls NewFactory(); the package also // exposes Factory directly. They must be the same value so @@ -33,7 +33,7 @@ func TestFactory_NewFactoryReturnsSameInstance(t *testing.T) { require.Same(t, otlphttp.Factory, otlphttp.NewFactory()) } -func TestFactory_CreateMetrics_ReturnsExporter(t *testing.T) { +func TestOtlphttp_CreateMetrics_ReturnsExporter(t *testing.T) { t.Parallel() fx := pipelinetest.New(t) cfg := &otlphttp.Config{Endpoint: "http://localhost:4318"} @@ -45,7 +45,7 @@ func TestFactory_CreateMetrics_ReturnsExporter(t *testing.T) { require.True(t, ok, "exporter must implement consumer.Metrics") } -func TestFactory_CreateTraces_ReturnsExporter(t *testing.T) { +func TestOtlphttp_CreateTraces_ReturnsExporter(t *testing.T) { t.Parallel() fx := pipelinetest.New(t) cfg := &otlphttp.Config{Endpoint: "http://localhost:4318"} @@ -57,7 +57,7 @@ func TestFactory_CreateTraces_ReturnsExporter(t *testing.T) { require.True(t, ok, "exporter must implement consumer.Traces") } -func TestFactory_CreateLogs_ReturnsExporter(t *testing.T) { +func TestOtlphttp_CreateLogs_ReturnsExporter(t *testing.T) { t.Parallel() fx := pipelinetest.New(t) cfg := &otlphttp.Config{Endpoint: "http://localhost:4318"} @@ -69,7 +69,7 @@ func TestFactory_CreateLogs_ReturnsExporter(t *testing.T) { require.True(t, ok, "exporter must implement consumer.Logs") } -func TestFactory_CreateMetrics_RejectsWrongConfigType(t *testing.T) { +func TestOtlphttp_CreateMetrics_RejectsWrongConfigType(t *testing.T) { t.Parallel() fx := pipelinetest.New(t) // Passing a config of the wrong type should fail with a clear diff --git a/components/exporters/otlphttp/selftel_test.go b/components/exporters/otlphttp/selftel_test.go index 6acea829..2c5573d7 100644 --- a/components/exporters/otlphttp/selftel_test.go +++ b/components/exporters/otlphttp/selftel_test.go @@ -82,11 +82,11 @@ func kvMatch(dp metricdata.DataPoint[int64], want map[string]string) bool { return true } -// TestSelfTelemetry_NoopAlwaysSafe pins: newNoopSelfExporter returns a +// TestOtlphttp_NoopAlwaysSafe pins: newNoopSelfExporter returns a // value whose hot-path methods never panic and silently discard. Every // Consume* path calls into the selfExporter surface; nil-checks at each // call site are forbidden, so the noop must be a real value. -func TestSelfTelemetry_NoopAlwaysSafe(t *testing.T) { +func TestOtlphttp_NoopAlwaysSafe(t *testing.T) { se := newNoopSelfExporter() defer func() { if r := recover(); r != nil { @@ -100,18 +100,18 @@ func TestSelfTelemetry_NoopAlwaysSafe(t *testing.T) { se.IncCallFailure(kindDownstream) } -// TestSelfTelemetry_NewExporter_NilProviderErrors pins: newSelfExporter +// TestOtlphttp_NewExporter_NilProviderErrors pins: newSelfExporter // returns errNilMeterProvider when called with a nil provider rather than // silently substituting noop — the factory is responsible for the fallback // + the recordInitError tick. Mirrors the stdoutexporter sibling contract. -func TestSelfTelemetry_NewExporter_NilProviderErrors(t *testing.T) { +func TestOtlphttp_NewExporter_NilProviderErrors(t *testing.T) { _, err := newSelfExporter(testID(), nil) if !errors.Is(err, errNilMeterProvider) { t.Fatalf("err = %v, want errNilMeterProvider", err) } } -// TestSelfTelemetry_EmitsCallsTotal_WithResultKindAndComponentID pins the +// TestOtlphttp_EmitsCallsTotal_WithResultKindAndComponentID pins the // M2 metric contract for exporters. After IncCallSuccess() ×2 + // IncCallFailure(kindMarshal) ×1 + IncCallFailure(kindIO) ×1 + // IncCallFailure(kindDownstream) ×1, the ManualReader collects @@ -119,7 +119,7 @@ func TestSelfTelemetry_NewExporter_NilProviderErrors(t *testing.T) { // (for failures) kind, labeled with the component_id. A regression that // drops the kind label, the component_id label, the result label, or the // metric-name prefix fails here. -func TestSelfTelemetry_EmitsCallsTotal_WithResultKindAndComponentID(t *testing.T) { +func TestOtlphttp_EmitsCallsTotal_WithResultKindAndComponentID(t *testing.T) { mp, rdr := newTestMeterProvider(t) se, err := newSelfExporter(testID(), mp) if err != nil { @@ -168,11 +168,11 @@ func TestSelfTelemetry_EmitsCallsTotal_WithResultKindAndComponentID(t *testing.T } } -// TestSelfTelemetry_ScopeNameIsExporterImportPath pins the OTel scope-name +// TestOtlphttp_ScopeNameIsExporterImportPath pins the OTel scope-name // standard: instrumentation scope = exporter's Go import path. This anchors // the PR-B1-style decision (vs reusing the deleted internal/selftelemetry // scope) so a future drift back to the internal name fails here. -func TestSelfTelemetry_ScopeNameIsExporterImportPath(t *testing.T) { +func TestOtlphttp_ScopeNameIsExporterImportPath(t *testing.T) { mp, rdr := newTestMeterProvider(t) se, err := newSelfExporter(testID(), mp) if err != nil { @@ -190,13 +190,13 @@ func TestSelfTelemetry_ScopeNameIsExporterImportPath(t *testing.T) { } } -// TestRecordInitError_TicksInitErrorsCounter pins: when factory wiring +// TestOtlphttp_RecordInitError_TicksInitErrorsCounter pins: when factory wiring // fails (newSelfExporter returns an error), recordInitError surfaces a // tracecore.selftelemetry.init_errors_total tick with kind="exporter", // the component_id label, and reason="instrument_register". This is the // only signal that an exporter fell back to noop telemetry; dropping the // recordInitError call must fail this test. -func TestRecordInitError_TicksInitErrorsCounter(t *testing.T) { +func TestOtlphttp_RecordInitError_TicksInitErrorsCounter(t *testing.T) { mp, rdr := newTestMeterProvider(t) recordInitError(context.Background(), mp, "exporter", testID().String(), reasonInstrumentRegister) @@ -226,10 +226,10 @@ func TestRecordInitError_TicksInitErrorsCounter(t *testing.T) { } } -// TestRecordInitError_NilProviderIsSafe pins: a nil MeterProvider must +// TestOtlphttp_RecordInitError_NilProviderIsSafe pins: a nil MeterProvider must // not panic — recordInitError IS the fallback path; crashing here would // turn a partial degradation into a process kill. -func TestRecordInitError_NilProviderIsSafe(t *testing.T) { +func TestOtlphttp_RecordInitError_NilProviderIsSafe(t *testing.T) { defer func() { if r := recover(); r != nil { t.Fatalf("recordInitError(nil) panicked: %v", r) @@ -238,14 +238,14 @@ func TestRecordInitError_NilProviderIsSafe(t *testing.T) { recordInitError(context.Background(), nil, "exporter", "x/y", reasonInstrumentRegister) } -// TestFactory_FallsBackToNoopWhenMeterFails pins the factory +// TestOtlphttp_FallsBackToNoopWhenMeterFails pins the factory // observability contract end-to-end: when newSelfExporter returns an // error (synthetic register failure for every tracecore.exporter.* // instrument), the factory MUST (1) leave the exporter with a working // noop telemetry field (no nil, no panic on hot-path calls), AND (2) // tick tracecore.selftelemetry.init_errors_total via recordInitError. // Mirrors the stdoutexporter sibling test seam. -func TestFactory_FallsBackToNoopWhenMeterFails(t *testing.T) { +func TestOtlphttp_FallsBackToNoopWhenMeterFails(t *testing.T) { mp, rdr := newTestMeterProvider(t) failing := &failingExporterMP{real: mp} @@ -287,7 +287,7 @@ func TestFactory_FallsBackToNoopWhenMeterFails(t *testing.T) { } } -// TestFactory_FallsBackToNoopWhenMeterProviderIsNil pins the +// TestOtlphttp_FallsBackToNoopWhenMeterProviderIsNil pins the // nil-MeterProvider symmetry of the register-failure fallback: when // `set.Telemetry.MeterProvider` is nil at construction (no telemetry // wired), the factory MUST (1) return without error, (2) leave the @@ -297,9 +297,9 @@ func TestFactory_FallsBackToNoopWhenMeterFails(t *testing.T) { // intentional: `recordInitError` is only meaningful when telemetry is // wired but instrument registration failed; a nil provider means the // operator opted out of telemetry entirely, so a phantom counter would -// be noise. Mirrors `TestFactory_FallsBackToNoopWhenMeterFails` minus +// be noise. Mirrors `TestOtlphttp_FallsBackToNoopWhenMeterFails` minus // the failingExporterMP wrapper. -func TestFactory_FallsBackToNoopWhenMeterProviderIsNil(t *testing.T) { +func TestOtlphttp_FallsBackToNoopWhenMeterProviderIsNil(t *testing.T) { fx := pipelinetest.New(t) fx.CreateSettings.ID = pipeline.MustNewID(componentType(), "test") fx.CreateSettings.Telemetry.MeterProvider = nil @@ -335,7 +335,7 @@ func TestFactory_FallsBackToNoopWhenMeterProviderIsNil(t *testing.T) { // to pin the sibling-types contract that PR-B1 established. func asSelfExporter(se selfExporter) selfExporter { return se } -// TestSelfExporter_SiblingTypesArePackageLocal pins the PR-B1 sibling +// TestOtlphttp_SiblingTypesArePackageLocal pins the PR-B1 sibling // contract: the otlphttp package owns its own selfExporter + kind // types — they must NOT come from internal/selftelemetry. If a future // refactor reintroduces that import, the asSelfExporter signature @@ -343,7 +343,7 @@ func asSelfExporter(se selfExporter) selfExporter { return se } // wire-format strings ("marshal", "io", "downstream") that operators // query — the README's "Self-telemetry labels" table mandates exactly // these three values for the otlphttp exporter. -func TestSelfExporter_SiblingTypesArePackageLocal(t *testing.T) { +func TestOtlphttp_SiblingTypesArePackageLocal(t *testing.T) { iface := asSelfExporter(newNoopSelfExporter()) iface.IncCallSuccess() iface.IncCallFailure(kindMarshal)