From ae597d1d2cec38f409756ed31f1655e1127a5683 Mon Sep 17 00:00:00 2001 From: Jan Calanog Date: Wed, 17 Jun 2026 13:26:30 +0200 Subject: [PATCH 1/2] OtlpProxy: fix stale-connection drops and error span noise PooledConnectionIdleTimeout was unset (defaulted to 60s), so .NET reused sockets closed server-side across Lambda freeze/thaw cycles, causing batches to be silently dropped. Set PooledConnectionIdleTimeout=2s so idle sockets are discarded before the ADOT sidecar can reset them. Replace the one-dimensional StaleConnectionDrops counter with a dimensioned otlp.proxy.forward metric (outcome x signal_type) for forwarding success-rate visibility. Set Activity status deliberately so stale drops don't surface as red spans. Suppress the duplicate auto-instrumented HttpClient span for the proxy hop (already covered by the app-level span) via a loopback filter on AddHttpClientInstrumentation. Co-Authored-By: Claude Opus 4.8 --- .../OpenTelemetryRegistrationExtensions.cs | 13 +++++- src/api/Elastic.Documentation.Api/Program.cs | 11 ++++- .../ServicesExtension.cs | 6 ++- .../Telemetry/AdotOtlpService.cs | 46 +++++++++++++++---- .../OtlpProxyTests.cs | 9 ++-- 5 files changed, 69 insertions(+), 16 deletions(-) diff --git a/src/Elastic.Documentation.ServiceDefaults/Telemetry/OpenTelemetryRegistrationExtensions.cs b/src/Elastic.Documentation.ServiceDefaults/Telemetry/OpenTelemetryRegistrationExtensions.cs index 571aa0860..b7e1b5600 100644 --- a/src/Elastic.Documentation.ServiceDefaults/Telemetry/OpenTelemetryRegistrationExtensions.cs +++ b/src/Elastic.Documentation.ServiceDefaults/Telemetry/OpenTelemetryRegistrationExtensions.cs @@ -20,6 +20,13 @@ public record OtelRegistration(string ServiceName) public Action? Tracing { get; init; } public Action? Metrics { get; init; } public Action? Logging { get; init; } + /// + /// Optional filter for HttpClient tracing instrumentation. + /// Return false to exclude a request from tracing. + /// Use to suppress internal sidecar traffic (e.g. a localhost OTLP proxy hop) + /// that is already covered by an app-level span. + /// + public Func? HttpClientTracingFilter { get; init; } } public static class OpenTelemetryRegistrationExtensions @@ -91,7 +98,11 @@ public static TBuilder AddDocumentationOpenTelemetry(this TBuilder bui }; }) .AddProcessor() // Automatically add euid to all child spans - .AddHttpClientInstrumentation(); + .AddHttpClientInstrumentation(opts => + { + if (registration.HttpClientTracingFilter is { } filter) + opts.FilterHttpRequestMessage = filter; + }); registration.Tracing?.Invoke(options, tracing); }) .WithMetrics(metrics => diff --git a/src/api/Elastic.Documentation.Api/Program.cs b/src/api/Elastic.Documentation.Api/Program.cs index 9c5495a0b..6837aa291 100644 --- a/src/api/Elastic.Documentation.Api/Program.cs +++ b/src/api/Elastic.Documentation.Api/Program.cs @@ -24,7 +24,16 @@ }) .AddDocumentationOpenTelemetry(new OtelRegistration("docs-api") { - Tracing = (_, t) => t.AddDocsApiTracing() + Tracing = (_, t) => t.AddDocsApiTracing(), + // Exclude the OtlpProxy's own forwarding hops from auto-instrumentation: these requests + // to the localhost ADOT sidecar are already covered by the app-level "forward otlp" span + // in AdotOtlpService, with correct status semantics. The raw HttpClient span would surface + // expected transient resets as error spans. + // Match on loopback host + OTLP HTTP path prefix; port-agnostic so it works whether the + // sidecar listens on 4318 (HTTP) or 4317 (gRPC-HTTP/1.1) or a custom OTEL_EXPORTER_OTLP_ENDPOINT. + HttpClientTracingFilter = req => + req.RequestUri is not { Host: "localhost" or "127.0.0.1" } otlpUri + || !otlpUri.AbsolutePath.StartsWith("/v1/", StringComparison.Ordinal) }) .HealthCheckBuilderExtensions(); diff --git a/src/api/Elastic.Documentation.Api/ServicesExtension.cs b/src/api/Elastic.Documentation.Api/ServicesExtension.cs index 5e79cb6d3..7450a3d12 100644 --- a/src/api/Elastic.Documentation.Api/ServicesExtension.cs +++ b/src/api/Elastic.Documentation.Api/ServicesExtension.cs @@ -209,8 +209,9 @@ private static void AddOtlpProxyService(IServiceCollection services, AppEnv appE // 1s timeout: the collector is a localhost sidecar and should answer in single-digit ms. // RemoveAllResilienceHandlers opts this client out of the global standard resilience handler // (retries + 10s/30s timeouts) so a dead collector fails fast instead of blocking ~9s. - // PooledConnectionLifetime=30s proactively recycles connections before the sidecar closes them, - // keeping the stale-connection drop rate negligible. + // PooledConnectionIdleTimeout=2s: close idle sockets before the ADOT sidecar closes them + // server-side, preventing stale-connection reuse across Lambda freeze/thaw cycles. + // PooledConnectionLifetime=30s: cap long-lived busy connections. #pragma warning disable EXTEXP0001 // RemoveAllResilienceHandlers is experimental _ = services.AddHttpClient(AdotOtlpService.HttpClientName) .ConfigureHttpClient(client => @@ -219,6 +220,7 @@ private static void AddOtlpProxyService(IServiceCollection services, AppEnv appE }) .ConfigurePrimaryHttpMessageHandler(() => new SocketsHttpHandler { + PooledConnectionIdleTimeout = TimeSpan.FromSeconds(2), PooledConnectionLifetime = TimeSpan.FromSeconds(30), }) .RemoveAllResilienceHandlers(); diff --git a/src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs b/src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs index cf6809ed0..b5003cfef 100644 --- a/src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs +++ b/src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs @@ -20,9 +20,9 @@ public class AdotOtlpService( public const string HttpClientName = "OtlpProxy"; private static readonly ActivitySource ActivitySource = new(TelemetryConstants.OtlpProxySourceName); private static readonly Meter Meter = new(TelemetryConstants.OtlpProxySourceName); - internal static readonly Counter StaleConnectionDrops = - Meter.CreateCounter("otlp.proxy.stale_connection.dropped", - description: "OTLP batches silently dropped due to a stale pooled connection to the ADOT collector"); + private static readonly Counter ForwardCounter = + Meter.CreateCounter("otlp.proxy.forward", + description: "OTLP batches processed, by outcome (forwarded/stale_drop/collector_unavailable/timeout/error) and signal_type"); private readonly HttpClient _httpClient = httpClientFactory.CreateClient(HttpClientName); /// @@ -33,10 +33,12 @@ public async Task ForwardOtlp( Cancel ctx = default) { using var activity = ActivitySource.StartActivity("forward otlp", ActivityKind.Client); + var signalTag = signalType.ToStringFast(true); + _ = activity?.SetTag("otlp.signal_type", signalTag); try { - var targetUrl = $"{options.Endpoint.TrimEnd('/')}/v1/{signalType.ToStringFast(true)}"; + var targetUrl = $"{options.Endpoint.TrimEnd('/')}/v1/{signalTag}"; logger.LogDebug("Forwarding OTLP {SignalType} to ADOT collector at {TargetUrl}", signalType, targetUrl); using var request = new HttpRequestMessage(HttpMethod.Post, targetUrl); @@ -52,10 +54,18 @@ public async Task ForwardOtlp( { logger.LogError("OTLP forward to ADOT failed with status {StatusCode}: {Content}", response.StatusCode, responseContent); + _ = activity?.SetStatus(ActivityStatusCode.Error, $"Collector returned {(int)response.StatusCode}"); + ForwardCounter.Add(1, + new KeyValuePair("outcome", "error"), + new KeyValuePair("signal_type", signalTag)); } else { logger.LogDebug("Successfully forwarded OTLP {SignalType} to ADOT collector", signalType); + _ = activity?.SetStatus(ActivityStatusCode.Ok); + ForwardCounter.Add(1, + new KeyValuePair("outcome", "forwarded"), + new KeyValuePair("signal_type", signalTag)); } return new OtlpForwardResult @@ -69,15 +79,32 @@ public async Task ForwardOtlp( var (statusCode, message) = MapExceptionToStatusCode(ex); if (statusCode == 204) { - StaleConnectionDrops.Add(1); + // Stale connection: streaming body cannot be replayed under the zero-copy constraint, + // so this batch is dropped best-effort. Rare after PooledConnectionIdleTimeout tuning. + // Leave activity status unset (not Error) — this is an expected transient condition. + _ = activity?.SetTag("otlp.proxy.outcome", "stale_drop"); logger.LogDebug("Dropped OTLP {SignalType} batch on stale connection; collector will reconnect", signalType); } else + { + _ = activity?.SetStatus(ActivityStatusCode.Error, message); logger.LogError(ex, "Error forwarding OTLP {SignalType}: {ErrorMessage}", signalType, message); + } + ForwardCounter.Add(1, + new KeyValuePair("outcome", OutcomeTag(statusCode)), + new KeyValuePair("signal_type", signalTag)); return new OtlpForwardResult { StatusCode = statusCode, Content = message }; } } + private static string OutcomeTag(int statusCode) => statusCode switch + { + 204 => "stale_drop", + 503 => "collector_unavailable", + 504 => "timeout", + _ => "error" + }; + private static (int StatusCode, string Message) MapExceptionToStatusCode(Exception ex) => ex switch { @@ -92,9 +119,12 @@ private static (int StatusCode, string Message) MapExceptionToStatusCode(Excepti TaskCanceledException or OperationCanceledException => (504, "Request to telemetry collector timed out"), - // Stale pooled connection — SocketsHttpHandler sets AllowRetry=false for non-seekable - // StreamContent, so it throws rather than retrying. OTLP is best-effort; return 204 - // so the browser exporter doesn't treat this as a retryable 502. + // Stale connection reset — streaming body cannot be replayed (zero-copy proxy constraint), + // so the batch is dropped best-effort. Return 204 so the browser exporter doesn't treat it + // as a retryable 502. Rare after PooledConnectionIdleTimeout tuning. + HttpRequestException { InnerException: SocketException { SocketErrorCode: SocketError.ConnectionReset } } + => (204, string.Empty), + HttpRequestException { InnerException: IOException } => (204, string.Empty), diff --git a/tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs b/tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs index 2015e2151..6f8177d74 100644 --- a/tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs +++ b/tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs @@ -335,10 +335,11 @@ public async Task OtlpProxy_SendAsyncThrowsNonIo_MapsToGatewayError() [Fact] public async Task OtlpProxy_StaleConnection_DropsWithNoContent() { - // SocketsHttpHandler detects a stale pooled connection and throws - // HttpRequestException { InnerException: IOException } (AllowRetry=false on non-seekable - // StreamContent). The proxy maps this to 204 so the browser OTLP exporter doesn't - // interpret it as a retryable 502. + // The proxy streams the body zero-copy (no buffering), so a stale connection cannot be + // recovered by replaying the request. The batch is dropped best-effort and the proxy + // returns 204 so the browser OTLP exporter doesn't interpret it as a retryable 502. + // In practice this path is rare because PooledConnectionIdleTimeout closes idle sockets + // before the ADOT sidecar can reset them. var mockHandler = A.Fake(); A.CallTo(mockHandler) From d9789848bb3e2f003eb9af9d639485daf5671859 Mon Sep 17 00:00:00 2001 From: Jan Calanog Date: Wed, 17 Jun 2026 13:50:16 +0200 Subject: [PATCH 2/2] OtlpProxy: remove in-app proxy; route OTLP at the edge The in-app proxy (AdotOtlpService) forwarded every browser OTLP batch over loopback to the otel-collector sidecar. It was the source of Connection-reset-by-peer errors (stale pooled connections) and silent data loss that PR #3521 mitigated. The otel-collector sidecar already binds 0.0.0.0:4318 (OTLP HTTP) and is reachable from the ALB on the shared awsvpc network. Browser telemetry will be routed at the ALB directly to port 4318 via a dedicated listener rule and target group (infra change, separate PR). The collector's OTLP HTTP receiver will be configured to accept the same adblock-safe paths (/docs/_api/v1/o/{t,l,m}) directly (docs-internal-workflows change). This commit removes the proxy from docs-builder: - Delete AdotOtlpService, IOtlpService, OtlpForwardResult, OtlpProxyOptions, OtlpProxyRequest, and OtlpProxyTests - Remove OTLP endpoint mappings (/o/t, /o/l, /o/m) from MappingsExtensions - Remove AddOtlpProxyService from ServicesExtension - Revert OtelRegistration.HttpClientTracingFilter (added solely to silence the proxy's own forwarding hops, no longer needed) - Remove OtlpProxySourceName from TelemetryConstants and tracing setup The app's own server-side EDOT export (localhost:4317) is unchanged. Co-Authored-By: Claude Opus 4.8 --- .../OpenTelemetryRegistrationExtensions.cs | 13 +- .../MappingsExtensions.cs | 34 +- .../OpenTelemetry/OpenTelemetryExtensions.cs | 1 - src/api/Elastic.Documentation.Api/Program.cs | 12 +- .../ServicesExtension.cs | 38 -- .../Telemetry/AdotOtlpService.cs | 138 ------- .../Telemetry/IOtlpService.cs | 25 -- .../Telemetry/OtlpForwardResult.cs | 26 -- .../Telemetry/OtlpProxyOptions.cs | 51 --- .../Telemetry/OtlpProxyRequest.cs | 46 --- .../TelemetryConstants.cs | 6 - .../docs-builder/Http/DocumentationWebHost.cs | 3 +- .../docs-builder/Http/StaticWebHost.cs | 3 +- .../OtlpProxyTests.cs | 378 ------------------ 14 files changed, 5 insertions(+), 769 deletions(-) delete mode 100644 src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs delete mode 100644 src/api/Elastic.Documentation.Api/Telemetry/IOtlpService.cs delete mode 100644 src/api/Elastic.Documentation.Api/Telemetry/OtlpForwardResult.cs delete mode 100644 src/api/Elastic.Documentation.Api/Telemetry/OtlpProxyOptions.cs delete mode 100644 src/api/Elastic.Documentation.Api/Telemetry/OtlpProxyRequest.cs delete mode 100644 tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs diff --git a/src/Elastic.Documentation.ServiceDefaults/Telemetry/OpenTelemetryRegistrationExtensions.cs b/src/Elastic.Documentation.ServiceDefaults/Telemetry/OpenTelemetryRegistrationExtensions.cs index b7e1b5600..571aa0860 100644 --- a/src/Elastic.Documentation.ServiceDefaults/Telemetry/OpenTelemetryRegistrationExtensions.cs +++ b/src/Elastic.Documentation.ServiceDefaults/Telemetry/OpenTelemetryRegistrationExtensions.cs @@ -20,13 +20,6 @@ public record OtelRegistration(string ServiceName) public Action? Tracing { get; init; } public Action? Metrics { get; init; } public Action? Logging { get; init; } - /// - /// Optional filter for HttpClient tracing instrumentation. - /// Return false to exclude a request from tracing. - /// Use to suppress internal sidecar traffic (e.g. a localhost OTLP proxy hop) - /// that is already covered by an app-level span. - /// - public Func? HttpClientTracingFilter { get; init; } } public static class OpenTelemetryRegistrationExtensions @@ -98,11 +91,7 @@ public static TBuilder AddDocumentationOpenTelemetry(this TBuilder bui }; }) .AddProcessor() // Automatically add euid to all child spans - .AddHttpClientInstrumentation(opts => - { - if (registration.HttpClientTracingFilter is { } filter) - opts.FilterHttpRequestMessage = filter; - }); + .AddHttpClientInstrumentation(); registration.Tracing?.Invoke(options, tracing); }) .WithMetrics(metrics => diff --git a/src/api/Elastic.Documentation.Api/MappingsExtensions.cs b/src/api/Elastic.Documentation.Api/MappingsExtensions.cs index 8491c755b..8c7a90655 100644 --- a/src/api/Elastic.Documentation.Api/MappingsExtensions.cs +++ b/src/api/Elastic.Documentation.Api/MappingsExtensions.cs @@ -5,7 +5,6 @@ using System.Diagnostics; using System.Text.Json; using Elastic.Documentation.Api.AskAi; -using Elastic.Documentation.Api.Telemetry; using Elastic.Documentation.Search; using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Http; @@ -17,17 +16,14 @@ namespace Elastic.Documentation.Api; public static class MappingsExtension { - public static void MapElasticDocsApiEndpoints(this IEndpointRouteBuilder group, bool mapOtlpEndpoints = true) + public static void MapElasticDocsApiEndpoints(this IEndpointRouteBuilder group) { - _ = group.MapGet("/", () => Results.Empty); _ = group.MapPost("/", () => Results.Empty); MapAskAiEndpoint(group); MapNavigationSearch(group); MapFullSearch(group); MapChanges(group); - if (mapOtlpEndpoints) - MapOtlpProxyEndpoint(group); } private static void MapAskAiEndpoint(IEndpointRouteBuilder group) @@ -180,32 +176,4 @@ Cancel ctx return Results.Ok(response); }); - private static void MapOtlpProxyEndpoint(IEndpointRouteBuilder group) - { - // Use /o/* to avoid adblocker detection (common blocklists target /otlp, /telemetry, etc.) - var otlpGroup = group.MapGroup("/o"); - - MapOtlpSignalEndpoint(otlpGroup, "/t", OtlpSignalType.Traces); - MapOtlpSignalEndpoint(otlpGroup, "/l", OtlpSignalType.Logs); - MapOtlpSignalEndpoint(otlpGroup, "/m", OtlpSignalType.Metrics); - } - - private static void MapOtlpSignalEndpoint( - IEndpointRouteBuilder group, - string path, - OtlpSignalType signalType) => - group.MapPost(path, - async (HttpContext context, IOtlpService otlpService, Cancel ctx) => - { - var contentType = context.Request.ContentType ?? "application/json"; - var result = await otlpService.ForwardOtlp( - signalType, - context.Request.Body, - contentType, - ctx); - return result.IsSuccess - ? Results.NoContent() - : Results.StatusCode(result.StatusCode); - }) - .DisableAntiforgery(); } diff --git a/src/api/Elastic.Documentation.Api/OpenTelemetry/OpenTelemetryExtensions.cs b/src/api/Elastic.Documentation.Api/OpenTelemetry/OpenTelemetryExtensions.cs index e99ef2448..755780220 100644 --- a/src/api/Elastic.Documentation.Api/OpenTelemetry/OpenTelemetryExtensions.cs +++ b/src/api/Elastic.Documentation.Api/OpenTelemetry/OpenTelemetryExtensions.cs @@ -17,7 +17,6 @@ public static TracerProviderBuilder AddDocsApiTracing(this TracerProviderBuilder _ = builder .AddSource(TelemetryConstants.AskAiSourceName) .AddSource(TelemetryConstants.StreamTransformerSourceName) - .AddSource(TelemetryConstants.OtlpProxySourceName) .AddSource(TelemetryConstants.CacheSourceName) .AddSource(TelemetryConstants.AskAiFeedbackSourceName) .AddAspNetCoreInstrumentation(aspNetCoreOptions => diff --git a/src/api/Elastic.Documentation.Api/Program.cs b/src/api/Elastic.Documentation.Api/Program.cs index 6837aa291..668f05d38 100644 --- a/src/api/Elastic.Documentation.Api/Program.cs +++ b/src/api/Elastic.Documentation.Api/Program.cs @@ -25,15 +25,6 @@ .AddDocumentationOpenTelemetry(new OtelRegistration("docs-api") { Tracing = (_, t) => t.AddDocsApiTracing(), - // Exclude the OtlpProxy's own forwarding hops from auto-instrumentation: these requests - // to the localhost ADOT sidecar are already covered by the app-level "forward otlp" span - // in AdotOtlpService, with correct status semantics. The raw HttpClient span would surface - // expected transient resets as error spans. - // Match on loopback host + OTLP HTTP path prefix; port-agnostic so it works whether the - // sidecar listens on 4318 (HTTP) or 4317 (gRPC-HTTP/1.1) or a custom OTEL_EXPORTER_OTLP_ENDPOINT. - HttpClientTracingFilter = req => - req.RequestUri is not { Host: "localhost" or "127.0.0.1" } otlpUri - || !otlpUri.AbsolutePath.StartsWith("/v1/", StringComparison.Ordinal) }) .HealthCheckBuilderExtensions(); @@ -79,8 +70,7 @@ var v1 = api.MapGroup("/v1"); - var mapOtlpEndpoints = !string.IsNullOrWhiteSpace(builder.Configuration["OTEL_EXPORTER_OTLP_ENDPOINT"]); - v1.MapElasticDocsApiEndpoints(mapOtlpEndpoints); + v1.MapElasticDocsApiEndpoints(); Console.WriteLine("API endpoints mapped"); Console.WriteLine("Application startup completed successfully"); diff --git a/src/api/Elastic.Documentation.Api/ServicesExtension.cs b/src/api/Elastic.Documentation.Api/ServicesExtension.cs index 7450a3d12..ead042603 100644 --- a/src/api/Elastic.Documentation.Api/ServicesExtension.cs +++ b/src/api/Elastic.Documentation.Api/ServicesExtension.cs @@ -9,9 +9,7 @@ using Elastic.Documentation.Api.AskAi; using Elastic.Documentation.Api.Caching; using Elastic.Documentation.Api.Gcp; -using Elastic.Documentation.Api.Telemetry; using Elastic.Documentation.Search; -using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using NetEscapades.EnumGenerators; @@ -73,7 +71,6 @@ private static void AddElasticDocsApiServices(this IServiceCollection services, AddDistributedCache(services, appEnv); AddAskAiServices(services, appEnv); AddSearchServices(services, appEnv); - AddOtlpProxyService(services, appEnv); } // Note: IParameterProvider is no longer needed - all options now read from IConfiguration (env vars) @@ -194,39 +191,4 @@ private static void AddSearchServices(IServiceCollection services, AppEnv appEnv logger?.LogInformation("Full search service registered with hybrid RRF support"); } - private static void AddOtlpProxyService(IServiceCollection services, AppEnv appEnv) - { - var logger = GetLogger(services); - logger?.LogInformation("Configuring OTLP proxy service for environment {AppEnvironment}", appEnv); - - _ = services.AddSingleton(sp => - { - var config = sp.GetRequiredService(); - return new OtlpProxyOptions(config); - }); - - // Register named HttpClient for OTLP proxy. - // 1s timeout: the collector is a localhost sidecar and should answer in single-digit ms. - // RemoveAllResilienceHandlers opts this client out of the global standard resilience handler - // (retries + 10s/30s timeouts) so a dead collector fails fast instead of blocking ~9s. - // PooledConnectionIdleTimeout=2s: close idle sockets before the ADOT sidecar closes them - // server-side, preventing stale-connection reuse across Lambda freeze/thaw cycles. - // PooledConnectionLifetime=30s: cap long-lived busy connections. -#pragma warning disable EXTEXP0001 // RemoveAllResilienceHandlers is experimental - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigureHttpClient(client => - { - client.Timeout = TimeSpan.FromSeconds(1); - }) - .ConfigurePrimaryHttpMessageHandler(() => new SocketsHttpHandler - { - PooledConnectionIdleTimeout = TimeSpan.FromSeconds(2), - PooledConnectionLifetime = TimeSpan.FromSeconds(30), - }) - .RemoveAllResilienceHandlers(); -#pragma warning restore EXTEXP0001 - - _ = services.AddScoped(); - logger?.LogInformation("OTLP proxy configured to forward to ADOT Lambda Layer collector"); - } } diff --git a/src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs b/src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs deleted file mode 100644 index b5003cfef..000000000 --- a/src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using System.Diagnostics; -using System.Diagnostics.Metrics; -using System.Net.Sockets; -using Microsoft.Extensions.Logging; - -namespace Elastic.Documentation.Api.Telemetry; - -/// -/// Service that forwards OTLP telemetry to the ADOT Lambda Layer collector. -/// -public class AdotOtlpService( - IHttpClientFactory httpClientFactory, - OtlpProxyOptions options, - ILogger logger) : IOtlpService -{ - public const string HttpClientName = "OtlpProxy"; - private static readonly ActivitySource ActivitySource = new(TelemetryConstants.OtlpProxySourceName); - private static readonly Meter Meter = new(TelemetryConstants.OtlpProxySourceName); - private static readonly Counter ForwardCounter = - Meter.CreateCounter("otlp.proxy.forward", - description: "OTLP batches processed, by outcome (forwarded/stale_drop/collector_unavailable/timeout/error) and signal_type"); - private readonly HttpClient _httpClient = httpClientFactory.CreateClient(HttpClientName); - - /// - public async Task ForwardOtlp( - OtlpSignalType signalType, - Stream requestBody, - string contentType, - Cancel ctx = default) - { - using var activity = ActivitySource.StartActivity("forward otlp", ActivityKind.Client); - var signalTag = signalType.ToStringFast(true); - _ = activity?.SetTag("otlp.signal_type", signalTag); - - try - { - var targetUrl = $"{options.Endpoint.TrimEnd('/')}/v1/{signalTag}"; - logger.LogDebug("Forwarding OTLP {SignalType} to ADOT collector at {TargetUrl}", signalType, targetUrl); - - using var request = new HttpRequestMessage(HttpMethod.Post, targetUrl); - request.Content = new StreamContent(requestBody); - _ = request.Content.Headers.TryAddWithoutValidation("Content-Type", contentType); - - using var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseContentRead, ctx); - var responseContent = response.Content.Headers.ContentLength > 0 - ? await response.Content.ReadAsStringAsync(ctx) - : string.Empty; - - if (!response.IsSuccessStatusCode) - { - logger.LogError("OTLP forward to ADOT failed with status {StatusCode}: {Content}", - response.StatusCode, responseContent); - _ = activity?.SetStatus(ActivityStatusCode.Error, $"Collector returned {(int)response.StatusCode}"); - ForwardCounter.Add(1, - new KeyValuePair("outcome", "error"), - new KeyValuePair("signal_type", signalTag)); - } - else - { - logger.LogDebug("Successfully forwarded OTLP {SignalType} to ADOT collector", signalType); - _ = activity?.SetStatus(ActivityStatusCode.Ok); - ForwardCounter.Add(1, - new KeyValuePair("outcome", "forwarded"), - new KeyValuePair("signal_type", signalTag)); - } - - return new OtlpForwardResult - { - StatusCode = (int)response.StatusCode, - Content = responseContent - }; - } - catch (Exception ex) - { - var (statusCode, message) = MapExceptionToStatusCode(ex); - if (statusCode == 204) - { - // Stale connection: streaming body cannot be replayed under the zero-copy constraint, - // so this batch is dropped best-effort. Rare after PooledConnectionIdleTimeout tuning. - // Leave activity status unset (not Error) — this is an expected transient condition. - _ = activity?.SetTag("otlp.proxy.outcome", "stale_drop"); - logger.LogDebug("Dropped OTLP {SignalType} batch on stale connection; collector will reconnect", signalType); - } - else - { - _ = activity?.SetStatus(ActivityStatusCode.Error, message); - logger.LogError(ex, "Error forwarding OTLP {SignalType}: {ErrorMessage}", signalType, message); - } - ForwardCounter.Add(1, - new KeyValuePair("outcome", OutcomeTag(statusCode)), - new KeyValuePair("signal_type", signalTag)); - return new OtlpForwardResult { StatusCode = statusCode, Content = message }; - } - } - - private static string OutcomeTag(int statusCode) => statusCode switch - { - 204 => "stale_drop", - 503 => "collector_unavailable", - 504 => "timeout", - _ => "error" - }; - - private static (int StatusCode, string Message) MapExceptionToStatusCode(Exception ex) => - ex switch - { - // Connection refused - downstream service not available - HttpRequestException { InnerException: SocketException { SocketErrorCode: SocketError.ConnectionRefused } } - => (503, "Telemetry collector unavailable"), - - // Timeout - gateway timeout - HttpRequestException { InnerException: SocketException { SocketErrorCode: SocketError.TimedOut } } - => (504, "Telemetry collector timeout"), - - TaskCanceledException or OperationCanceledException - => (504, "Request to telemetry collector timed out"), - - // Stale connection reset — streaming body cannot be replayed (zero-copy proxy constraint), - // so the batch is dropped best-effort. Return 204 so the browser exporter doesn't treat it - // as a retryable 502. Rare after PooledConnectionIdleTimeout tuning. - HttpRequestException { InnerException: SocketException { SocketErrorCode: SocketError.ConnectionReset } } - => (204, string.Empty), - - HttpRequestException { InnerException: IOException } - => (204, string.Empty), - - // Other HTTP/network errors - bad gateway - HttpRequestException - => (502, "Failed to communicate with telemetry collector"), - - // Unknown errors - _ => (500, $"Internal error: {ex.Message}") - }; -} diff --git a/src/api/Elastic.Documentation.Api/Telemetry/IOtlpService.cs b/src/api/Elastic.Documentation.Api/Telemetry/IOtlpService.cs deleted file mode 100644 index 77fcb9c19..000000000 --- a/src/api/Elastic.Documentation.Api/Telemetry/IOtlpService.cs +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -namespace Elastic.Documentation.Api.Telemetry; - -/// -/// Service for forwarding OTLP telemetry to a collector. -/// -public interface IOtlpService -{ - /// - /// Forwards OTLP telemetry data to the collector. - /// - /// The OTLP signal type (traces, logs, or metrics) - /// The raw OTLP payload stream - /// Content-Type of the payload - /// Cancellation token - /// Result containing HTTP status code and response content - Task ForwardOtlp( - OtlpSignalType signalType, - Stream requestBody, - string contentType, - Cancel ctx = default); -} diff --git a/src/api/Elastic.Documentation.Api/Telemetry/OtlpForwardResult.cs b/src/api/Elastic.Documentation.Api/Telemetry/OtlpForwardResult.cs deleted file mode 100644 index 9acddc8fb..000000000 --- a/src/api/Elastic.Documentation.Api/Telemetry/OtlpForwardResult.cs +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -namespace Elastic.Documentation.Api.Telemetry; - -/// -/// Result of forwarding OTLP telemetry to a collector. -/// -public record OtlpForwardResult -{ - /// - /// HTTP status code from the collector response. - /// - public required int StatusCode { get; init; } - - /// - /// Response content from the collector, if any. - /// - public string? Content { get; init; } - - /// - /// Whether the forward operation was successful (2xx status code). - /// - public bool IsSuccess => StatusCode is >= 200 and < 300; -} diff --git a/src/api/Elastic.Documentation.Api/Telemetry/OtlpProxyOptions.cs b/src/api/Elastic.Documentation.Api/Telemetry/OtlpProxyOptions.cs deleted file mode 100644 index 6e355de68..000000000 --- a/src/api/Elastic.Documentation.Api/Telemetry/OtlpProxyOptions.cs +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using Microsoft.Extensions.Configuration; - -namespace Elastic.Documentation.Api.Telemetry; - -/// -/// Configuration options for the OTLP proxy. -/// The proxy forwards telemetry to a local OTLP collector (typically ADOT Lambda Layer). -/// -/// -/// ADOT Lambda Layer runs a local OpenTelemetry Collector that accepts OTLP/HTTP on: -/// - localhost:4318 (HTTP/JSON and HTTP/protobuf) -/// - localhost:4317 (gRPC) -/// -/// Configuration priority: -/// 1. OtlpProxy:Endpoint in IConfiguration (for tests/overrides) -/// 2. OTEL_EXPORTER_OTLP_ENDPOINT environment variable -/// 3. Default: http://localhost:4318 -/// -/// The proxy will return 503 if the collector is not available. -/// -public class OtlpProxyOptions(IConfiguration configuration) -{ - /// - /// OTLP endpoint URL for the local ADOT collector. - /// Defaults to localhost:4318 when running in Lambda with ADOT layer. - /// - public string Endpoint { get; } = ResolveEndpoint(configuration); - - private static string ResolveEndpoint(IConfiguration configuration) - { - const string configKey = "OtlpProxy:Endpoint"; - const string defaultEndpoint = "http://localhost:4318"; - - // Priority 1: Explicit configuration (for tests or custom deployments) - if (!string.IsNullOrEmpty(configuration[configKey])) - return configuration[configKey]!; - - // Priority 2: Standard OTEL env var, then legacy fallback - var endpoint = Environment.GetEnvironmentVariable("OTEL_EXPORTER_OTLP_ENDPOINT") - ?? Environment.GetEnvironmentVariable("OTLP_PROXY_ENDPOINT"); - if (!string.IsNullOrEmpty(endpoint)) - return endpoint; - - // Priority 3: Default (ADOT Lambda Layer collector) - return defaultEndpoint; - } -} diff --git a/src/api/Elastic.Documentation.Api/Telemetry/OtlpProxyRequest.cs b/src/api/Elastic.Documentation.Api/Telemetry/OtlpProxyRequest.cs deleted file mode 100644 index 3a5b48d5e..000000000 --- a/src/api/Elastic.Documentation.Api/Telemetry/OtlpProxyRequest.cs +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using System.ComponentModel.DataAnnotations; -using NetEscapades.EnumGenerators; - -namespace Elastic.Documentation.Api.Telemetry; - -/// -/// OTLP signal types supported by the proxy. -/// The Display names match the OTLP path segments (lowercase). -/// -[EnumExtensions] -public enum OtlpSignalType -{ - /// - /// Distributed traces - maps to /v1/traces - /// - [Display(Name = "traces")] - Traces, - - /// - /// Log records - maps to /v1/logs - /// - [Display(Name = "logs")] - Logs, - - /// - /// Metrics data - maps to /v1/metrics - /// - [Display(Name = "metrics")] - Metrics -} - -/// -/// Request model for OTLP proxy endpoint. -/// Accepts raw OTLP payload from frontend and forwards to configured OTLP endpoint. -/// -public class OtlpProxyRequest -{ - /// - /// The OTLP signal type: traces, logs, or metrics - /// - public required string SignalType { get; init; } -} diff --git a/src/api/Elastic.Documentation.Api/TelemetryConstants.cs b/src/api/Elastic.Documentation.Api/TelemetryConstants.cs index 5e33d70fc..0d0b6028f 100644 --- a/src/api/Elastic.Documentation.Api/TelemetryConstants.cs +++ b/src/api/Elastic.Documentation.Api/TelemetryConstants.cs @@ -27,12 +27,6 @@ public static class TelemetryConstants /// public const string StreamTransformerSourceName = "Elastic.Documentation.Api.StreamTransformer"; - /// - /// ActivitySource name for OTLP proxy operations. - /// Used to trace frontend telemetry proxying. - /// - public const string OtlpProxySourceName = "Elastic.Documentation.Api.OtlpProxy"; - /// /// ActivitySource name for distributed cache operations. /// Used to trace cache hits, misses, and performance. diff --git a/src/tooling/docs-builder/Http/DocumentationWebHost.cs b/src/tooling/docs-builder/Http/DocumentationWebHost.cs index b48d33b9a..c513233a0 100644 --- a/src/tooling/docs-builder/Http/DocumentationWebHost.cs +++ b/src/tooling/docs-builder/Http/DocumentationWebHost.cs @@ -158,8 +158,7 @@ private void SetUpRoutes() #if DEBUG var apiV1 = _webApplication.MapGroup($"{SystemEnvironmentVariables.Instance.ApiPrefix}/v1"); - var mapOtlpEndpoints = !string.IsNullOrWhiteSpace(_webApplication.Configuration["OTEL_EXPORTER_OTLP_ENDPOINT"]); - apiV1.MapElasticDocsApiEndpoints(mapOtlpEndpoints); + apiV1.MapElasticDocsApiEndpoints(); #endif // SSE endpoint for diagnostics streaming diff --git a/src/tooling/docs-builder/Http/StaticWebHost.cs b/src/tooling/docs-builder/Http/StaticWebHost.cs index 250d77218..5b4b1e620 100644 --- a/src/tooling/docs-builder/Http/StaticWebHost.cs +++ b/src/tooling/docs-builder/Http/StaticWebHost.cs @@ -86,8 +86,7 @@ private void SetUpRoutes() #if DEBUG var apiV1 = WebApplication.MapGroup($"{SystemEnvironmentVariables.Instance.ApiPrefix}/v1"); - var mapOtlpEndpoints = !string.IsNullOrWhiteSpace(WebApplication.Configuration["OTEL_EXPORTER_OTLP_ENDPOINT"]); - apiV1.MapElasticDocsApiEndpoints(mapOtlpEndpoints); + apiV1.MapElasticDocsApiEndpoints(); #endif } diff --git a/tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs b/tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs deleted file mode 100644 index 6f8177d74..000000000 --- a/tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs +++ /dev/null @@ -1,378 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using System.Net; -using System.Text; -using AwesomeAssertions; -using Elastic.Documentation.Api.Telemetry; -using Elastic.Documentation.Api.Tests.Fixtures; -using FakeItEasy; -using Microsoft.Extensions.DependencyInjection; -using Xunit; - -namespace Elastic.Documentation.Api.Tests; - -public class OtlpProxyTests -{ - private const string OtlpEndpoint = "http://localhost:4318"; - - [Fact] - public async Task OtlpProxyTracesEndpointForwardsToCorrectUrl() - { - // Arrange - var mockHandler = A.Fake(); - var capturedRequest = (HttpRequestMessage?)null; - - // Create mock response (will be disposed by HttpClient) - var mockResponse = new HttpResponseMessage(HttpStatusCode.OK) - { - Content = new StringContent("{}") - }; - - A.CallTo(mockHandler) - .Where(call => call.Method.Name == "SendAsync") - .WithReturnType>() - .Invokes((HttpRequestMessage req, CancellationToken ct) => capturedRequest = req) - .Returns(Task.FromResult(mockResponse)); - - using var factory = ApiWebApplicationFactory.WithMockedServices(services => - { - // Replace the named HttpClient with our mock - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigurePrimaryHttpMessageHandler(() => mockHandler); - }, otlpEndpoint: OtlpEndpoint); - - var client = factory.CreateClient(); - var otlpPayload = /*lang=json,strict*/ """ - { - "resourceSpans": [{ - "scopeSpans": [{ - "spans": [{ - "traceId": "0123456789abcdef0123456789abcdef", - "spanId": "0123456789abcdef", - "name": "test-span" - }] - }] - }] - } - """; - - using var content = new StringContent(otlpPayload, Encoding.UTF8, "application/json"); - - // Act - using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken); - - // Assert - verify the request was forwarded to the correct URL - if (!response.IsSuccessStatusCode) - { - var errorBody = await response.Content.ReadAsStringAsync(TestContext.Current.CancellationToken); - throw new Exception($"Test failed with {response.StatusCode}: {errorBody}"); - } - - response.StatusCode.Should().Be(HttpStatusCode.NoContent); - capturedRequest.Should().NotBeNull(); - capturedRequest!.RequestUri.Should().NotBeNull(); - capturedRequest.RequestUri!.ToString().Should().Be($"{OtlpEndpoint}/v1/traces"); - capturedRequest.Method.Should().Be(HttpMethod.Post); - capturedRequest.Content.Should().NotBeNull(); - capturedRequest.Content!.Headers.ContentType!.MediaType.Should().Be("application/json"); - - // Cleanup mock response - mockResponse.Dispose(); - } - - [Fact] - public async Task OtlpProxyLogsEndpointForwardsToCorrectUrl() - { - // Arrange - var mockHandler = A.Fake(); - var capturedRequest = (HttpRequestMessage?)null; - - // Create mock response (will be disposed by HttpClient) - var mockResponse = new HttpResponseMessage(HttpStatusCode.OK) - { - Content = new StringContent("{}") - }; - - A.CallTo(mockHandler) - .Where(call => call.Method.Name == "SendAsync") - .WithReturnType>() - .Invokes((HttpRequestMessage req, CancellationToken ct) => capturedRequest = req) - .Returns(Task.FromResult(mockResponse)); - - using var factory = ApiWebApplicationFactory.WithMockedServices(services => - { - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigurePrimaryHttpMessageHandler(() => mockHandler); - }, otlpEndpoint: OtlpEndpoint); - - var client = factory.CreateClient(); - var otlpPayload = /*lang=json,strict*/ """ - { - "resourceLogs": [{ - "scopeLogs": [{ - "logRecords": [{ - "timeUnixNano": "1672531200000000000", - "severityNumber": 9, - "severityText": "INFO", - "body": { - "stringValue": "Test log" - } - }] - }] - }] - } - """; - - using var content = new StringContent(otlpPayload, Encoding.UTF8, "application/json"); - - // Act - using var response = await client.PostAsync("/docs/_api/v1/o/l", content, TestContext.Current.CancellationToken); - - // Assert - verify the enum ToStringFast() generates "logs" (lowercase) - response.StatusCode.Should().Be(HttpStatusCode.NoContent); - capturedRequest.Should().NotBeNull(); - capturedRequest!.RequestUri!.ToString().Should().Be($"{OtlpEndpoint}/v1/logs"); - - // Cleanup mock response - mockResponse.Dispose(); - } - - [Fact] - public async Task OtlpProxyMetricsEndpointForwardsToCorrectUrl() - { - // Arrange - var mockHandler = A.Fake(); - var capturedRequest = (HttpRequestMessage?)null; - - // Create mock response (will be disposed by HttpClient) - var mockResponse = new HttpResponseMessage(HttpStatusCode.OK) - { - Content = new StringContent("{}") - }; - - A.CallTo(mockHandler) - .Where(call => call.Method.Name == "SendAsync") - .WithReturnType>() - .Invokes((HttpRequestMessage req, CancellationToken ct) => capturedRequest = req) - .Returns(Task.FromResult(mockResponse)); - - using var factory = ApiWebApplicationFactory.WithMockedServices(services => - { - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigurePrimaryHttpMessageHandler(() => mockHandler); - }, otlpEndpoint: OtlpEndpoint); - - var client = factory.CreateClient(); - var otlpPayload = /*lang=json,strict*/ """ - { - "resourceMetrics": [{ - "scopeMetrics": [{ - "metrics": [{ - "name": "test_metric", - "unit": "1" - }] - }] - }] - } - """; - - using var content = new StringContent(otlpPayload, Encoding.UTF8, "application/json"); - - // Act - using var response = await client.PostAsync("/docs/_api/v1/o/m", content, TestContext.Current.CancellationToken); - - // Assert - verify the enum ToStringFast() generates "metrics" (lowercase) - response.StatusCode.Should().Be(HttpStatusCode.NoContent); - capturedRequest.Should().NotBeNull(); - capturedRequest!.RequestUri!.ToString().Should().Be($"{OtlpEndpoint}/v1/metrics"); - - // Cleanup mock response - mockResponse.Dispose(); - } - - [Fact] - public async Task OtlpProxyReturnsCollectorErrorStatusCode() - { - // Arrange - var mockHandler = A.Fake(); - - // Create mock response (will be disposed by HttpClient) - var mockResponse = new HttpResponseMessage(HttpStatusCode.ServiceUnavailable) - { - Content = new StringContent("Service unavailable") - }; - - A.CallTo(mockHandler) - .Where(call => call.Method.Name == "SendAsync") - .WithReturnType>() - .Returns(Task.FromResult(mockResponse)); - - using var factory = ApiWebApplicationFactory.WithMockedServices(services => - { -#pragma warning disable EXTEXP0001 // Experimental API - needed for test to bypass resilience handlers - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigurePrimaryHttpMessageHandler(() => mockHandler) - .RemoveAllResilienceHandlers(); -#pragma warning restore EXTEXP0001 - }, otlpEndpoint: OtlpEndpoint); - - var client = factory.CreateClient(); - using var content = new StringContent("{}", Encoding.UTF8, "application/json"); - - // Act - using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken); - - var responseContent = await response.Content.ReadAsStringAsync(TestContext.Current.CancellationToken); - // Assert - verify error responses are properly forwarded - response.StatusCode.Should().Be(HttpStatusCode.ServiceUnavailable, "{0}", responseContent); - - - // Cleanup mock response - mockResponse.Dispose(); - } - - [Fact] - public async Task OtlpProxy_CollectorUnavailable_FailsFastWithoutRetries() - { - // Arrange - var callCount = 0; - var mockHandler = A.Fake(); - - A.CallTo(mockHandler) - .Where(call => call.Method.Name == "SendAsync") - .WithReturnType>() - .Invokes((HttpRequestMessage _, CancellationToken _) => callCount++) - .Throws(new HttpRequestException("Connection refused", - new System.Net.Sockets.SocketException((int)System.Net.Sockets.SocketError.ConnectionRefused))); - - using var factory = ApiWebApplicationFactory.WithMockedServices(services => - { - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigurePrimaryHttpMessageHandler(() => mockHandler); - }, otlpEndpoint: OtlpEndpoint); - - var client = factory.CreateClient(); - using var content = new StringContent("{}", Encoding.UTF8, "application/json"); - - // Act - using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken); - - // Assert — exactly one attempt, no retries - callCount.Should().Be(1, "telemetry forwarding must fail fast with no retries"); - response.StatusCode.Should().Be(HttpStatusCode.ServiceUnavailable); - } - - [Fact] - public async Task OtlpProxy_ForwardedBodyMatchesInput() - { - // Arrange - var mockHandler = A.Fake(); - var capturedBody = (byte[]?)null; - - var mockResponse = new HttpResponseMessage(HttpStatusCode.OK) - { - Content = new StringContent("{}") - }; - - A.CallTo(mockHandler) - .Where(call => call.Method.Name == "SendAsync") - .WithReturnType>() - .Invokes(async (HttpRequestMessage req, CancellationToken ct) => - capturedBody = await req.Content!.ReadAsByteArrayAsync(ct)) - .Returns(Task.FromResult(mockResponse)); - - using var factory = ApiWebApplicationFactory.WithMockedServices(services => - { - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigurePrimaryHttpMessageHandler(() => mockHandler); - }, otlpEndpoint: OtlpEndpoint); - - var client = factory.CreateClient(); - var originalPayload = Encoding.UTF8.GetBytes(/*lang=json,strict*/ """{"resourceSpans":[]}"""); - using var content = new ByteArrayContent(originalPayload); - content.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue("application/json"); - - // Act - using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken); - - // Assert — bytes arriving at the collector must exactly match the original payload - response.StatusCode.Should().Be(HttpStatusCode.NoContent); - capturedBody.Should().NotBeNull(); - capturedBody.Should().BeEquivalentTo(originalPayload); - - mockResponse.Dispose(); - } - - [Fact] - public async Task OtlpProxy_SendAsyncThrowsNonIo_MapsToGatewayError() - { - var callCount = 0; - var mockHandler = A.Fake(); - - A.CallTo(mockHandler) - .Where(call => call.Method.Name == "SendAsync") - .WithReturnType>() - .Invokes((HttpRequestMessage _, CancellationToken _) => callCount++) - .Throws(new HttpRequestException("Some non-IO network error")); - - using var factory = ApiWebApplicationFactory.WithMockedServices(services => - { - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigurePrimaryHttpMessageHandler(() => mockHandler); - }, otlpEndpoint: OtlpEndpoint); - - var client = factory.CreateClient(); - using var content = new StringContent(/*lang=json,strict*/ """{"resourceSpans":[]}""", Encoding.UTF8, "application/json"); - - using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken); - - callCount.Should().Be(1); - response.StatusCode.Should().Be(HttpStatusCode.BadGateway); - } - - [Fact] - public async Task OtlpProxy_StaleConnection_DropsWithNoContent() - { - // The proxy streams the body zero-copy (no buffering), so a stale connection cannot be - // recovered by replaying the request. The batch is dropped best-effort and the proxy - // returns 204 so the browser OTLP exporter doesn't interpret it as a retryable 502. - // In practice this path is rare because PooledConnectionIdleTimeout closes idle sockets - // before the ADOT sidecar can reset them. - var mockHandler = A.Fake(); - - A.CallTo(mockHandler) - .Where(call => call.Method.Name == "SendAsync") - .WithReturnType>() - .Throws(new HttpRequestException("Stale connection", new IOException("Connection reset by peer"))); - - using var factory = ApiWebApplicationFactory.WithMockedServices(services => - { - _ = services.AddHttpClient(AdotOtlpService.HttpClientName) - .ConfigurePrimaryHttpMessageHandler(() => mockHandler); - }, otlpEndpoint: OtlpEndpoint); - - var client = factory.CreateClient(); - using var content = new StringContent(/*lang=json,strict*/ """{"resourceSpans":[]}""", Encoding.UTF8, "application/json"); - - using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken); - - response.StatusCode.Should().Be(HttpStatusCode.NoContent); - } - - [Fact] - public async Task OtlpProxyInvalidSignalTypeReturns404() - { - // Arrange - using var factory = new ApiWebApplicationFactory(); - using var client = factory.CreateClient(); - using var content = new StringContent("{}", Encoding.UTF8, "application/json"); - - // Act - use invalid signal type - using var response = await client.PostAsync("/docs/_api/v1/o/invalid", content, TestContext.Current.CancellationToken); - - // Assert - route doesn't exist - response.StatusCode.Should().Be(HttpStatusCode.NotFound); - } -}