|
5 | 5 | "fmt" |
6 | 6 | "log/slog" |
7 | 7 | "math/rand" |
| 8 | + "net/http" |
8 | 9 | "net/netip" |
9 | 10 | "os" |
10 | 11 | "sync" |
|
72 | 73 | minConns int |
73 | 74 | dnsListenAddr string |
74 | 75 | autoCreate bool |
| 76 | + healthAddr string |
75 | 77 |
|
76 | 78 | preserveDefaultGwDsts []netip.Prefix |
77 | 79 | ) |
@@ -232,6 +234,33 @@ func (t *tunnelNodeReconciler) run(ctx context.Context, tn *corev1alpha.TunnelNo |
232 | 234 | return nil |
233 | 235 | }) |
234 | 236 |
|
| 237 | + // Start health endpoint server if configured |
| 238 | + if healthAddr != "" { |
| 239 | + mux := http.NewServeMux() |
| 240 | + mux.HandleFunc("/healthz", t.healthHandler) |
| 241 | + |
| 242 | + healthServer := &http.Server{ |
| 243 | + Addr: healthAddr, |
| 244 | + Handler: mux, |
| 245 | + } |
| 246 | + |
| 247 | + g.Go(func() error { |
| 248 | + slog.Info("Starting health endpoint server", slog.String("address", healthAddr)) |
| 249 | + if err := healthServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { |
| 250 | + slog.Error("Health server failed", slog.Any("error", err)) |
| 251 | + return err |
| 252 | + } |
| 253 | + return nil |
| 254 | + }) |
| 255 | + |
| 256 | + g.Go(func() error { |
| 257 | + <-gctx.Done() |
| 258 | + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) |
| 259 | + defer cancel() |
| 260 | + return healthServer.Shutdown(shutdownCtx) |
| 261 | + }) |
| 262 | + } |
| 263 | + |
235 | 264 | r, err := tunnel.BuildClientRouter( |
236 | 265 | tunnel.WithPcapPath(tunnelNodePcapPath), |
237 | 266 | tunnel.WithMode(tunnelMode), |
@@ -403,3 +432,32 @@ func (t *tunnelNodeReconciler) reconcile(ctx context.Context, req ctrl.Request) |
403 | 432 |
|
404 | 433 | return ctrl.Result{}, nil |
405 | 434 | } |
| 435 | + |
| 436 | +// healthHandler returns 200 OK when at least one tunnel connection is active, 503 otherwise. |
| 437 | +// This endpoint is used for health checks to determine if the tunnel node has active connections. |
| 438 | +// The health endpoint is only started when the --health-endpoint flag is provided with a valid |
| 439 | +// address (e.g., ":8080" or "0.0.0.0:8080"). |
| 440 | +// |
| 441 | +// Response codes: |
| 442 | +// - 200 OK: At least one tunnel connection is active |
| 443 | +// - 503 Service Unavailable: No active tunnel connections |
| 444 | +func (t *tunnelNodeReconciler) healthHandler(w http.ResponseWriter, r *http.Request) { |
| 445 | + t.tunMu.RLock() |
| 446 | + defer t.tunMu.RUnlock() |
| 447 | + |
| 448 | + // Check if we have at least one active connection |
| 449 | + activeConns := 0 |
| 450 | + for _, conn := range t.tunDialerWorkers { |
| 451 | + if conn.conn != nil && conn.conn.Context().Err() == nil { |
| 452 | + activeConns++ |
| 453 | + } |
| 454 | + } |
| 455 | + |
| 456 | + if activeConns > 0 { |
| 457 | + w.WriteHeader(http.StatusOK) |
| 458 | + fmt.Fprintf(w, "OK - %d active connection(s)\n", activeConns) |
| 459 | + } else { |
| 460 | + w.WriteHeader(http.StatusServiceUnavailable) |
| 461 | + fmt.Fprintf(w, "UNHEALTHY - no active connections\n") |
| 462 | + } |
| 463 | +} |
0 commit comments