From eb5baa86366cf83600f8a3a0d203f8ac56b440ef Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Fri, 13 Mar 2026 01:46:58 +0000 Subject: [PATCH 1/8] feat(security): separate iptables setup into init container Add awf-iptables-init service that shares the agent's network namespace via network_mode: "service:agent" and runs setup-iptables.sh before signaling readiness. The agent container never receives NET_ADMIN capability, eliminating the startup window where privileged capabilities were held. Key changes: - Add iptables-init service to docker-compose with NET_ADMIN + cap_drop ALL - Remove NET_ADMIN from agent container's cap_add - Agent entrypoint waits for /tmp/awf-init/ready signal (30s timeout) - Init container uses same image as agent, exits after iptables setup - Update cleanup scripts to handle awf-iptables-init container Fixes #375 Co-Authored-By: Claude Opus 4.6 (1M context) --- containers/agent/entrypoint.sh | 39 +++++++++++++++---- scripts/ci/cleanup.sh | 2 +- src/docker-manager.test.ts | 40 +++++++++++++++----- src/docker-manager.ts | 69 +++++++++++++++++++++++++++++++--- 4 files changed, 127 insertions(+), 23 deletions(-) diff --git a/containers/agent/entrypoint.sh b/containers/agent/entrypoint.sh index ad454c49..afa2ce86 100644 --- a/containers/agent/entrypoint.sh +++ b/containers/agent/entrypoint.sh @@ -115,8 +115,22 @@ if [ "${AWF_SSL_BUMP_ENABLED}" = "true" ]; then fi fi -# Setup iptables rules -/usr/local/bin/setup-iptables.sh +# Wait for iptables init container to complete setup +# The awf-iptables-init container shares our network namespace and runs +# setup-iptables.sh, then writes a ready signal file. This ensures the agent +# container NEVER needs NET_ADMIN capability. +echo "[entrypoint] Waiting for iptables initialization from init container..." +INIT_TIMEOUT=300 # 300 * 0.1s = 30 seconds +INIT_ELAPSED=0 +while [ ! 
-f /tmp/awf-init/ready ]; do + if [ "$INIT_ELAPSED" -ge "$INIT_TIMEOUT" ]; then + echo "[entrypoint][ERROR] Timed out waiting for iptables init container after 30s" + exit 1 + fi + sleep 0.1 + INIT_ELAPSED=$((INIT_ELAPSED + 1)) +done +echo "[entrypoint] iptables initialization complete" # Run API proxy health checks (verifies credential isolation and connectivity) # This must run AFTER iptables setup (which allows api-proxy traffic) but BEFORE user command @@ -275,15 +289,19 @@ runuser -u awfuser -- git config --global --add safe.directory '*' 2>/dev/null | echo "[entrypoint] ==================================" # Determine which capabilities to drop -# - CAP_NET_ADMIN is always dropped (prevents iptables bypass) +# - CAP_NET_ADMIN is NOT present (never granted to agent container - iptables setup +# is handled by the separate awf-iptables-init container) # - CAP_SYS_CHROOT is dropped when chroot mode is enabled (prevents user code from using chroot) # - CAP_SYS_ADMIN is dropped when chroot mode is enabled (was needed for mounting procfs) if [ "${AWF_CHROOT_ENABLED}" = "true" ]; then - CAPS_TO_DROP="cap_net_admin,cap_sys_chroot,cap_sys_admin" - echo "[entrypoint] Chroot mode enabled - dropping CAP_NET_ADMIN, CAP_SYS_CHROOT, and CAP_SYS_ADMIN" + CAPS_TO_DROP="cap_sys_chroot,cap_sys_admin" + echo "[entrypoint] Chroot mode enabled - dropping CAP_SYS_CHROOT and CAP_SYS_ADMIN" else - CAPS_TO_DROP="cap_net_admin" - echo "[entrypoint] Dropping CAP_NET_ADMIN capability" + # In non-chroot mode, no capabilities need to be dropped + # NET_ADMIN is never granted (init container handles iptables) + # SYS_CHROOT and SYS_ADMIN are only needed/dropped in chroot mode + CAPS_TO_DROP="" + echo "[entrypoint] No capabilities to drop (NET_ADMIN never granted to agent)" fi # Function to unset sensitive tokens from the entrypoint's environment @@ -650,7 +668,12 @@ else # SECURITY: Run agent command in background, then unset tokens from parent shell # This prevents tokens from being 
accessible via /proc/1/environ after agent starts # The one-shot-token library caches tokens in the agent process, so agent can still read them - capsh --drop=$CAPS_TO_DROP -- -c "exec gosu awfuser $(printf '%q ' "$@")" & + if [ -n "$CAPS_TO_DROP" ]; then + capsh --drop=$CAPS_TO_DROP -- -c "exec gosu awfuser $(printf '%q ' "$@")" & + else + # No capabilities to drop - just switch to unprivileged user + gosu awfuser "$@" & + fi AGENT_PID=$! # Wait for agent to initialize and cache tokens (5 seconds) diff --git a/scripts/ci/cleanup.sh b/scripts/ci/cleanup.sh index 690a4119..cd921717 100755 --- a/scripts/ci/cleanup.sh +++ b/scripts/ci/cleanup.sh @@ -12,7 +12,7 @@ echo "===========================================" # First, explicitly remove containers by name (handles orphaned containers) echo "Removing awf containers by name..." -docker rm -f awf-squid awf-agent awf-api-proxy 2>/dev/null || true +docker rm -f awf-squid awf-agent awf-iptables-init awf-api-proxy 2>/dev/null || true # Cleanup diagnostic test containers echo "Stopping docker compose services..." 
diff --git a/src/docker-manager.test.ts b/src/docker-manager.test.ts index 06238722..2df2b45e 100644 --- a/src/docker-manager.test.ts +++ b/src/docker-manager.test.ts @@ -727,11 +727,12 @@ describe('docker-manager', () => { expect(volumes).toContain(`${homeDir}/.copilot:/host${homeDir}/.copilot:rw`); }); - it('should add SYS_CHROOT and SYS_ADMIN capabilities', () => { + it('should add SYS_CHROOT and SYS_ADMIN capabilities but NOT NET_ADMIN', () => { const result = generateDockerCompose(mockConfig, mockNetworkConfig); const agent = result.services.agent; - expect(agent.cap_add).toContain('NET_ADMIN'); + // NET_ADMIN is NOT on the agent - it's on the iptables-init container + expect(agent.cap_add).not.toContain('NET_ADMIN'); expect(agent.cap_add).toContain('SYS_CHROOT'); // SYS_ADMIN is needed to mount procfs at /host/proc for dynamic /proc/self/exe expect(agent.cap_add).toContain('SYS_ADMIN'); @@ -1062,14 +1063,35 @@ describe('docker-manager', () => { expect(depends['squid-proxy'].condition).toBe('service_healthy'); }); - it('should add NET_ADMIN capability to agent for iptables setup', () => { - // NET_ADMIN is required at container start for setup-iptables.sh - // The capability is dropped before user command execution via capsh - // (see containers/agent/entrypoint.sh) + it('should NOT add NET_ADMIN to agent (handled by iptables-init container)', () => { + // NET_ADMIN is NOT granted to the agent container. + // iptables setup is performed by the awf-iptables-init service which shares + // the agent's network namespace. 
const result = generateDockerCompose(mockConfig, mockNetworkConfig); const agent = result.services.agent; - expect(agent.cap_add).toContain('NET_ADMIN'); + expect(agent.cap_add).not.toContain('NET_ADMIN'); + }); + + it('should add iptables-init service with NET_ADMIN capability', () => { + const result = generateDockerCompose(mockConfig, mockNetworkConfig); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const initService = result.services['iptables-init'] as any; + + expect(initService).toBeDefined(); + expect(initService.container_name).toBe('awf-iptables-init'); + expect(initService.cap_add).toEqual(['NET_ADMIN']); + expect(initService.cap_drop).toEqual(['ALL']); + expect(initService.network_mode).toBe('service:agent'); + expect(initService.depends_on).toEqual({ + 'agent': { condition: 'service_started' }, + }); + expect(initService.command).toEqual([ + '/bin/bash', '-c', + '/usr/local/bin/setup-iptables.sh && touch /tmp/awf-init/ready', + ]); + expect(initService.security_opt).toContain('no-new-privileges:true'); + expect(initService.restart).toBe('no'); }); it('should apply container hardening measures', () => { @@ -1419,7 +1441,7 @@ describe('docker-manager', () => { expect(result.services.agent.working_dir).toBe('/custom/workdir'); // Verify other config is still present expect(result.services.agent.container_name).toBe('awf-agent'); - expect(result.services.agent.cap_add).toContain('NET_ADMIN'); + expect(result.services.agent.cap_add).toContain('SYS_CHROOT'); }); it('should handle empty string containerWorkDir by not setting working_dir', () => { @@ -2336,7 +2358,7 @@ describe('docker-manager', () => { expect(mockExecaFn).toHaveBeenCalledWith( 'docker', - ['rm', '-f', 'awf-squid', 'awf-agent', 'awf-api-proxy'], + ['rm', '-f', 'awf-squid', 'awf-agent', 'awf-iptables-init', 'awf-api-proxy'], { reject: false } ); }); diff --git a/src/docker-manager.ts b/src/docker-manager.ts index 65c1afe8..d2ce46e3 100644 --- a/src/docker-manager.ts +++ 
b/src/docker-manager.ts @@ -543,6 +543,12 @@ export function generateDockerCompose( // Only mount the workspace directory ($GITHUB_WORKSPACE or current working directory) // to prevent access to credential files in $HOME const workspaceDir = process.env.GITHUB_WORKSPACE || process.cwd(); + // Create init-signal directory for iptables init container coordination + const initSignalDir = path.join(config.workDir, 'init-signal'); + if (!fs.existsSync(initSignalDir)) { + fs.mkdirSync(initSignalDir, { recursive: true }); + } + const agentVolumes: string[] = [ // Essential mounts that are always included '/tmp:/tmp:rw', @@ -551,6 +557,8 @@ export function generateDockerCompose( `${workspaceDir}:${workspaceDir}:rw`, // Mount agent logs directory to workDir for persistence `${config.workDir}/agent-logs:${effectiveHome}/.copilot/logs:rw`, + // Init signal volume for iptables init container coordination + `${initSignalDir}:/tmp/awf-init:rw`, ]; // Volume mounts for chroot /host to work properly with host binaries @@ -936,13 +944,15 @@ export function generateDockerCompose( condition: 'service_healthy', }, }, - // NET_ADMIN is required for iptables setup in entrypoint.sh. + // SECURITY: NET_ADMIN is NOT granted to the agent container. + // iptables setup is performed by the awf-iptables-init service which shares + // the agent's network namespace via network_mode: "service:agent". // SYS_CHROOT is required for chroot operations. // SYS_ADMIN is required to mount procfs at /host/proc (required for // dynamic /proc/self/exe resolution needed by .NET CLR and other runtimes). - // Security: All capabilities are dropped before running user commands - // via 'capsh --drop=cap_net_admin,cap_sys_chroot,cap_sys_admin' in entrypoint.sh. + // Security: when AWF_CHROOT_ENABLED=true, SYS_CHROOT and SYS_ADMIN are dropped before running user commands + // via 'capsh --drop=cap_sys_chroot,cap_sys_admin' in entrypoint.sh (no capsh drop is performed otherwise). 
+ cap_add: ['SYS_CHROOT', 'SYS_ADMIN'], // Drop capabilities to reduce attack surface (security hardening) cap_drop: [ 'NET_RAW', // Prevents raw socket creation (iptables bypass attempts) @@ -1031,10 +1041,59 @@ export function generateDockerCompose( agentService.image = agentImage; } + // SECURITY: iptables init container - sets up NAT rules in a separate container + // that shares the agent's network namespace but NEVER gives NET_ADMIN to the agent. + // This eliminates the window where the agent holds NET_ADMIN during startup. + const iptablesInitService: any = { + container_name: 'awf-iptables-init', + // Share agent's network namespace so iptables rules apply to agent's traffic + network_mode: 'service:agent', + // Only mount the init signal volume; setup-iptables.sh is not mounted (it is run from the agent image/build this service reuses) + volumes: [ + `${initSignalDir}:/tmp/awf-init:rw`, + ], + environment: { + // Pass through environment variables needed by setup-iptables.sh + AWF_SQUID_HOST: environment.AWF_SQUID_HOST || `${networkConfig.squidIp}`, + AWF_SQUID_PORT: String(SQUID_PORT), + AWF_DNS_SERVERS: environment.AWF_DNS_SERVERS || '', + AWF_BLOCKED_PORTS: environment.AWF_BLOCKED_PORTS || '', + AWF_ENABLE_HOST_ACCESS: environment.AWF_ENABLE_HOST_ACCESS || '', + AWF_API_PROXY_IP: environment.AWF_API_PROXY_IP || '', + AWF_DOH_PROXY_IP: environment.AWF_DOH_PROXY_IP || '', + AWF_SSL_BUMP_ENABLED: environment.AWF_SSL_BUMP_ENABLED || '', + AWF_SSL_BUMP_INTERCEPT_PORT: environment.AWF_SSL_BUMP_INTERCEPT_PORT || '', + }, + depends_on: { + 'agent': { + condition: 'service_started', + }, + }, + // Only NET_ADMIN is needed for iptables setup + cap_add: ['NET_ADMIN'], + cap_drop: ['ALL'], + security_opt: ['no-new-privileges:true'], + // Run setup-iptables.sh then signal readiness + command: ['/bin/bash', '-c', '/usr/local/bin/setup-iptables.sh && touch /tmp/awf-init/ready'], + // Resource limits (init container exits quickly) + mem_limit: '128m', + pids_limit: 50, + // Restart policy: never restart (init container runs once) 
+ restart: 'no', + }; + + // Use the same image/build as the agent container for the iptables init service + if (agentService.image) { + iptablesInitService.image = agentService.image; + } else if (agentService.build) { + iptablesInitService.build = agentService.build; + } + // API Proxy sidecar service (Node.js) - optionally deployed const services: Record = { 'squid-proxy': squidService, 'agent': agentService, + 'iptables-init': iptablesInitService, }; // Add Node.js API proxy sidecar if enabled @@ -1444,7 +1503,7 @@ export async function startContainers(workDir: string, allowedDomains: string[], // This handles orphaned containers from failed/interrupted previous runs logger.debug('Removing any existing containers with conflicting names...'); try { - await execa('docker', ['rm', '-f', 'awf-squid', 'awf-agent', 'awf-api-proxy'], { + await execa('docker', ['rm', '-f', 'awf-squid', 'awf-agent', 'awf-iptables-init', 'awf-api-proxy'], { reject: false, }); } catch { From b183c233f3974ac316652f696f7a5966d18a1d38 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Fri, 13 Mar 2026 01:51:31 +0000 Subject: [PATCH 2/8] fix: add agent healthcheck to prevent init container race condition The iptables-init container uses network_mode: service:agent to share the agent's network namespace. With depends_on: service_started, Docker may try to look up the agent's PID in /proc before it's fully visible, causing "lstat /proc/PID/ns/net: no such file or directory". Adding a healthcheck to the agent and using service_healthy ensures the PID is stable before the init container starts. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/docker-manager.test.ts | 2 +- src/docker-manager.ts | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/docker-manager.test.ts b/src/docker-manager.test.ts index 2df2b45e..99467164 100644 --- a/src/docker-manager.test.ts +++ b/src/docker-manager.test.ts @@ -1084,7 +1084,7 @@ describe('docker-manager', () => { expect(initService.cap_drop).toEqual(['ALL']); expect(initService.network_mode).toBe('service:agent'); expect(initService.depends_on).toEqual({ - 'agent': { condition: 'service_started' }, + 'agent': { condition: 'service_healthy' }, }); expect(initService.command).toEqual([ '/bin/bash', '-c', diff --git a/src/docker-manager.ts b/src/docker-manager.ts index d2ce46e3..bab444b9 100644 --- a/src/docker-manager.ts +++ b/src/docker-manager.ts @@ -977,6 +977,17 @@ export function generateDockerCompose( cpu_shares: 1024, // Default CPU share stdin_open: true, tty: config.tty || false, // Use --tty flag, default to false for clean logs + // Healthcheck ensures the agent process is alive and its PID is visible in /proc + // before the iptables-init container tries to join via network_mode: service:agent. + // Without this, there's a race where the init container tries to look up the agent's + // PID in /proc/PID/ns/net before the kernel has made it visible. 
+ healthcheck: { + test: ['CMD-SHELL', 'true'], + interval: '1s', + timeout: '1s', + retries: 3, + start_period: '1s', + }, // Escape $ with $$ for Docker Compose variable interpolation command: ['/bin/bash', '-c', config.agentCommand.replace(/\$/g, '$$$$')], }; @@ -1066,7 +1077,7 @@ export function generateDockerCompose( }, depends_on: { 'agent': { - condition: 'service_started', + condition: 'service_healthy', }, }, // Only NET_ADMIN is needed for iptables setup From 6bc5d54ea552313b932680cb11d7817cc6aa9e07 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Fri, 13 Mar 2026 01:55:27 +0000 Subject: [PATCH 3/8] fix: pre-build Docker images in Test Examples CI The init container architecture requires the agent image to have the updated entrypoint that waits for the init container's ready signal. Without pre-building, examples use GHCR images with the old entrypoint, causing the agent to exit because it tries to run setup-iptables.sh without NET_ADMIN capability. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/test-examples.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/test-examples.yml b/.github/workflows/test-examples.yml index 078d51cd..147ffc0b 100644 --- a/.github/workflows/test-examples.yml +++ b/.github/workflows/test-examples.yml @@ -38,6 +38,13 @@ jobs: - name: Install awf globally run: sudo npm link + - name: Build Docker images locally + run: | + # Build agent and squid images from source and tag as GHCR images + # so examples that use default GHCR images get the PR's code + docker build -t ghcr.io/github/gh-aw-firewall/agent:latest containers/agent/ + docker build -t ghcr.io/github/gh-aw-firewall/squid:latest containers/squid/ + - name: Pre-test cleanup run: sudo ./scripts/ci/cleanup.sh From 0ed312fe2515845db56444fa023f45d6c7a130e7 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Fri, 13 Mar 2026 02:08:47 +0000 Subject: [PATCH 4/8] fix: pass SQUID_PROXY_HOST/PORT to init container 
for DNS resolution setup-iptables.sh reads SQUID_PROXY_HOST (not AWF_SQUID_HOST), but the init container only passed AWF_SQUID_HOST. Since the init container uses network_mode: service:agent, it may not have DNS resolution for compose service names, causing getent hosts to fail and the script to exit before writing the ready signal. Use the direct IP address to avoid DNS issues. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/docker-manager.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/docker-manager.ts b/src/docker-manager.ts index bab444b9..4e6e5bc7 100644 --- a/src/docker-manager.ts +++ b/src/docker-manager.ts @@ -1065,8 +1065,11 @@ export function generateDockerCompose( ], environment: { // Pass through environment variables needed by setup-iptables.sh - AWF_SQUID_HOST: environment.AWF_SQUID_HOST || `${networkConfig.squidIp}`, - AWF_SQUID_PORT: String(SQUID_PORT), + // IMPORTANT: setup-iptables.sh reads SQUID_PROXY_HOST/PORT (not AWF_ prefixed). + // Use the direct IP address since the init container (network_mode: service:agent) + // may not have DNS resolution for compose service names. + SQUID_PROXY_HOST: `${networkConfig.squidIp}`, + SQUID_PROXY_PORT: String(SQUID_PORT), AWF_DNS_SERVERS: environment.AWF_DNS_SERVERS || '', AWF_BLOCKED_PORTS: environment.AWF_BLOCKED_PORTS || '', AWF_ENABLE_HOST_ACCESS: environment.AWF_ENABLE_HOST_ACCESS || '', From 362358a26769d7e352570f64a50b6c1504bd4524 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Fri, 13 Mar 2026 02:16:50 +0000 Subject: [PATCH 5/8] fix: skip DNS reverse lookup when SQUID_HOST is already an IP address The init container passes SQUID_PROXY_HOST as a direct IP (172.30.0.10) to bypass DNS resolution. But setup-iptables.sh runs getent hosts on it, which does a reverse DNS lookup that fails in Docker containers, causing the init container to exit before writing the ready signal. The agent then times out after 30s waiting for /tmp/awf-init/ready. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- containers/agent/setup-iptables.sh | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/containers/agent/setup-iptables.sh b/containers/agent/setup-iptables.sh index d54d0250..4e3991f9 100644 --- a/containers/agent/setup-iptables.sh +++ b/containers/agent/setup-iptables.sh @@ -44,13 +44,21 @@ SQUID_PORT="${SQUID_PROXY_PORT:-3128}" echo "[iptables] Squid proxy: ${SQUID_HOST}:${SQUID_PORT}" # Resolve Squid hostname to IP -# Use awk's NR to get first line to avoid host binary dependency in chroot mode -SQUID_IP=$(getent hosts "$SQUID_HOST" | awk 'NR==1 { print $1 }') -if [ -z "$SQUID_IP" ]; then - echo "[iptables] ERROR: Could not resolve Squid proxy hostname: $SQUID_HOST" - exit 1 +# If SQUID_HOST is already a valid IPv4 address, use it directly (no DNS lookup needed). +# This is important for the init container which passes a direct IP via SQUID_PROXY_HOST +# because getent hosts with an IP does a reverse DNS lookup that fails in Docker. 
+if is_valid_ipv4 "$SQUID_HOST"; then + SQUID_IP="$SQUID_HOST" + echo "[iptables] Squid host is already an IP address: $SQUID_IP" +else + # Use awk's NR to get first line to avoid host binary dependency in chroot mode + SQUID_IP=$(getent hosts "$SQUID_HOST" | awk 'NR==1 { print $1 }') + if [ -z "$SQUID_IP" ]; then + echo "[iptables] ERROR: Could not resolve Squid proxy hostname: $SQUID_HOST" + exit 1 + fi + echo "[iptables] Squid IP resolved to: $SQUID_IP" fi -echo "[iptables] Squid IP resolved to: $SQUID_IP" # Clear existing NAT rules (both IPv4 and IPv6) iptables -t nat -F OUTPUT 2>/dev/null || true From e07e1d61257beaa6aebb031176a971c6ac644920 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Fri, 13 Mar 2026 02:41:24 +0000 Subject: [PATCH 6/8] fix: add NET_RAW capability to init container and improve diagnostics The iptables init container was hanging because cap_drop: ALL removed NET_RAW which iptables needs for netfilter socket operations. Also removed no-new-privileges which can block iptables binary execution. Added diagnostic output logging: setup-iptables.sh output is written to /tmp/awf-init/output.log (shared volume), and on timeout the entrypoint displays the log for easier CI debugging. Co-Authored-By: Claude Opus 4.6 (1M context) --- containers/agent/entrypoint.sh | 6 ++++++ src/docker-manager.test.ts | 6 +++--- src/docker-manager.ts | 10 +++++----- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/containers/agent/entrypoint.sh b/containers/agent/entrypoint.sh index afa2ce86..41ec2da6 100644 --- a/containers/agent/entrypoint.sh +++ b/containers/agent/entrypoint.sh @@ -125,6 +125,12 @@ INIT_ELAPSED=0 while [ ! 
-f /tmp/awf-init/ready ]; do if [ "$INIT_ELAPSED" -ge "$INIT_TIMEOUT" ]; then echo "[entrypoint][ERROR] Timed out waiting for iptables init container after 30s" + if [ -f /tmp/awf-init/output.log ]; then + echo "[entrypoint] Init container output:" + cat /tmp/awf-init/output.log + else + echo "[entrypoint] No init container output log found" + fi exit 1 fi sleep 0.1 diff --git a/src/docker-manager.test.ts b/src/docker-manager.test.ts index 99467164..2e5dabb5 100644 --- a/src/docker-manager.test.ts +++ b/src/docker-manager.test.ts @@ -1080,7 +1080,7 @@ describe('docker-manager', () => { expect(initService).toBeDefined(); expect(initService.container_name).toBe('awf-iptables-init'); - expect(initService.cap_add).toEqual(['NET_ADMIN']); + expect(initService.cap_add).toEqual(['NET_ADMIN', 'NET_RAW']); expect(initService.cap_drop).toEqual(['ALL']); expect(initService.network_mode).toBe('service:agent'); expect(initService.depends_on).toEqual({ @@ -1088,9 +1088,9 @@ describe('docker-manager', () => { }); expect(initService.command).toEqual([ '/bin/bash', '-c', - '/usr/local/bin/setup-iptables.sh && touch /tmp/awf-init/ready', + '/usr/local/bin/setup-iptables.sh > /tmp/awf-init/output.log 2>&1 && touch /tmp/awf-init/ready', ]); - expect(initService.security_opt).toContain('no-new-privileges:true'); + expect(initService.security_opt).toBeUndefined(); expect(initService.restart).toBe('no'); }); diff --git a/src/docker-manager.ts b/src/docker-manager.ts index 4e6e5bc7..38d58aff 100644 --- a/src/docker-manager.ts +++ b/src/docker-manager.ts @@ -1083,12 +1083,12 @@ export function generateDockerCompose( condition: 'service_healthy', }, }, - // Only NET_ADMIN is needed for iptables setup - cap_add: ['NET_ADMIN'], + // NET_ADMIN is required for iptables rule manipulation. + // NET_RAW is required by iptables for netfilter socket operations. 
+ cap_add: ['NET_ADMIN', 'NET_RAW'], cap_drop: ['ALL'], - security_opt: ['no-new-privileges:true'], - // Run setup-iptables.sh then signal readiness - command: ['/bin/bash', '-c', '/usr/local/bin/setup-iptables.sh && touch /tmp/awf-init/ready'], + // Run setup-iptables.sh then signal readiness; log output to shared volume for diagnostics + command: ['/bin/bash', '-c', '/usr/local/bin/setup-iptables.sh > /tmp/awf-init/output.log 2>&1 && touch /tmp/awf-init/ready'], // Resource limits (init container exits quickly) mem_limit: '128m', pids_limit: 50, From 861d5286cf609685d173a15feabbb1eaa643822a Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Fri, 13 Mar 2026 02:54:01 +0000 Subject: [PATCH 7/8] fix: override init container entrypoint to prevent deadlock The init container uses the same Docker image as the agent, which has ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]. The entrypoint.sh contains an "init container wait" loop that waits for /tmp/awf-init/ready to appear. When the init container runs through this same entrypoint, it deadlocks waiting for itself to signal readiness. Fix: Set entrypoint: ['/bin/bash'] on the init container to bypass entrypoint.sh and run setup-iptables.sh directly. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/docker-manager.test.ts | 4 +++- src/docker-manager.ts | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/docker-manager.test.ts b/src/docker-manager.test.ts index 2e5dabb5..ccbb6e04 100644 --- a/src/docker-manager.test.ts +++ b/src/docker-manager.test.ts @@ -1086,8 +1086,10 @@ describe('docker-manager', () => { expect(initService.depends_on).toEqual({ 'agent': { condition: 'service_healthy' }, }); + // Entrypoint is overridden to bypass agent's entrypoint.sh (which has init wait loop) + expect(initService.entrypoint).toEqual(['/bin/bash']); expect(initService.command).toEqual([ - '/bin/bash', '-c', + '-c', '/usr/local/bin/setup-iptables.sh > /tmp/awf-init/output.log 2>&1 && touch /tmp/awf-init/ready', ]); expect(initService.security_opt).toBeUndefined(); diff --git a/src/docker-manager.ts b/src/docker-manager.ts index 38d58aff..38ab0220 100644 --- a/src/docker-manager.ts +++ b/src/docker-manager.ts @@ -1087,8 +1087,12 @@ export function generateDockerCompose( // NET_RAW is required by iptables for netfilter socket operations. cap_add: ['NET_ADMIN', 'NET_RAW'], cap_drop: ['ALL'], + // Override entrypoint to bypass the agent's entrypoint.sh, which contains an + // "init container wait" loop that would deadlock (the init container waiting for itself). + // The init container only needs to run setup-iptables.sh directly. 
+ entrypoint: ['/bin/bash'], // Run setup-iptables.sh then signal readiness; log output to shared volume for diagnostics - command: ['/bin/bash', '-c', '/usr/local/bin/setup-iptables.sh > /tmp/awf-init/output.log 2>&1 && touch /tmp/awf-init/ready'], + command: ['-c', '/usr/local/bin/setup-iptables.sh > /tmp/awf-init/output.log 2>&1 && touch /tmp/awf-init/ready'], // Resource limits (init container exits quickly) mem_limit: '128m', pids_limit: 50, From 80412f97644ce55aa06c59171645b0cdd06253b0 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Fri, 13 Mar 2026 03:09:28 +0000 Subject: [PATCH 8/8] fix: set AWF_API_PROXY_IP before init container definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The init container's environment object captures values at definition time (JavaScript object literal evaluation). AWF_API_PROXY_IP was set on line 1196 (inside the enableApiProxy block) but read on line 1076 (init container definition), so the init container always got an empty string. This caused setup-iptables.sh to skip adding ACCEPT rules for the API proxy IP (172.30.0.30), blocking agent→api-proxy connectivity and failing the API proxy health check. Move the assignment before the init container definition so the value is available when the object literal is evaluated. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/docker-manager.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/docker-manager.ts b/src/docker-manager.ts index 38ab0220..592abbbc 100644 --- a/src/docker-manager.ts +++ b/src/docker-manager.ts @@ -1052,6 +1052,15 @@ export function generateDockerCompose( agentService.image = agentImage; } + // Pre-set API proxy IP in environment before the init container definition. + // The init container's environment object captures values at definition time, + // so AWF_API_PROXY_IP must be set before the init container is defined. 
+ // Without this, the init container gets an empty AWF_API_PROXY_IP and + // setup-iptables.sh never adds ACCEPT rules for the API proxy, blocking connectivity. + if (config.enableApiProxy && networkConfig.proxyIp) { + environment.AWF_API_PROXY_IP = networkConfig.proxyIp; + } + // SECURITY: iptables init container - sets up NAT rules in a separate container // that shares the agent's network namespace but NEVER gives NET_ADMIN to the agent. // This eliminates the window where the agent holds NET_ADMIN during startup.