-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathssh-broker.sh
More file actions
executable file
·180 lines (169 loc) · 8.41 KB
/
Copy pathssh-broker.sh
File metadata and controls
executable file
·180 lines (169 loc) · 8.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#!/usr/bin/env bash
# AgentKeys broker SSH — single entry point for prod + test, reading
# INSTANCE_ID / EIP from the corresponding env file so this script
# stays in lockstep with whatever setup-cloud.sh persisted there.
#
# Replaces the per-operator shell aliases:
# alias ssh-agentkeys='AWS_PROFILE=… aws ec2-instance-connect ssh --instance-id …'
#
# Usage:
# bash scripts/ssh-broker.sh # prod via EC2 Instance Connect
# bash scripts/ssh-broker.sh test # test (fleet slot 1) via EC2 Instance Connect
# bash scripts/ssh-broker.sh test-2 # test-fleet slot 2 (issue #265; reads
# # broker.test-2.env, profile agentkeys-broker-test-2)
# bash scripts/ssh-broker.sh base # Base prod stack (#282; reads broker.base.env,
# # profile agentkeys-broker-base)
# bash scripts/ssh-broker.sh prod --fallback # prod via .pem (when EC2-IC is down)
# bash scripts/ssh-broker.sh test --fallback # test via .pem
# bash scripts/ssh-broker.sh --help
#
# Flags:
# --fallback use raw SSH + .pem key instead of EC2 Instance Connect
# --pem <path> override .pem key path (default: ~/.ssh/Wildmeta-agent-mac.pem)
# --os-user <name> override SSH user (default: agentkey for EC2-IC, ubuntu for fallback)
# --aws-profile <name> override AWS profile (default per stack — see below)
#
# Default AWS profiles (least-privilege, per AGENTS.md "AWS local-profile ↔
# remote-IAM mapping"):
# prod → agentkeys-broker, base → agentkeys-broker-base
# test → agentkeys-broker-test, test-<N> → agentkeys-broker-test-<N>
#
# Suggested shell wrappers (drop in ~/.zshrc):
# alias ssh-prod='bash $AGENTKEYS_REPO/scripts/ssh-broker.sh prod'
# alias ssh-test='bash $AGENTKEYS_REPO/scripts/ssh-broker.sh test'
set -euo pipefail

# Directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Defaults; parse_args overrides them from the command line.
STACK="prod"              # target stack: prod | test | test-<N> | base
FALLBACK=0                # 1 = raw ssh with a .pem key (--fallback)
PEM_PATH="$HOME/.ssh/Wildmeta-agent-mac.pem"
OS_USER=""                # empty = default chosen later, per connection mode
AWS_PROFILE_OVERRIDE=""   # empty = per-stack default chosen later
STACK_SET=0               # becomes 1 once a stack name was given explicitly
EXTRA_ARGS=()             # remote command forwarded to ssh (may stay empty)

# Abort with a usage error when a value-taking flag is the last argument.
# Without this check, reading "$2" under `set -u` kills the script with a
# bare "unbound variable" message that does not name the offending flag.
#   $1 - the flag being parsed
#   $2 - number of arguments still unparsed, including the flag itself
require_value() {
  if [ "$2" -lt 2 ]; then
    echo "Missing value for $1 (see: $0 --help)" >&2
    exit 2
  fi
}

# Parse the command line into STACK, STACK_SET, FALLBACK, PEM_PATH, OS_USER
# and AWS_PROFILE_OVERRIDE. Parsing stops at `--` or at the first
# unrecognised argument; everything from there on is stored in EXTRA_ARGS
# and later forwarded to the SSH session as the remote command — so
# `ssh-broker.sh test echo hi` runs `echo hi` on the test host.
# Exits 0 after printing help, 2 on a usage error.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      prod|test|test-[0-9]*|base)
        STACK="$1"
        STACK_SET=1
        shift
        ;;
      --fallback)
        FALLBACK=1
        shift
        ;;
      --pem)
        require_value "$1" "$#"
        PEM_PATH="$2"
        shift 2
        ;;
      --os-user)
        require_value "$1" "$#"
        OS_USER="$2"
        shift 2
        ;;
      --aws-profile)
        require_value "$1" "$#"
        AWS_PROFILE_OVERRIDE="$2"
        shift 2
        ;;
      --help|-h)
        # The help text is the header comment: lines 2-30 of this file with
        # the leading "# " stripped.
        sed -n '2,30p' "$0" | sed 's/^# //; s/^#//'
        exit 0
        ;;
      --)
        shift
        break
        ;;
      *)
        # Unknown arg = start of the remote-command passthrough — EXCEPT when
        # no stack was given and the arg is stack-shaped (a space-less token
        # like test-9/test2/prod-x): that's a stack typo or a stale script
        # missing a newer stack name, and passing it through would silently
        # run it as a remote command on the DEFAULT stack (prod). Die loud.
        if [ "$STACK_SET" = "0" ]; then
          case "$1" in
            *[[:space:]]*)
              : # contains whitespace → clearly a remote command
              ;;
            prod-*|prod[0-9]*|test-*|test[0-9]*|base-*|base[0-9]*)
              echo "Unknown stack: '$1' (expected prod, test, test-<N>, or base; declared stacks = scripts/broker*.env)." >&2
              echo "If you meant a remote command named '$1', pass the stack explicitly first: $0 prod '$1'" >&2
              exit 2
              ;;
          esac
        fi
        break # remote command passthrough
        ;;
    esac
  done
  # Whatever is left is the remote command. Both connection modes append it
  # after the ssh destination.
  EXTRA_ARGS=("$@")
}

parse_args "$@"
# Map the selected stack onto its env file (next to this script) and its
# default AWS profile. prod uses the bare names; every other stack —
# test, base and the test-fleet slots test-<N> (issue #265) — appends the
# stack name: broker.<stack>.env and profile agentkeys-broker-<stack>.
# A profile passed via --aws-profile takes precedence over the default.
case "$STACK" in
  prod)
    stack_env_name="broker.env"
    stack_default_profile="agentkeys-broker"
    ;;
  test|test-[0-9]*|base)
    stack_env_name="broker.${STACK}.env"
    stack_default_profile="agentkeys-broker-${STACK}"
    ;;
  *)
    echo "Unknown stack: $STACK (expected prod, test, test-<N>, or base)" >&2
    exit 2
    ;;
esac
BROKER_ENV_FILE="$SCRIPT_DIR/$stack_env_name"
if [ -z "${AWS_PROFILE_OVERRIDE:-}" ]; then
  AWS_PROFILE_OVERRIDE="$stack_default_profile"
fi
[ -f "$BROKER_ENV_FILE" ] || { echo "missing $BROKER_ENV_FILE" >&2; exit 1; }

# Print the value of KEY from a KEY=value env file: the text between the
# first and second '=' on the last line that starts with "KEY=". Prints an
# empty line and still returns 0 when the key is absent.
#   $1 - key name
#   $2 - env file path
# A `grep | tail | cut` pipeline is deliberately avoided here: with
# `set -e` and `pipefail` on, grep's exit status 1 for "no match" aborts the
# whole script silently, so the "unset" diagnostics would never be printed
# for a key that is missing from the file.
env_file_value() {
  awk -F= -v key="$1" 'NF > 1 && $1 == key { val = $2 } END { print val }' "$2"
}

INSTANCE_ID=$(env_file_value INSTANCE_ID "$BROKER_ENV_FILE")
EIP=$(env_file_value EIP "$BROKER_ENV_FILE")
[ -n "$INSTANCE_ID" ] || {
  echo "INSTANCE_ID unset in $BROKER_ENV_FILE — paste 'INSTANCE_ID=i-…' once EC2 exists" >&2
  exit 1
}
# SSH connection sharing (ControlMaster). The first connection pays for the
# full SendSSHPublicKey + key exchange + ~5s warmup; while its master socket
# lingers (ControlPersist=10m) every further ssh-broker.sh call to the same
# target reuses it and completes in ~50ms (no AWS API roundtrip, no ssh
# handshake).
#
# The socket lives under /tmp and is named by %C, OpenSSH's hash of the
# connection parameters (local host, remote host, port, remote user), so
# each distinct target gets its own socket.
# NOTE(review): %C does not include the local account, and ssh_config(5)
# recommends a ControlPath directory that other users cannot write to —
# confirm /tmp is acceptable on shared workstations.
MUX_OPTS=()
MUX_OPTS+=(-o "ControlMaster=auto")
MUX_OPTS+=(-o "ControlPath=/tmp/ssh-agentkeys-%C")
MUX_OPTS+=(-o "ControlPersist=10m")
if [ "$FALLBACK" = "1" ]; then
  # --fallback: raw ssh to the EIP, authenticated with the operator's .pem.
  [ -n "$EIP" ] || { echo "EIP unset in $BROKER_ENV_FILE — required for --fallback" >&2; exit 1; }
  [ -f "$PEM_PATH" ] || { echo "PEM key not found at $PEM_PATH — pass --pem <path>" >&2; exit 1; }
  # Default to `ubuntu` — the AMI's default user with the operator's .pem
  # already in authorized_keys. The fallback path is for first-time
  # bootstrap (before setup-broker-host.sh has created the agentkey user)
  # OR for emergency recovery when EC2 Instance Connect is down. Steady-
  # state operator work goes via the non-fallback path (`agentkey` user) —
  # that's where files land in /home/agentkey/.
  : "${OS_USER:=ubuntu}"
  echo "ssh -i $PEM_PATH $OS_USER@$EIP (stack=$STACK, instance=$INSTANCE_ID, mux=on)" >&2
  # ${EXTRA_ARGS[@]+...} expands to nothing for an empty array, which avoids
  # an "unbound variable" error under `set -u` on older bash versions.
  exec ssh -i "$PEM_PATH" "${MUX_OPTS[@]}" "$OS_USER@$EIP" ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
else
  : "${OS_USER:=agentkey}"
  # Validate the target before touching the filesystem or calling AWS.
  [ -n "$EIP" ] || { echo "EIP unset in $BROKER_ENV_FILE — required for direct ssh" >&2; exit 1; }
  # `aws ec2-instance-connect ssh` is a wrapper that doesn't allow
  # passing arbitrary ssh args (no --ssh-options, doesn't honor `--`).
  # That blocks ControlMaster multiplexing. Bypass the wrapper:
  #   1. Generate a stable keypair (one-shot per workstation)
  #   2. Push the pubkey via send-ssh-public-key (API call, valid 60s)
  #   3. Raw `ssh -i privkey` with ControlMaster opts to $EIP
  # Once ControlMaster's socket is established, subsequent invocations
  # in 10 min reuse the socket WITHOUT needing a new pubkey push.
  EIC_KEY="$HOME/.ssh/ec2_instance_connect_id_ed25519"
  if [[ ! -f "$EIC_KEY" ]]; then
    # ssh-keygen does not create the parent directory; -m only applies when
    # mkdir actually creates it, so an existing ~/.ssh keeps its mode.
    mkdir -p -m 700 "${EIC_KEY%/*}"
    # USER/HOSTNAME can be unset in minimal environments; a bare expansion
    # would abort under `set -u`, so fall back to id/hostname.
    ssh-keygen -t ed25519 -N "" -f "$EIC_KEY" -q \
      -C "ec2-instance-connect (${USER:-$(id -un)}@${HOSTNAME:-$(hostname)})"
  fi
  echo "send-ssh-public-key + ssh $OS_USER@$EIP (stack=$STACK, profile=$AWS_PROFILE_OVERRIDE, mux=on)" >&2
  # Skip the API push if the ControlMaster socket is already alive — the
  # multiplexed connection doesn't need a fresh pubkey. `ssh -O check`
  # exits 0 if the master is running. MUX_OPTS is reused so the check
  # always looks at the same ControlPath the real connection uses.
  if ! ssh -O check "${MUX_OPTS[@]}" "$OS_USER@$EIP" 2>/dev/null; then
    # This aws call runs HERE (your laptop) to push a 60s EC2 Instance Connect
    # pubkey — it is NOT run on the broker. A bare "aws: command not found"
    # misleads; surface the real cause + the no-aws escape hatch.
    command -v aws >/dev/null 2>&1 || {
      echo "aws CLI not on PATH — needed locally to authorize this SSH via EC2 Instance Connect (not run on the broker)." >&2
      echo " fix: put aws on PATH in this shell, OR run with the .pem instead: bash $0 $STACK --fallback" >&2
      exit 1
    }
    AWS_PROFILE="$AWS_PROFILE_OVERRIDE" \
      aws ec2-instance-connect send-ssh-public-key \
        --instance-id "$INSTANCE_ID" \
        --instance-os-user "$OS_USER" \
        --ssh-public-key "file://${EIC_KEY}.pub" \
        >/dev/null \
      || { echo "send-ssh-public-key failed for $INSTANCE_ID os-user=$OS_USER" >&2; exit 1; }
  fi
  exec ssh -i "$EIC_KEY" "${MUX_OPTS[@]}" "$OS_USER@$EIP" ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
fi