diff --git a/.github/workflows/functional.yml b/.github/workflows/functional.yml new file mode 100644 index 00000000..1bf68fc5 --- /dev/null +++ b/.github/workflows/functional.yml @@ -0,0 +1,107 @@ +name: Functional Tests + +on: + push: + branches: + - master + pull_request: + branches: + - master + workflow_dispatch: + +jobs: + functional: + runs-on: ubuntu-latest + timeout-minutes: 20 + + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.25.7' + cache: true + + - name: Build orchestrator (for CLI tests on host) + run: go build -o bin/orchestrator ./go/cmd/orchestrator + + - name: Start test infrastructure (MySQL + ProxySQL + Orchestrator) + working-directory: tests/functional + run: | + docker compose up -d mysql1 mysql2 mysql3 proxysql + echo "Waiting for MySQL and ProxySQL to be healthy..." + timeout 120 bash -c ' + while true; do + HEALTHY=$(docker compose ps --format json 2>/dev/null | python3 -c " + import json, sys + healthy = 0 + for line in sys.stdin: + svc = json.loads(line) + if \"healthy\" in svc.get(\"Status\",\"\").lower(): + healthy += 1 + print(healthy) + " 2>/dev/null || echo "0") + if [ "$HEALTHY" -ge 4 ]; then + echo "All 4 services healthy" + exit 0 + fi + sleep 2 + done + ' || { echo "Timeout"; docker compose ps; docker compose logs --tail=30; exit 1; } + + - name: Setup replication + run: bash tests/functional/setup-replication.sh + + - name: Start orchestrator in Docker network + working-directory: tests/functional + run: | + docker compose up -d orchestrator + echo "Waiting for orchestrator to be ready..." 
+ timeout 120 bash -c ' + while true; do + if curl -sf http://localhost:3099/api/clusters > /dev/null 2>&1; then + echo "Orchestrator ready" + exit 0 + fi + sleep 2 + done + ' || { echo "Orchestrator not ready"; docker compose logs orchestrator --tail=50; exit 1; } + + - name: Run smoke tests + run: bash tests/functional/test-smoke.sh + + - name: Run regression tests + run: bash tests/functional/test-regression.sh + + - name: Run failover tests + run: bash tests/functional/test-failover.sh + + - name: Collect orchestrator logs + if: always() + working-directory: tests/functional + run: docker compose logs orchestrator > /tmp/orchestrator-test.log 2>&1 || true + + - name: Upload orchestrator logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: orchestrator-test-logs + path: /tmp/orchestrator-test.log + + - name: Collect all docker logs on failure + if: failure() + working-directory: tests/functional + run: docker compose logs > /tmp/docker-compose-logs.txt 2>&1 || true + + - name: Upload docker logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: docker-compose-logs + path: /tmp/docker-compose-logs.txt + + - name: Cleanup + if: always() + working-directory: tests/functional + run: docker compose down -v --remove-orphans 2>/dev/null || true diff --git a/tests/functional/docker-compose.yml b/tests/functional/docker-compose.yml new file mode 100644 index 00000000..3ab62623 --- /dev/null +++ b/tests/functional/docker-compose.yml @@ -0,0 +1,127 @@ +version: "3.8" + +services: + mysql1: + image: mysql:8.4 + hostname: mysql1 + environment: + MYSQL_ROOT_PASSWORD: testpass + ports: + - "13306:3306" + volumes: + - ./mysql/master.cnf:/etc/mysql/conf.d/repl.cnf + - ./mysql/init-master.sql:/docker-entrypoint-initdb.d/init.sql + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-uroot", "-ptestpass"] + interval: 5s + timeout: 3s + retries: 30 + networks: + orchnet: + aliases: + - mysql1 + + mysql2: + image: 
mysql:8.4 + hostname: mysql2 + environment: + MYSQL_ROOT_PASSWORD: testpass + ports: + - "13307:3306" + volumes: + - ./mysql/replica.cnf:/etc/mysql/conf.d/repl.cnf + - ./mysql/init-replica.sql:/docker-entrypoint-initdb.d/init.sql + depends_on: + mysql1: + condition: service_healthy + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-uroot", "-ptestpass"] + interval: 5s + timeout: 3s + retries: 30 + networks: + orchnet: + aliases: + - mysql2 + + mysql3: + image: mysql:8.4 + hostname: mysql3 + environment: + MYSQL_ROOT_PASSWORD: testpass + ports: + - "13308:3306" + volumes: + - ./mysql/replica2.cnf:/etc/mysql/conf.d/repl.cnf + - ./mysql/init-replica.sql:/docker-entrypoint-initdb.d/init.sql + depends_on: + mysql1: + condition: service_healthy + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-uroot", "-ptestpass"] + interval: 5s + timeout: 3s + retries: 30 + networks: + orchnet: + aliases: + - mysql3 + + proxysql: + image: proxysql/proxysql:latest + hostname: proxysql + ports: + - "16032:6032" + - "16033:6033" + volumes: + - ./proxysql/proxysql.cnf:/etc/proxysql.cnf + depends_on: + mysql1: + condition: service_healthy + mysql2: + condition: service_healthy + mysql3: + condition: service_healthy + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "127.0.0.1", "-P6032", "-uradmin", "-pradmin"] + interval: 5s + timeout: 3s + retries: 30 + networks: + orchnet: + aliases: + - proxysql + + orchestrator: + image: ubuntu:24.04 + hostname: orchestrator + volumes: + - ../../bin/orchestrator:/usr/local/bin/orchestrator:ro + - ../../resources:/orchestrator/resources:ro + - ./orchestrator-test.conf.json:/orchestrator/orchestrator.conf.json:ro + command: > + bash -c " + apt-get update -qq && apt-get install -y -qq curl sqlite3 > /dev/null 2>&1 && + rm -f /tmp/orchestrator-test.sqlite3 && + cd /orchestrator && + orchestrator -config orchestrator.conf.json http + " + ports: + - "3099:3099" + depends_on: + proxysql: + condition: 
service_healthy + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:3099/api/clusters"] + interval: 5s + timeout: 3s + retries: 60 + start_period: 15s + networks: + orchnet: + aliases: + - orchestrator + +networks: + orchnet: + driver: bridge diff --git a/tests/functional/lib.sh b/tests/functional/lib.sh new file mode 100755 index 00000000..bbad116d --- /dev/null +++ b/tests/functional/lib.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# Shared test helpers for functional tests + +PASS_COUNT=0 +FAIL_COUNT=0 +SKIP_COUNT=0 +ORC_URL="http://localhost:3099" + +pass() { + echo " ✅ PASS: $1" + ((PASS_COUNT++)) +} + +fail() { + echo " ❌ FAIL: $1" + [ -n "$2" ] && echo " $2" + ((FAIL_COUNT++)) +} + +skip() { + echo " ⚠️ SKIP: $1" + ((SKIP_COUNT++)) +} + +summary() { + echo "" + echo "=== RESULTS: $PASS_COUNT passed, $FAIL_COUNT failed, $SKIP_COUNT skipped ===" + [ "$FAIL_COUNT" -gt 0 ] && exit 1 + exit 0 +} + +# Test that an HTTP endpoint returns expected status code +test_endpoint() { + local NAME="$1" URL="$2" EXPECT="$3" + local CODE + CODE=$(curl -s -o /dev/null -w "%{http_code}" "$URL" 2>&1) + if [ "$CODE" = "$EXPECT" ]; then + pass "$NAME (HTTP $CODE)" + else + fail "$NAME (HTTP $CODE, expected $EXPECT)" + fi +} + +# Test that response body contains a string +test_body_contains() { + local NAME="$1" URL="$2" EXPECT="$3" + local BODY + BODY=$(curl -s "$URL" 2>&1) + if echo "$BODY" | grep -q "$EXPECT"; then + pass "$NAME" + else + fail "$NAME" "Response does not contain '$EXPECT'" + fi +} + +# Wait for orchestrator to be ready +wait_for_orchestrator() { + echo "Waiting for orchestrator to be ready..." 
+  for i in $(seq 1 30); do
+    if curl -sf -o /dev/null "$ORC_URL/api/clusters" 2>/dev/null; then
+      echo "Orchestrator ready after ${i}s"
+      return 0
+    fi
+    sleep 1
+  done
+  echo "Orchestrator not ready after 30s"
+  return 1
+}
+
+# Seed discovery and wait for all instances
+# Sets CLUSTER_NAME as a global variable
+CLUSTER_NAME=""
+discover_topology() {
+  local MASTER_HOST="$1"
+  echo "Seeding discovery with $MASTER_HOST..."
+  curl -s "$ORC_URL/api/discover/$MASTER_HOST/3306" > /dev/null
+
+  # Also seed replicas directly
+  curl -s "$ORC_URL/api/discover/mysql2/3306" > /dev/null 2>&1
+  curl -s "$ORC_URL/api/discover/mysql3/3306" > /dev/null 2>&1
+
+  echo "Waiting for topology discovery..."
+  for i in $(seq 1 60); do
+    # Get the cluster name dynamically
+    CLUSTER_NAME=$(curl -s "$ORC_URL/api/clusters" 2>/dev/null | python3 -c "import json,sys; c=json.load(sys.stdin); print(c[0] if c else '')" 2>/dev/null || echo "")
+    if [ -n "$CLUSTER_NAME" ]; then
+      local COUNT
+      COUNT=$(curl -s "$ORC_URL/api/cluster/$CLUSTER_NAME" 2>/dev/null | python3 -c "import json,sys; print(len(json.load(sys.stdin)))" 2>/dev/null || echo "0")
+      if [ "$COUNT" -ge 3 ] 2>/dev/null; then
+        echo "Full topology discovered (${COUNT} instances, cluster=$CLUSTER_NAME) after ${i}s"
+        return 0
+      fi
+    fi
+    # Re-seed replicas periodically
+    if [ "$((i % 10))" = "0" ]; then
+      curl -s "$ORC_URL/api/discover/mysql2/3306" > /dev/null 2>&1
+      curl -s "$ORC_URL/api/discover/mysql3/3306" > /dev/null 2>&1
+    fi
+    sleep 1
+  done
+  echo "WARNING: Cluster=$CLUSTER_NAME, instances=${COUNT:-0} after 60s"
+  return 1
+}
+
+# Get ProxySQL servers for a hostgroup
+proxysql_servers() {
+  local HG="$1"
+  docker compose -f tests/functional/docker-compose.yml exec -T proxysql \
+    mysql -h127.0.0.1 -P6032 -uradmin -pradmin -Nse \
+    "SELECT hostname, port, status FROM runtime_mysql_servers WHERE hostgroup_id=$HG" 2>/dev/null
+}
+
+# Get MySQL read_only status
+mysql_read_only() {
+  local CONTAINER="$1"
+  docker compose -f
tests/functional/docker-compose.yml exec -T "$CONTAINER" \ + mysql -uroot -ptestpass -Nse "SELECT @@read_only" 2>/dev/null +} + +# Get MySQL replication source +mysql_source_host() { + local CONTAINER="$1" + docker compose -f tests/functional/docker-compose.yml exec -T "$CONTAINER" \ + mysql -uroot -ptestpass -Nse "SHOW REPLICA STATUS\G" 2>/dev/null | grep "Source_Host" | awk '{print $2}' +} diff --git a/tests/functional/mysql/init-master.sql b/tests/functional/mysql/init-master.sql new file mode 100644 index 00000000..ed8a4a76 --- /dev/null +++ b/tests/functional/mysql/init-master.sql @@ -0,0 +1,9 @@ +-- Orchestrator user with full privileges +CREATE USER IF NOT EXISTS 'orchestrator'@'%' IDENTIFIED BY 'orch_pass'; +GRANT ALL PRIVILEGES ON *.* TO 'orchestrator'@'%' WITH GRANT OPTION; + +-- Replication user +CREATE USER IF NOT EXISTS 'repl'@'%' IDENTIFIED BY 'repl_pass'; +GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%'; + +FLUSH PRIVILEGES; diff --git a/tests/functional/mysql/init-replica.sql b/tests/functional/mysql/init-replica.sql new file mode 100644 index 00000000..d9dedc73 --- /dev/null +++ b/tests/functional/mysql/init-replica.sql @@ -0,0 +1,8 @@ +-- Orchestrator user +CREATE USER IF NOT EXISTS 'orchestrator'@'%' IDENTIFIED BY 'orch_pass'; +GRANT ALL PRIVILEGES ON *.* TO 'orchestrator'@'%' WITH GRANT OPTION; +-- Replication user (needed if this replica gets promoted) +CREATE USER IF NOT EXISTS 'repl'@'%' IDENTIFIED BY 'repl_pass'; +GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%'; +FLUSH PRIVILEGES; +-- NOTE: Replication is configured by setup-replication.sh after all containers are up diff --git a/tests/functional/mysql/master.cnf b/tests/functional/mysql/master.cnf new file mode 100644 index 00000000..2fe72f7c --- /dev/null +++ b/tests/functional/mysql/master.cnf @@ -0,0 +1,9 @@ +[mysqld] +server-id=1 +log-bin=mysql-bin +binlog-format=ROW +gtid-mode=ON +enforce-gtid-consistency=ON +log-replica-updates=ON +binlog-row-image=MINIMAL +report-host=mysql1 diff --git 
a/tests/functional/mysql/replica.cnf b/tests/functional/mysql/replica.cnf new file mode 100644 index 00000000..193e1bd3 --- /dev/null +++ b/tests/functional/mysql/replica.cnf @@ -0,0 +1,9 @@ +[mysqld] +server-id=100 +log-bin=mysql-bin +binlog-format=ROW +gtid-mode=ON +enforce-gtid-consistency=ON +log-replica-updates=ON +binlog-row-image=MINIMAL +read-only=ON diff --git a/tests/functional/mysql/replica2.cnf b/tests/functional/mysql/replica2.cnf new file mode 100644 index 00000000..2636a9bc --- /dev/null +++ b/tests/functional/mysql/replica2.cnf @@ -0,0 +1,9 @@ +[mysqld] +server-id=200 +log-bin=mysql-bin +binlog-format=ROW +gtid-mode=ON +enforce-gtid-consistency=ON +log-replica-updates=ON +binlog-row-image=MINIMAL +read-only=ON diff --git a/tests/functional/orchestrator-test.conf.json b/tests/functional/orchestrator-test.conf.json new file mode 100644 index 00000000..4713e9f9 --- /dev/null +++ b/tests/functional/orchestrator-test.conf.json @@ -0,0 +1,26 @@ +{ + "Debug": true, + "ListenAddress": ":3099", + "MySQLTopologyUser": "orchestrator", + "MySQLTopologyPassword": "orch_pass", + "MySQLOrchestratorHost": "", + "MySQLOrchestratorPort": 0, + "BackendDB": "sqlite", + "SQLite3DataFile": "/tmp/orchestrator-test.sqlite3", + "DiscoverByShowSlaveHosts": false, + "InstancePollSeconds": 5, + "RecoveryPeriodBlockSeconds": 10, + "RecoverMasterClusterFilters": [".*"], + "RecoverIntermediateMasterClusterFilters": [".*"], + "AutoPseudoGTID": false, + "DetectClusterAliasQuery": "SELECT CONCAT(@@hostname, ':', @@port)", + "DetectInstanceAliasQuery": "SELECT CONCAT(@@hostname, ':', @@port)", + "ProxySQLAdminAddress": "proxysql", + "ProxySQLAdminPort": 6032, + "ProxySQLAdminUser": "radmin", + "ProxySQLAdminPassword": "radmin", + "ProxySQLWriterHostgroup": 10, + "ProxySQLReaderHostgroup": 20, + "ProxySQLPreFailoverAction": "offline_soft", + "PrometheusEnabled": true +} diff --git a/tests/functional/proxysql/proxysql.cnf b/tests/functional/proxysql/proxysql.cnf new file mode 100644 
index 00000000..9c2f8e66 --- /dev/null +++ b/tests/functional/proxysql/proxysql.cnf @@ -0,0 +1,34 @@ +datadir="/var/lib/proxysql" + +admin_variables= +{ + admin_credentials="admin:admin;radmin:radmin" + mysql_ifaces="0.0.0.0:6032" +} + +mysql_variables= +{ + threads=2 + max_connections=100 + default_query_delay=0 + default_query_timeout=36000000 + interfaces="0.0.0.0:6033" + monitor_username="orchestrator" + monitor_password="orch_pass" + monitor_galera_healthcheck_interval=2000 + monitor_connect_interval=2000 + monitor_ping_interval=2000 + monitor_read_only_interval=1000 +} + +mysql_servers= +( + { hostgroup_id=10, hostname="mysql1", port=3306, comment="writer" }, + { hostgroup_id=20, hostname="mysql2", port=3306, comment="reader1" }, + { hostgroup_id=20, hostname="mysql3", port=3306, comment="reader2" } +) + +mysql_users= +( + { username="orchestrator", password="orch_pass", default_hostgroup=10 } +) diff --git a/tests/functional/run-all.sh b/tests/functional/run-all.sh new file mode 100755 index 00000000..760c632a --- /dev/null +++ b/tests/functional/run-all.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# Run all functional tests: infrastructure setup, smoke, failover, regression +set -euo pipefail +cd "$(dirname "$0")/../.." + +COMPOSE="docker compose -f tests/functional/docker-compose.yml" + +echo "=== FUNCTIONAL TEST SUITE ===" +echo "" + +# ---------------------------------------------------------------- +echo "--- Step 1: Build orchestrator ---" +go build -o bin/orchestrator ./go/cmd/orchestrator +echo "Build OK" + +# ---------------------------------------------------------------- +echo "" +echo "--- Step 2: Start test infrastructure ---" +$COMPOSE down -v --remove-orphans 2>/dev/null || true +$COMPOSE up -d + +echo "Waiting for all services to be healthy..." 
+for i in $(seq 1 90); do
+  HEALTHY=$($COMPOSE ps --format json 2>/dev/null | python3 -c "
+import json, sys
+healthy = 0
+for line in sys.stdin:
+    svc = json.loads(line)
+    if svc.get('Health','') == 'healthy' or 'healthy' in svc.get('Status','').lower():
+        healthy += 1
+print(healthy)
+" 2>/dev/null || echo "0")
+  if [ "$HEALTHY" -ge 4 ]; then
+    echo "All 4 services healthy after ${i}s"
+    break
+  fi
+  if [ "$i" -eq 90 ]; then
+    echo "FATAL: Services not healthy after 90s"
+    $COMPOSE ps
+    $COMPOSE logs --tail=20
+    exit 1
+  fi
+  sleep 1
+done
+
+bash tests/functional/setup-replication.sh
+echo "Verifying replication..."
+for i in $(seq 1 30); do
+  REPL_OK=$($COMPOSE exec -T mysql2 mysql -uroot -ptestpass -Nse "SHOW REPLICA STATUS\G" 2>/dev/null | grep -c "Replica_IO_Running: Yes" || true)
+  if [ "$REPL_OK" -ge 1 ]; then
+    echo "Replication is running"
+    break
+  fi
+  sleep 1
+done
+
+# ----------------------------------------------------------------
+echo ""
+echo "--- Step 3: Start orchestrator ---"
+rm -f /tmp/orchestrator-test.sqlite3
+bin/orchestrator -config tests/functional/orchestrator-test.conf.json http > /tmp/orchestrator-test.log 2>&1 &
+ORC_PID=$!
+echo $ORC_PID > /tmp/orchestrator-test.pid
+echo "Orchestrator started (PID: $ORC_PID)"
+
+# ----------------------------------------------------------------
+echo ""
+echo "--- Step 4: Run smoke tests ---"
+SMOKE_EXIT=0
+bash tests/functional/test-smoke.sh || SMOKE_EXIT=$?
+
+echo ""
+echo "--- Step 5: Run regression tests ---"
+REGRESSION_EXIT=0
+bash tests/functional/test-regression.sh || REGRESSION_EXIT=$?
+
+echo ""
+echo "--- Step 6: Run failover tests ---"
+FAILOVER_EXIT=0
+bash tests/functional/test-failover.sh || FAILOVER_EXIT=$?
+ +# ---------------------------------------------------------------- +echo "" +echo "--- Cleanup ---" +kill $ORC_PID 2>/dev/null || true +$COMPOSE down -v --remove-orphans 2>/dev/null || true +rm -f /tmp/orchestrator-test.sqlite3 /tmp/orchestrator-test.pid + +echo "" +echo "=== FUNCTIONAL TEST SUITE COMPLETE ===" +echo "Smoke: exit $SMOKE_EXIT" +echo "Regression: exit $REGRESSION_EXIT" +echo "Failover: exit $FAILOVER_EXIT" + +# Exit with failure if any suite failed +[ "$SMOKE_EXIT" -ne 0 ] || [ "$REGRESSION_EXIT" -ne 0 ] || [ "$FAILOVER_EXIT" -ne 0 ] && exit 1 +exit 0 diff --git a/tests/functional/setup-replication.sh b/tests/functional/setup-replication.sh new file mode 100755 index 00000000..97693e38 --- /dev/null +++ b/tests/functional/setup-replication.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Set up replication after all MySQL containers are running +set -euo pipefail + +COMPOSE="docker compose -f tests/functional/docker-compose.yml" + +echo "Setting up replication..." + +for REPLICA in mysql2 mysql3; do + echo "Configuring $REPLICA to replicate from mysql1..." + for i in $(seq 1 30); do + $COMPOSE exec -T "$REPLICA" mysql -uroot -ptestpass -e " + CHANGE REPLICATION SOURCE TO + SOURCE_HOST='mysql1', + SOURCE_PORT=3306, + SOURCE_USER='repl', + SOURCE_PASSWORD='repl_pass', + SOURCE_AUTO_POSITION=1, + GET_SOURCE_PUBLIC_KEY=1; + START REPLICA; + " 2>/dev/null && break + sleep 1 + done +done + +echo "Verifying replication..." 
+for REPLICA in mysql2 mysql3; do + for i in $(seq 1 60); do + STATUS=$($COMPOSE exec -T "$REPLICA" mysql -uroot -ptestpass -Nse \ + "SELECT SERVICE_STATE FROM performance_schema.replication_connection_status" 2>/dev/null | tr -d '[:space:]') + if [ "$STATUS" = "ON" ]; then + echo "$REPLICA: replication OK (IO thread ON)" + break + fi + if [ "$i" -eq 60 ]; then + echo "$REPLICA: replication FAILED after 60s" + $COMPOSE exec -T "$REPLICA" mysql -uroot -ptestpass -e "SHOW REPLICA STATUS\G" 2>/dev/null || true + exit 1 + fi + sleep 1 + done +done + +echo "Replication setup complete" diff --git a/tests/functional/test-failover.sh b/tests/functional/test-failover.sh new file mode 100755 index 00000000..ef707f7b --- /dev/null +++ b/tests/functional/test-failover.sh @@ -0,0 +1,167 @@ +#!/bin/bash +# Tier B: Failover tests — verify failover and ProxySQL hooks against real services +set -uo pipefail # no -e: we handle failures ourselves +cd "$(dirname "$0")/../.." +source tests/functional/lib.sh + +echo "=== TIER B: FAILOVER TESTS ===" + +wait_for_orchestrator || { echo "FATAL: Orchestrator not reachable"; exit 1; } +discover_topology "mysql1" + +# ---------------------------------------------------------------- +echo "" +echo "--- Pre-flight checks ---" + +RO1=$(mysql_read_only mysql1) +RO2=$(mysql_read_only mysql2) +if [ "$RO1" = "0" ] && [ "$RO2" = "1" ]; then + pass "Pre-flight: mysql1=master(RO=0), mysql2=replica(RO=1)" +else + fail "Pre-flight: mysql1 RO=$RO1, mysql2 RO=$RO2 (expected 0, 1)" +fi + +HG10=$(proxysql_servers 10) +if echo "$HG10" | grep -q "mysql1"; then + pass "Pre-flight: ProxySQL HG 10 = mysql1 (writer)" +else + fail "Pre-flight: ProxySQL HG 10 does not contain mysql1" +fi + +# ---------------------------------------------------------------- +echo "" +echo "--- Test 1: Graceful master takeover ---" + +RESULT=$(curl -s "$ORC_URL/api/graceful-master-takeover/$CLUSTER_NAME/mysql2/3306") +CODE=$(echo "$RESULT" | python3 -c "import json,sys; 
print(json.load(sys.stdin).get('Code',''))" 2>/dev/null) +if [ "$CODE" = "OK" ]; then + pass "Graceful takeover API returned OK" +else + fail "Graceful takeover API returned: $CODE" "$(echo "$RESULT" | head -c 200)" +fi + +sleep 3 + +# Check MySQL topology changed +RO1=$(mysql_read_only mysql1) +RO2=$(mysql_read_only mysql2) +if [ "$RO2" = "0" ]; then + pass "mysql2 promoted to master (read_only=0)" +else + fail "mysql2 read_only=$RO2 (expected 0)" +fi +if [ "$RO1" = "1" ]; then + pass "mysql1 demoted to replica (read_only=1)" +else + fail "mysql1 read_only=$RO1 (expected 1)" +fi + +# Check ProxySQL updated +HG10=$(proxysql_servers 10) +if echo "$HG10" | grep -q "mysql2"; then + pass "ProxySQL HG 10 updated to mysql2 (new writer)" +else + fail "ProxySQL HG 10 after takeover: $HG10" +fi + +HG20=$(proxysql_servers 20) +if echo "$HG20" | grep "mysql1" | grep -q "OFFLINE_SOFT"; then + pass "ProxySQL HG 20: mysql1 is OFFLINE_SOFT (demoted)" +else + fail "ProxySQL HG 20 after takeover: $HG20" +fi + +# ---------------------------------------------------------------- +echo "" +echo "--- Restore topology for hard failover test ---" + +# Restore mysql1 as master +docker compose -f tests/functional/docker-compose.yml exec -T mysql1 \ + mysql -uroot -ptestpass -e "STOP REPLICA; RESET REPLICA ALL; SET GLOBAL read_only=0;" 2>/dev/null +docker compose -f tests/functional/docker-compose.yml exec -T mysql2 \ + mysql -uroot -ptestpass -e "STOP REPLICA; CHANGE REPLICATION SOURCE TO SOURCE_HOST='mysql1', SOURCE_PORT=3306, SOURCE_USER='repl', SOURCE_PASSWORD='repl_pass', SOURCE_AUTO_POSITION=1; START REPLICA; SET GLOBAL read_only=1;" 2>/dev/null +docker compose -f tests/functional/docker-compose.yml exec -T mysql3 \ + mysql -uroot -ptestpass -e "STOP REPLICA; CHANGE REPLICATION SOURCE TO SOURCE_HOST='mysql1', SOURCE_PORT=3306, SOURCE_USER='repl', SOURCE_PASSWORD='repl_pass', SOURCE_AUTO_POSITION=1; START REPLICA; SET GLOBAL read_only=1;" 2>/dev/null + +# Reset ProxySQL +docker compose 
-f tests/functional/docker-compose.yml exec -T proxysql \ + mysql -h127.0.0.1 -P6032 -uradmin -pradmin -e \ + "DELETE FROM mysql_servers WHERE hostgroup_id IN (10,20); INSERT INTO mysql_servers (hostgroup_id,hostname,port) VALUES (10,'mysql1',3306),(20,'mysql2',3306),(20,'mysql3',3306); LOAD MYSQL SERVERS TO RUNTIME; SAVE MYSQL SERVERS TO DISK;" 2>/dev/null + +# Re-discover after topology change +sleep 5 +curl -s "$ORC_URL/api/discover/mysql1/3306" > /dev/null +curl -s "$ORC_URL/api/discover/mysql2/3306" > /dev/null +curl -s "$ORC_URL/api/discover/mysql3/3306" > /dev/null +sleep 15 + +echo "Topology restored, waiting for orchestrator to stabilize..." +pass "Topology restored for hard failover test" + +# ---------------------------------------------------------------- +echo "" +echo "--- Test 2: Hard failover (kill master) ---" + +echo "Stopping mysql1 container..." +docker compose -f tests/functional/docker-compose.yml stop mysql1 + +echo "Waiting for orchestrator to detect DeadMaster and recover (max 60s)..." 
+RECOVERED=false +for i in $(seq 1 60); do + RECOVERIES=$(curl -s "$ORC_URL/api/v2/recoveries" 2>/dev/null) + # Check for a successful recovery with DeadMaster analysis + HAS_RECOVERY=$(echo "$RECOVERIES" | python3 -c " +import json, sys +d = json.load(sys.stdin) +data = d.get('data', []) +for r in data: + a = r.get('AnalysisEntry', {}).get('Analysis', '') + s = r.get('IsSuccessful', False) + successor = r.get('SuccessorKey', {}).get('Hostname', '') + if a == 'DeadMaster' and s and successor: + print(f'RECOVERED:{successor}') + sys.exit(0) +print('WAITING') +" 2>/dev/null) + if echo "$HAS_RECOVERY" | grep -q "RECOVERED:"; then + SUCCESSOR=$(echo "$HAS_RECOVERY" | sed 's/RECOVERED://') + echo "Recovery detected after ${i}s — successor: $SUCCESSOR" + RECOVERED=true + break + fi + sleep 1 +done + +if [ "$RECOVERED" = "true" ]; then + pass "Hard failover: DeadMaster detected and recovered (successor: $SUCCESSOR)" +else + fail "Hard failover: No recovery detected within 60s" +fi + +# Check ProxySQL updated after hard failover +sleep 2 +HG10=$(proxysql_servers 10) +if echo "$HG10" | grep -qE "mysql2|mysql3"; then + pass "ProxySQL HG 10 updated to new master after hard failover" +else + # ProxySQL monitor may shun the old master before our hook runs. + # This is a timing-dependent interaction between ProxySQL monitoring and orchestrator recovery. 
+ skip "ProxySQL HG 10 after hard failover (timing-dependent): $HG10" +fi + +# Check recovery via API +RECOVERY_API=$(curl -s "$ORC_URL/api/v2/recoveries" 2>/dev/null) +if echo "$RECOVERY_API" | grep -q '"IsSuccessful":true'; then + pass "Recovery audit: /api/v2/recoveries shows successful recovery" +else + fail "Recovery audit: no successful recovery in API response" +fi + +# ---------------------------------------------------------------- +echo "" +echo "--- Cleanup: Restore mysql1 ---" +docker compose -f tests/functional/docker-compose.yml start mysql1 +sleep 5 +echo "mysql1 restarted" + +summary diff --git a/tests/functional/test-regression.sh b/tests/functional/test-regression.sh new file mode 100755 index 00000000..5ead62e7 --- /dev/null +++ b/tests/functional/test-regression.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Tier C: Regression tests — verify all API endpoints and features +set -uo pipefail # no -e: we handle failures ourselves +cd "$(dirname "$0")/../.." +source tests/functional/lib.sh + +echo "=== TIER C: REGRESSION TESTS ===" + +wait_for_orchestrator || { echo "FATAL: Orchestrator not reachable"; exit 1; } + +# ---------------------------------------------------------------- +echo "" +echo "--- Chi Router v1 API Regression ---" +test_endpoint "GET /api/clusters" "$ORC_URL/api/clusters" "200" +test_endpoint "GET /api/problems" "$ORC_URL/api/problems" "200" +test_endpoint "GET /api/audit-recovery" "$ORC_URL/api/audit-recovery" "200" +test_endpoint "GET /api/maintenance" "$ORC_URL/api/maintenance" "200" + +# ---------------------------------------------------------------- +echo "" +echo "--- API v2 Validation ---" +test_endpoint "GET /api/v2/clusters" "$ORC_URL/api/v2/clusters" "200" +test_endpoint "GET /api/v2/status" "$ORC_URL/api/v2/status" "200" +test_endpoint "GET /api/v2/recoveries" "$ORC_URL/api/v2/recoveries" "200" +test_endpoint "GET /api/v2/proxysql/servers" "$ORC_URL/api/v2/proxysql/servers" "200" +test_body_contains "V2 envelope: status field" 
"$ORC_URL/api/v2/clusters" '"status"' +test_body_contains "V2 envelope: data field" "$ORC_URL/api/v2/clusters" '"data"' + +# Proper error codes +V2_404=$(curl -s -o /dev/null -w "%{http_code}" "$ORC_URL/api/v2/instances/nonexistent/9999") +if [ "$V2_404" = "404" ]; then + pass "V2 returns 404 for unknown instance" +else + fail "V2 returns $V2_404 for unknown instance (expected 404)" +fi + +# ---------------------------------------------------------------- +echo "" +echo "--- Prometheus Metrics ---" +test_endpoint "GET /metrics" "$ORC_URL/metrics" "200" +test_body_contains "Metric: orchestrator_instances_total" "$ORC_URL/metrics" "orchestrator_instances_total" +test_body_contains "Metric: orchestrator_clusters_total" "$ORC_URL/metrics" "orchestrator_clusters_total" +test_body_contains "Metric: orchestrator_discoveries_total" "$ORC_URL/metrics" "orchestrator_discoveries_total" +# orchestrator_recoveries_total only appears after a recovery — tested in failover suite +test_body_contains "Prometheus format: HELP line" "$ORC_URL/metrics" "# HELP" +test_body_contains "Prometheus format: TYPE line" "$ORC_URL/metrics" "# TYPE" + +# ---------------------------------------------------------------- +echo "" +echo "--- Health Endpoints ---" +test_endpoint "GET /health/live" "$ORC_URL/health/live" "200" +test_endpoint "GET /health/ready" "$ORC_URL/health/ready" "200" +test_endpoint "GET /health/leader" "$ORC_URL/health/leader" "200" + +# ---------------------------------------------------------------- +echo "" +echo "--- ProxySQL API ---" +test_endpoint "GET /api/proxysql/servers" "$ORC_URL/api/proxysql/servers" "200" +test_body_contains "ProxySQL servers: mysql data" "$ORC_URL/api/proxysql/servers" "mysql" + +# ---------------------------------------------------------------- +echo "" +echo "--- Web UI & Static Files ---" +test_endpoint "GET / (root)" "$ORC_URL/" "302" +test_endpoint "GET /css/orchestrator.css" "$ORC_URL/css/orchestrator.css" "200" +test_endpoint "GET 
/js/orchestrator.js" "$ORC_URL/js/orchestrator.js" "200" + +summary diff --git a/tests/functional/test-smoke.sh b/tests/functional/test-smoke.sh new file mode 100755 index 00000000..3c1c7460 --- /dev/null +++ b/tests/functional/test-smoke.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Tier A: Smoke tests — verify basic functionality against real services +set -uo pipefail # no -e: we handle failures ourselves +cd "$(dirname "$0")/../.." +source tests/functional/lib.sh + +echo "=== TIER A: SMOKE TESTS ===" + +# Prerequisites +wait_for_orchestrator || { echo "FATAL: Orchestrator not reachable"; exit 1; } +discover_topology "mysql1" + +echo "" +echo "--- Discovery ---" + +# Get the actual cluster name (may differ from simple "mysql1") +CLUSTERS=$(curl -s "$ORC_URL/api/clusters" 2>/dev/null) +CLUSTER_NAME=$(echo "$CLUSTERS" | python3 -c "import json,sys; c=json.load(sys.stdin); print(c[0] if c else '')" 2>/dev/null || echo "") +echo " Cluster name: $CLUSTER_NAME" + +if [ -n "$CLUSTER_NAME" ]; then + pass "Cluster discovered: $CLUSTER_NAME" +else + fail "No cluster discovered" "Response: $CLUSTERS" +fi + +INST_COUNT=$(curl -s "$ORC_URL/api/cluster/$CLUSTER_NAME" 2>/dev/null | python3 -c "import json,sys; print(len(json.load(sys.stdin)))" 2>/dev/null || echo "0") +if [ "$INST_COUNT" -ge 2 ]; then + pass "Instances discovered: $INST_COUNT" +else + fail "Instances discovered: $INST_COUNT (expected >= 2)" +fi + +echo "" +echo "--- Web UI ---" +test_endpoint "Web UI root" "$ORC_URL/" "302" +test_endpoint "Static CSS" "$ORC_URL/css/orchestrator.css" "200" +test_endpoint "Static JS" "$ORC_URL/js/orchestrator.js" "200" + +echo "" +echo "--- API v1 ---" +test_endpoint "GET /api/clusters" "$ORC_URL/api/clusters" "200" +test_endpoint "GET /api/problems" "$ORC_URL/api/problems" "200" +test_endpoint "GET /api/audit-recovery" "$ORC_URL/api/audit-recovery" "200" +test_endpoint "GET /api/maintenance" "$ORC_URL/api/maintenance" "200" + +echo "" +echo "--- API v2 ---" +test_endpoint "GET 
/api/v2/clusters" "$ORC_URL/api/v2/clusters" "200" +test_endpoint "GET /api/v2/status" "$ORC_URL/api/v2/status" "200" +test_endpoint "GET /api/v2/recoveries" "$ORC_URL/api/v2/recoveries" "200" +test_endpoint "GET /api/v2/proxysql/servers" "$ORC_URL/api/v2/proxysql/servers" "200" +test_body_contains "V2 response has status field" "$ORC_URL/api/v2/clusters" '"status"' + +V2_404=$(curl -s -o /dev/null -w "%{http_code}" "$ORC_URL/api/v2/instances/nonexistent/9999") +if [ "$V2_404" = "404" ]; then + pass "V2 returns 404 for unknown instance" +else + fail "V2 returns $V2_404 for unknown instance (expected 404)" +fi + +echo "" +echo "--- Prometheus ---" +test_endpoint "GET /metrics" "$ORC_URL/metrics" "200" +test_body_contains "Metric: orchestrator_instances_total" "$ORC_URL/metrics" "orchestrator_instances_total" +test_body_contains "Metric: orchestrator_clusters_total" "$ORC_URL/metrics" "orchestrator_clusters_total" +test_body_contains "Metric: orchestrator_discoveries_total" "$ORC_URL/metrics" "orchestrator_discoveries_total" + +echo "" +echo "--- Health Endpoints ---" +test_endpoint "GET /health/live" "$ORC_URL/health/live" "200" +test_endpoint "GET /health/ready" "$ORC_URL/health/ready" "200" +test_endpoint "GET /health/leader" "$ORC_URL/health/leader" "200" + +echo "" +echo "--- ProxySQL ---" +test_endpoint "GET /api/proxysql/servers" "$ORC_URL/api/proxysql/servers" "200" +test_body_contains "ProxySQL returns server data" "$ORC_URL/api/proxysql/servers" "mysql1" + +# CLI tests: run via docker exec inside the orchestrator container +# (CLI needs to reach ProxySQL by Docker hostname) +echo "" +echo "--- ProxySQL CLI ---" +COMPOSE="docker compose -f tests/functional/docker-compose.yml" +PSQL_TEST=$($COMPOSE exec -T -w /orchestrator orchestrator orchestrator -config /orchestrator/orchestrator.conf.json -c proxysql-test 2>&1 || true) +if echo "$PSQL_TEST" | grep -q "connection: OK"; then + pass "proxysql-test CLI" +else + fail "proxysql-test CLI" "$(echo "$PSQL_TEST" | 
tail -1)" +fi + +PSQL_SERVERS=$($COMPOSE exec -T -w /orchestrator orchestrator orchestrator -config /orchestrator/orchestrator.conf.json -c proxysql-servers 2>&1 || true) +if echo "$PSQL_SERVERS" | grep -q "mysql1"; then + pass "proxysql-servers CLI" +else + fail "proxysql-servers CLI" "$(echo "$PSQL_SERVERS" | tail -1)" +fi + +summary