ProxySQL · renecannao · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.github/workflows/functional.yml b/.github/workflows/functional.yml
@@ -107,6 +107,15 @@ jobs:
     - name: Run smoke tests
       run: bash tests/functional/test-smoke.sh
 
+    - name: Run topology discovery validation
+      run: bash tests/functional/test-topology-discovery.sh
+
+    - name: Run problem detection tests
+      run: bash tests/functional/test-problem-detection.sh
+
+    - name: Run topology operations tests
+      run: bash tests/functional/test-topology-operations.sh
+
     - name: Run regression tests
       run: bash tests/functional/test-regression.sh
 

diff --git a/tests/functional/lib.sh b/tests/functional/lib.sh
@@ -174,14 +174,15 @@ mysql_read_only() {
         mysql -uroot -ptestpass -Nse "SELECT @@read_only" 2>/dev/null
 }
 
-# Get MySQL replication source
+# Get MySQL replication source host
+# Parses tab-separated SHOW REPLICA STATUS / SHOW SLAVE STATUS output — Source_Host (Master_Host on 5.7) is column 2
 mysql_source_host() {
     local CONTAINER="$1"
     if mysql_is_57; then
         docker compose -f tests/functional/docker-compose.yml exec -T "$CONTAINER" \
-            mysql -uroot -ptestpass -Nse "SHOW SLAVE STATUS\G" 2>/dev/null | grep "Master_Host" | awk '{print $2}'
+            mysql -uroot -ptestpass -Nse "SHOW SLAVE STATUS" 2>/dev/null | awk -F'\t' '{print $2; exit}'
     else
         docker compose -f tests/functional/docker-compose.yml exec -T "$CONTAINER" \
-            mysql -uroot -ptestpass -Nse "SHOW REPLICA STATUS\G" 2>/dev/null | grep "Source_Host" | awk '{print $2}'
+            mysql -uroot -ptestpass -Nse "SHOW REPLICA STATUS" 2>/dev/null | awk -F'\t' '{print $2; exit}'
     fi
 }
diff --git a/tests/functional/test-named-channels.sh b/tests/functional/test-named-channels.sh
@@ -23,12 +23,22 @@ echo ""
 echo "--- Setup: Configure multi-source replication on mysql3 ---"
 
 # Create test database and table on mysql2 for the extra channel
+# Note: mysql2 is a replica with read_only=ON; root's SUPER privilege bypasses read_only (but not super_read_only), so these writes are expected to succeed
 $COMPOSE exec -T mysql2 mysql -uroot -ptestpass -e "
     CREATE DATABASE IF NOT EXISTS extra_db;
     CREATE TABLE IF NOT EXISTS extra_db.test (id INT PRIMARY KEY AUTO_INCREMENT, val VARCHAR(100));
     INSERT INTO extra_db.test (val) VALUES ('channel-test');
 " 2>/dev/null
 
+# Verify data exists on mysql2 before setting up channel
+DATA_ON_M2=$($COMPOSE exec -T mysql2 mysql -uroot -ptestpass -Nse \
+    "SELECT val FROM extra_db.test LIMIT 1" 2>/dev/null | tr -d '[:space:]')
+if [ "$DATA_ON_M2" = "channel-test" ]; then
+    echo "  Verified data on mysql2: $DATA_ON_M2"
+else
+    fail "Setup: test data not created on mysql2 (got: '$DATA_ON_M2')" "Check if writes are allowed on replica"
+fi
+
 # Add a named channel 'extra' on mysql3 replicating from mysql2
 CHANGE_SQL=$(mysql_change_source_channel_sql mysql2 3306 repl repl_pass extra)
 START_SQL=$(mysql_start_replica_sql)
@@ -49,15 +59,23 @@ else
     fail "Named channel 'extra' not running on mysql3 (status: $CHANNEL_STATUS)"
 fi
 
-# Verify data replicated through the extra channel
-sleep 2
-REPLICATED=$($COMPOSE exec -T mysql3 mysql -uroot -ptestpass -Nse \
-    "SELECT val FROM extra_db.test LIMIT 1" 2>/dev/null | tr -d '[:space:]')
+# Verify data replicated through the extra channel (poll up to 15s)
+echo "Waiting for data to replicate through named channel..."
+REPLICATED=""
+for i in $(seq 1 15); do
+    REPLICATED=$($COMPOSE exec -T mysql3 mysql -uroot -ptestpass -Nse \
+        "SELECT val FROM extra_db.test LIMIT 1" 2>/dev/null | tr -d '[:space:]')
+    if [ "$REPLICATED" = "channel-test" ]; then
+        break
+    fi
+    sleep 1
+done
 
 if [ "$REPLICATED" = "channel-test" ]; then
     pass "Data replicated through named channel 'extra'"
 else
-    fail "Data not replicated through named channel (got: $REPLICATED)"
+    fail "Data not replicated through named channel (got: '$REPLICATED')" \
+        "Channel ON but data missing - check GTID sets on mysql2 vs mysql3"
 fi
 
 # ----------------------------------------------------------------
@@ -108,4 +126,9 @@ else
     fail "Named channel 'extra' still present after cleanup"
 fi
 
+# Clean up test database on mysql2; report a failed drop instead of silently ignoring it
+if ! $COMPOSE exec -T mysql2 mysql -uroot -ptestpass -e "DROP DATABASE IF EXISTS extra_db;" 2>/dev/null; then
+    fail "Cleanup: failed to drop extra_db on mysql2"
+fi
+
 summary
diff --git a/tests/functional/test-problem-detection.sh b/tests/functional/test-problem-detection.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# Problem detection tests — verify orchestrator detects and clears
+# replication problems correctly
+set -uo pipefail  # no -e: failures are handled explicitly; cd/source below abort the run on error
+cd "$(dirname "$0")/../.." || { echo "FATAL: unable to cd to repository root"; exit 1; }
+source tests/functional/lib.sh || { echo "FATAL: unable to load tests/functional/lib.sh"; exit 1; }
+
+echo "=== PROBLEM DETECTION TESTS ==="
+
+COMPOSE="docker compose -f tests/functional/docker-compose.yml"
+STOP_SQL=$(mysql_stop_replica_sql)
+START_SQL=$(mysql_start_replica_sql)
+
+wait_for_orchestrator || { echo "FATAL: Orchestrator not reachable"; exit 1; }
+discover_topology "mysql1" || { echo "FATAL: initial topology discovery failed"; exit 1; }
+
+# ----------------------------------------------------------------
+echo ""
+echo "--- Test 1: Detect stopped replication ---"
+
+# Stop replication on mysql2
+echo "Stopping replication on mysql2..."
+$COMPOSE exec -T mysql2 mysql -uroot -ptestpass -e "$STOP_SQL" 2>/dev/null
+
+# Force re-discovery so orchestrator refreshes instance state immediately
+curl -s --max-time 10 "$ORC_URL/api/discover/mysql2/3306" > /dev/null 2>&1
+sleep 2
+
+# Wait for orchestrator to detect the problem (poll up to 30s)
+echo "Waiting for orchestrator to detect stopped replication..."
+DETECTED=false
+for i in $(seq 1 30); do
+    PROBLEMS=$(curl -s --max-time 10 "$ORC_URL/api/problems" 2>/dev/null)
+    if echo "$PROBLEMS" | python3 -c "
+import json, sys
+problems = json.load(sys.stdin)
+for p in problems:
+    h = p.get('Key', {}).get('Hostname', '')
+    if 'mysql2' in h:
+        sys.exit(0)
+sys.exit(1)
+" 2>/dev/null; then
+        DETECTED=true
+        echo "Problem detected after ${i}s"
+        break
+    fi
+    sleep 1
+done
+
+if [ "$DETECTED" = "true" ]; then
+    pass "Orchestrator detected stopped replication on mysql2"
+else
+    fail "Orchestrator did not detect stopped replication on mysql2 within 30s"
+fi
+
+# Force another re-discovery to ensure instance data is fresh
+curl -s --max-time 10 "$ORC_URL/api/discover/mysql2/3306" > /dev/null 2>&1
+sleep 2
+
+# Verify orchestrator reports the SQL thread as stopped (the IO state is only echoed in the failure message)
+REPL_STATE=$(curl -s --max-time 10 "$ORC_URL/api/instance/mysql2/3306" 2>/dev/null | python3 -c "
+import json, sys
+inst = json.load(sys.stdin)
+sql = inst.get('ReplicationSQLThreadRuning', 'unknown')
+io = inst.get('ReplicationIOThreadRuning', 'unknown')
+print(f'SQL={sql},IO={io}')
+" 2>/dev/null || echo "unknown")
+
+if echo "$REPL_STATE" | grep -q "SQL=False"; then
+    pass "Orchestrator reports SQL thread stopped on mysql2"
+else
+    fail "Orchestrator replication state for mysql2: $REPL_STATE"
+fi
+
+# ----------------------------------------------------------------
+echo ""
+echo "--- Test 1b: Clear stopped replication ---"
+
+# Restart replication
+echo "Restarting replication on mysql2..."
+$COMPOSE exec -T mysql2 mysql -uroot -ptestpass -e "$START_SQL" 2>/dev/null
+
+# Force re-discovery so orchestrator refreshes instance state
+curl -s --max-time 10 "$ORC_URL/api/discover/mysql2/3306" > /dev/null 2>&1
+sleep 2
+
+# Wait for orchestrator to see replication running again
+echo "Waiting for replication to recover..."
+CLEARED=false
+for i in $(seq 1 30); do
+    # Force re-discovery each iteration; recovery requires BOTH the SQL and IO threads to be running
+    curl -s --max-time 10 "$ORC_URL/api/discover/mysql2/3306" > /dev/null 2>&1
+    REPL_STATE=$(curl -s --max-time 10 "$ORC_URL/api/instance/mysql2/3306" 2>/dev/null | python3 -c "
+import json, sys
+inst = json.load(sys.stdin)
+sql = inst.get('ReplicationSQLThreadRuning', False)
+io = inst.get('ReplicationIOThreadRuning', False)
+print(f'{sql}:{io}')
+" 2>/dev/null || echo "False:False")
+    SQL_RUNNING=$(echo "$REPL_STATE" | cut -d: -f1)
+    IO_RUNNING=$(echo "$REPL_STATE" | cut -d: -f2)
+    if [ "$SQL_RUNNING" = "True" ] && [ "$IO_RUNNING" = "True" ]; then
+        CLEARED=true
+        echo "Replication recovered after ${i}s (SQL=True, IO=True)"
+        break
+    fi
+    sleep 1
+done
+
+if [ "$CLEARED" = "true" ]; then
+    pass "Stopped replication problem cleared after restart"
+else
+    fail "Replication threads not both running on mysql2 after 30s (state: $REPL_STATE)"
+fi
+
+# ----------------------------------------------------------------
+echo ""
+echo "--- Test 2: Detect read_only mismatch (writable replica) ---"
+
+# Make mysql2 writable (it should be read-only as a replica)
+echo "Setting mysql2 read_only=0 (simulating writable replica)..."
+$COMPOSE exec -T mysql2 mysql -uroot -ptestpass -e "SET GLOBAL read_only=0" 2>/dev/null
+
+# Force re-discovery so orchestrator refreshes instance state
+curl -s --max-time 10 "$ORC_URL/api/discover/mysql2/3306" > /dev/null 2>&1
+sleep 2
+
+# Wait for orchestrator to detect the problem
+echo "Waiting for orchestrator to detect writable replica..."
+DETECTED=false
+for i in $(seq 1 30); do
+    INST=$(curl -s --max-time 10 "$ORC_URL/api/instance/mysql2/3306" 2>/dev/null)
+    IS_RO=$(echo "$INST" | python3 -c "import json,sys; print(json.load(sys.stdin).get('ReadOnly', True))" 2>/dev/null || echo "True")
+    if [ "$IS_RO" = "False" ]; then
+        DETECTED=true
+        echo "Writable replica detected after ${i}s"
+        break
+    fi
+    sleep 1
+done
+
+if [ "$DETECTED" = "true" ]; then
+    pass "Orchestrator detected mysql2 is writable (read_only=false while replicating)"
+else
+    fail "Orchestrator did not detect writable replica within 30s"
+fi
+
+# Restore read_only
+echo "Restoring read_only=1 on mysql2..."
+$COMPOSE exec -T mysql2 mysql -uroot -ptestpass -e "SET GLOBAL read_only=1" 2>/dev/null
+
+# Force re-discovery
+curl -s --max-time 10 "$ORC_URL/api/discover/mysql2/3306" > /dev/null 2>&1
+sleep 2
+
+# Wait for it to clear
+CLEARED=false
+for i in $(seq 1 15); do
+    IS_RO=$(curl -s --max-time 10 "$ORC_URL/api/instance/mysql2/3306" 2>/dev/null | python3 -c "import json,sys; print(json.load(sys.stdin).get('ReadOnly', False))" 2>/dev/null || echo "False")
+    if [ "$IS_RO" = "True" ]; then
+        CLEARED=true
+        break
+    fi
+    sleep 1
+done
+
+if [ "$CLEARED" = "true" ]; then
+    pass "Writable replica problem cleared after restoring read_only"
+else
+    fail "read_only still reported as false after 15s"
+fi
+
+# ----------------------------------------------------------------
+echo ""
+echo "--- Test 3: Detect errant GTID ---"
+
+# Inject an errant transaction on mysql3
+echo "Injecting errant transaction on mysql3..."
+$COMPOSE exec -T mysql3 mysql -uroot -ptestpass -e "
+    SET GLOBAL read_only=0;
+    SET GLOBAL super_read_only=0;
+    CREATE DATABASE IF NOT EXISTS errant_detect_test;
+    SET GLOBAL read_only=1;
+    SET GLOBAL super_read_only=1;
+" 2>/dev/null
+
+# Force re-discovery so orchestrator picks up the errant GTID
+curl -s --max-time 10 "$ORC_URL/api/discover/mysql3/3306" > /dev/null 2>&1
+sleep 2
+
+# Wait for orchestrator to detect errant GTID
+echo "Waiting for orchestrator to detect errant GTID..."
+DETECTED=false
+for i in $(seq 1 30); do
+    GTID_ERRANT=$(curl -s --max-time 10 "$ORC_URL/api/instance/mysql3/3306" 2>/dev/null | python3 -c "
+import json, sys
+inst = json.load(sys.stdin)
+errant = inst.get('GtidErrant', '')
+print(errant if errant else '')
+" 2>/dev/null || echo "")
+    if [ -n "$GTID_ERRANT" ]; then
+        DETECTED=true
+        echo "Errant GTID detected after ${i}s: $GTID_ERRANT"
+        break
+    fi
+    sleep 1
+done
+
+if [ "$DETECTED" = "true" ]; then
+    pass "Orchestrator detected errant GTID on mysql3"
+else
+    fail "Orchestrator did not detect errant GTID within 30s"
+fi
+
+# Cleanup errant DB (GTID remains but that's OK); restores super_read_only as well, mirroring the injection step
+$COMPOSE exec -T mysql3 mysql -uroot -ptestpass -e "
+    SET GLOBAL read_only=0; SET GLOBAL super_read_only=0;
+    DROP DATABASE IF EXISTS errant_detect_test;
+    SET GLOBAL read_only=1; SET GLOBAL super_read_only=1;
+" 2>/dev/null
+
+summary