|
#!/usr/bin/env bash
#
# Verifies that the terminal-bench agent entry point referenced in
# benchmarks/terminal_bench/mux-run.sh exists and can be started by bun
# (i.e. its imports resolve).
#
# Usage:  run with no arguments. The repository root is derived from this
#         script's own location (one directory above it).
# Exit:   0 when the check passes, 1 on any failure.
set -euo pipefail

# Prints an error message to stderr and aborts the script with status 1.
die() {
  printf '❌ Error: %s\n' "$*" >&2
  exit 1
}

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
readonly REPO_ROOT
readonly MUX_RUN_SH="$REPO_ROOT/benchmarks/terminal_bench/mux-run.sh"

echo "Checking terminal-bench agent configuration..."

[[ -f "$MUX_RUN_SH" ]] || die "$MUX_RUN_SH not found"

# Extract the agent CLI path from mux-run.sh
# Looks for line like: cmd=(bun src/cli/debug/agentSessionCli.ts
#
# The trailing `|| true` is deliberate: with `set -e -o pipefail`, a grep that
# finds nothing (exit 1), or a grep killed by SIGPIPE once `head` has its first
# line, would otherwise abort the script right here and the explicit
# diagnostic below would never be printed.
CLI_PATH_MATCH=$(
  grep -o "bun src/.*\.ts" "$MUX_RUN_SH" | head -1 | cut -d' ' -f2
) || true

[[ -n "$CLI_PATH_MATCH" ]] || die "Could not find agent CLI path in $MUX_RUN_SH"

readonly FULL_CLI_PATH="$REPO_ROOT/$CLI_PATH_MATCH"

echo "Found agent CLI path: $CLI_PATH_MATCH"

[[ -f "$FULL_CLI_PATH" ]] || die "Referenced file $FULL_CLI_PATH does not exist"

# Without this guard a missing bun yields "command not found", which matches
# none of the import-error patterns below and would be reported as a success.
command -v bun >/dev/null 2>&1 || die "bun is not installed or not on PATH"

echo "Verifying agent CLI startup (checking imports)..."

# Run with --help to check if it boots without crashing on imports.
# A non-zero exit is tolerated (e.g. "Unknown option" or "workspace-path
# required"), but NOT output containing "Module not found", "Worker error" or
# "Cannot find module".
if output=$(bun "$FULL_CLI_PATH" --help 2>&1); then
  echo "✅ Agent CLI ran successfully"
else
  # It failed; decide WHY by inspecting the captured output.
  # A here-string is used instead of `echo | grep -q`: under pipefail the
  # latter can report failure even on a match, because grep -q exits early
  # and echo may then die of SIGPIPE on large output.
  if grep -qE "Module not found|Worker error|Cannot find module" <<<"$output"; then
    printf '%s\n' \
      "❌ Error: Agent CLI failed to start due to import/worker errors:" \
      "$output" >&2
    exit 1
  fi

  # If it failed just because of arguments, that's fine - it means the code loaded.
  echo "✅ Agent CLI loaded successfully (ignoring argument errors)"
fi

echo "Terminal-bench agent check passed."
0 commit comments