From e1797e0d9dfcfd616e317a218f68528144c54114 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sun, 23 Nov 2025 13:08:15 -0600 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=A4=96=20fix:=20repair=20terminal-ben?= =?UTF-8?q?ch=20agent=20path=20and=20source=20execution?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates mux-run.sh to point to correct src/cli/debug/agentSessionCli.ts path. Updates workerPool.ts to support running tokenizer worker from source (Bun) when dist is missing. Updates benchmarking docs to reflect correct path. --- benchmarks/terminal_bench/mux-run.sh | 2 +- docs/benchmarking.md | 2 +- src/node/utils/main/workerPool.ts | 12 +++++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/benchmarks/terminal_bench/mux-run.sh b/benchmarks/terminal_bench/mux-run.sh index 89e0ba4b0..52cc14ab4 100644 --- a/benchmarks/terminal_bench/mux-run.sh +++ b/benchmarks/terminal_bench/mux-run.sh @@ -77,7 +77,7 @@ ensure_git_repo "${project_path}" log "starting mux agent session for ${project_path}" cd "${MUX_APP_ROOT}" -cmd=(bun src/debug/agentSessionCli.ts +cmd=(bun src/cli/debug/agentSessionCli.ts --config-root "${MUX_CONFIG_ROOT}" --project-path "${project_path}" --workspace-path "${project_path}" diff --git a/docs/benchmarking.md b/docs/benchmarking.md index 4afbf2f06..d35ac0f67 100644 --- a/docs/benchmarking.md +++ b/docs/benchmarking.md @@ -65,7 +65,7 @@ The adapter lives in `benchmarks/terminal_bench/mux_agent.py`. For each task it: 1. Copies the mux repository (package manifests + `src/`) into `/tmp/mux-app` inside the container. 2. Ensures Bun exists, then runs `bun install --frozen-lockfile`. -3. Launches `src/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`). +3. Launches `src/cli/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`). `MUX_MODEL` accepts either the mux colon form (`anthropic:claude-sonnet-4-5`) or the Terminal-Bench slash form (`anthropic/claude-sonnet-4-5`); the adapter normalises whichever you provide. diff --git a/src/node/utils/main/workerPool.ts b/src/node/utils/main/workerPool.ts index df2ee321c..8a4fbf4cf 100644 --- a/src/node/utils/main/workerPool.ts +++ b/src/node/utils/main/workerPool.ts @@ -1,5 +1,5 @@ import { Worker } from "node:worker_threads"; -import { join, dirname, sep } from "node:path"; +import { join, dirname, sep, extname } from "node:path"; interface WorkerRequest { messageId: number; @@ -37,7 +37,13 @@ const hasDist = pathParts.includes("dist"); const srcIndex = pathParts.lastIndexOf("src"); let workerDir: string; -if (srcIndex !== -1 && !hasDist) { +let workerFile = "tokenizer.worker.js"; + +if (extname(__filename) === ".ts") { + // Running from source (e.g. via Bun) + workerDir = currentDir; + workerFile = "tokenizer.worker.ts"; +} else if (srcIndex !== -1 && !hasDist) { // Replace 'src' with 'dist' in the path (only if not already in dist) pathParts[srcIndex] = "dist"; workerDir = pathParts.join(sep); @@ -45,7 +51,7 @@ if (srcIndex !== -1 && !hasDist) { workerDir = currentDir; } -const workerPath = join(workerDir, "tokenizer.worker.js"); +const workerPath = join(workerDir, workerFile); const worker = new Worker(workerPath); // Handle messages from worker From d2d2dec29136ba71e01ddcaea606bbc16e6fbf43 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sun, 23 Nov 2025 19:38:01 -0600 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=A4=96=20ci:=20add=20static=20check?= =?UTF-8?q?=20for=20terminal-bench=20agent=20configuration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds scripts/check-bench-agent.sh to verify that the agent entry point referenced in mux-run.sh exists and loads without import errors. Includes this check in the main 'static-check' make target. --- Makefile | 5 +++- scripts/check-bench-agent.sh | 56 ++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100755 scripts/check-bench-agent.sh diff --git a/Makefile b/Makefile index 866e23f8b..3a5b8534f 100644 --- a/Makefile +++ b/Makefile @@ -213,7 +213,10 @@ build/icon.png: docs/img/logo.webp scripts/generate-icons.ts @bun scripts/generate-icons.ts png ## Quality checks (can run in parallel) -static-check: lint typecheck fmt-check check-eager-imports ## Run all static checks (includes startup performance checks) +static-check: lint typecheck fmt-check check-eager-imports check-bench-agent ## Run all static checks (includes startup performance checks) + +check-bench-agent: ## Verify terminal-bench agent configuration and imports + @./scripts/check-bench-agent.sh lint: node_modules/.installed ## Run ESLint (typecheck runs in separate target) @./scripts/lint.sh diff --git a/scripts/check-bench-agent.sh b/scripts/check-bench-agent.sh new file mode 100755 index 000000000..f006ee2fd --- /dev/null +++ b/scripts/check-bench-agent.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail + +# This script verifies that the terminal-bench agent entry point +# referenced in mux-run.sh is valid and can be executed (imports resolve). + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +MUX_RUN_SH="$REPO_ROOT/benchmarks/terminal_bench/mux-run.sh" + +echo "Checking terminal-bench agent configuration..." + +if [[ ! -f "$MUX_RUN_SH" ]]; then + echo "❌ Error: $MUX_RUN_SH not found" + exit 1 +fi + +# Extract the agent CLI path from mux-run.sh +# Looks for line like: cmd=(bun src/cli/debug/agentSessionCli.ts +CLI_PATH_MATCH=$(grep -o "bun src/.*\.ts" "$MUX_RUN_SH" | head -1 | cut -d' ' -f2) + +if [[ -z "$CLI_PATH_MATCH" ]]; then + echo "❌ Error: Could not find agent CLI path in $MUX_RUN_SH" + exit 1 +fi + +FULL_CLI_PATH="$REPO_ROOT/$CLI_PATH_MATCH" + +echo "Found agent CLI path: $CLI_PATH_MATCH" + +if [[ ! -f "$FULL_CLI_PATH" ]]; then + echo "❌ Error: Referenced file $FULL_CLI_PATH does not exist" + exit 1 +fi + +echo "Verifying agent CLI startup (checking imports)..." + +# Run with --help or no args to check if it boots without crashing on imports +# We expect it to fail with "Unknown option" or "workspace-path required" but NOT with "Module not found" or "worker error" +if ! output=$(bun "$FULL_CLI_PATH" --help 2>&1); then + # It failed, which is expected (no args/bad args), but we need to check WHY + exit_code=$? + + # Check for known import/worker errors + if echo "$output" | grep -qE "Module not found|Worker error|Cannot find module"; then + echo "❌ Error: Agent CLI failed to start due to import/worker errors:" + echo "$output" + exit 1 + fi + + # If it failed just because of arguments, that's fine - it means the code loaded. + echo "✅ Agent CLI loaded successfully (ignoring argument errors)" +else + echo "✅ Agent CLI ran successfully" +fi + +echo "Terminal-bench agent check passed." From 27f634f124d2254be87614c29c329b72658dce37 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sun, 23 Nov 2025 19:41:45 -0600 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=A4=96=20style:=20format=20scripts/ch?= =?UTF-8?q?eck-bench-agent.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/check-bench-agent.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/check-bench-agent.sh b/scripts/check-bench-agent.sh index f006ee2fd..09b693ad0 100755 --- a/scripts/check-bench-agent.sh +++ b/scripts/check-bench-agent.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# This script verifies that the terminal-bench agent entry point +# This script verifies that the terminal-bench agent entry point # referenced in mux-run.sh is valid and can be executed (imports resolve). REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" @@ -39,14 +39,14 @@ echo "Verifying agent CLI startup (checking imports)..." if ! output=$(bun "$FULL_CLI_PATH" --help 2>&1); then # It failed, which is expected (no args/bad args), but we need to check WHY exit_code=$? - + # Check for known import/worker errors if echo "$output" | grep -qE "Module not found|Worker error|Cannot find module"; then echo "❌ Error: Agent CLI failed to start due to import/worker errors:" echo "$output" exit 1 fi - + # If it failed just because of arguments, that's fine - it means the code loaded. echo "✅ Agent CLI loaded successfully (ignoring argument errors)" else From b7023a742d7a060cb97b90edd0ecc1566cf9b1d0 Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 24 Nov 2025 15:00:33 -0600 Subject: [PATCH 4/4] fix: detect Bun vs Node for worker loading The previous fix used extname(__filename) === '.ts' to detect source execution, but ts-jest also sets __filename to .ts while running under Node. Node cannot load .ts workers directly (only Bun can), so we now check process.isBun to distinguish between: - Bun running .ts source -> use tokenizer.worker.ts - Node/ts-jest running .ts source -> use tokenizer.worker.js from dist/ --- src/node/utils/main/workerPool.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/node/utils/main/workerPool.ts b/src/node/utils/main/workerPool.ts index 8a4fbf4cf..40968276c 100644 --- a/src/node/utils/main/workerPool.ts +++ b/src/node/utils/main/workerPool.ts @@ -39,8 +39,12 @@ const srcIndex = pathParts.lastIndexOf("src"); let workerDir: string; let workerFile = "tokenizer.worker.js"; -if (extname(__filename) === ".ts") { - // Running from source (e.g. via Bun) +// Check if we're running under Bun (not Node with ts-jest) +// ts-jest transpiles .ts files but runs them via Node, which can't load .ts workers +const isBun = !!(process as unknown as { isBun?: boolean }).isBun; + +if (isBun && extname(__filename) === ".ts") { + // Running from source via Bun - use .ts worker directly workerDir = currentDir; workerFile = "tokenizer.worker.ts"; } else if (srcIndex !== -1 && !hasDist) {