Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ build/icon.png: docs/img/logo.webp scripts/generate-icons.ts
@bun scripts/generate-icons.ts png

## Quality checks (can run in parallel)
static-check: lint typecheck fmt-check check-eager-imports ## Run all static checks (includes startup performance checks)
static-check: lint typecheck fmt-check check-eager-imports check-bench-agent ## Run all static checks (includes startup performance checks)

check-bench-agent: ## Verify terminal-bench agent configuration and imports
@./scripts/check-bench-agent.sh

lint: node_modules/.installed ## Run ESLint (typecheck runs in separate target)
@./scripts/lint.sh
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/terminal_bench/mux-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ ensure_git_repo "${project_path}"
log "starting mux agent session for ${project_path}"
cd "${MUX_APP_ROOT}"

cmd=(bun src/debug/agentSessionCli.ts
cmd=(bun src/cli/debug/agentSessionCli.ts
--config-root "${MUX_CONFIG_ROOT}"
--project-path "${project_path}"
--workspace-path "${project_path}"
Expand Down
2 changes: 1 addition & 1 deletion docs/benchmarking.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ The adapter lives in `benchmarks/terminal_bench/mux_agent.py`. For each task it:

1. Copies the mux repository (package manifests + `src/`) into `/tmp/mux-app` inside the container.
2. Ensures Bun exists, then runs `bun install --frozen-lockfile`.
3. Launches `src/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`).
3. Launches `src/cli/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`).

`MUX_MODEL` accepts either the mux colon form (`anthropic:claude-sonnet-4-5`) or the Terminal-Bench slash form (`anthropic/claude-sonnet-4-5`); the adapter normalises whichever you provide.

Expand Down
56 changes: 56 additions & 0 deletions scripts/check-bench-agent.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
set -euo pipefail

# Verifies that the terminal-bench agent entry point referenced in
# benchmarks/terminal_bench/mux-run.sh exists and can be loaded by Bun
# (i.e. its imports resolve).
#
# Exit status:
#   0 - the referenced CLI file exists and starts without import/worker errors
#   1 - mux-run.sh is missing, no CLI path could be extracted, the referenced
#       file does not exist, bun is unavailable, or startup hit an import/worker
#       error

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MUX_RUN_SH="$REPO_ROOT/benchmarks/terminal_bench/mux-run.sh"

echo "Checking terminal-bench agent configuration..."

if [[ ! -f "$MUX_RUN_SH" ]]; then
  echo "❌ Error: $MUX_RUN_SH not found"
  exit 1
fi

# Extract the agent CLI path from mux-run.sh
# Looks for line like: cmd=(bun src/cli/debug/agentSessionCli.ts
# The match is limited to a single whitespace-free token so it cannot run on
# into unrelated text later on the same line.
# `|| true`: with `set -e -o pipefail`, a grep that matches nothing would abort
# the script right here, before the explicit error message below is printed.
CLI_PATH_MATCH=$(grep -o "bun src/[^[:space:]]*\.ts" "$MUX_RUN_SH" | head -1 | cut -d' ' -f2 || true)

if [[ -z "$CLI_PATH_MATCH" ]]; then
  echo "❌ Error: Could not find agent CLI path in $MUX_RUN_SH"
  exit 1
fi

FULL_CLI_PATH="$REPO_ROOT/$CLI_PATH_MATCH"

echo "Found agent CLI path: $CLI_PATH_MATCH"

if [[ ! -f "$FULL_CLI_PATH" ]]; then
  echo "❌ Error: Referenced file $FULL_CLI_PATH does not exist"
  exit 1
fi

# Without bun the startup command below would fail with "command not found",
# which matches none of the import-error patterns and would be misreported as
# a successful load.
if ! command -v bun >/dev/null 2>&1; then
  echo "❌ Error: bun not found on PATH; cannot verify agent CLI startup"
  exit 1
fi

echo "Verifying agent CLI startup (checking imports)..."

# Run with --help to check that the CLI boots without crashing on imports.
# A non-zero exit caused by argument handling (e.g. "Unknown option" or
# "workspace-path required") is acceptable; "Module not found" / worker errors
# are not.
if ! output=$(bun "$FULL_CLI_PATH" --help 2>&1); then
  # It failed, which may be expected (bad args), so inspect WHY it failed.
  # A here-string is used instead of `echo | grep -q`: with pipefail, grep -q
  # exiting early can SIGPIPE the echo and make the pipeline look like
  # "no match" even though an error string was found.
  if grep -qE "Module not found|Worker error|Cannot find module" <<<"$output"; then
    echo "❌ Error: Agent CLI failed to start due to import/worker errors:"
    echo "$output"
    exit 1
  fi

  # If it failed just because of arguments, that's fine - it means the code loaded.
  echo "✅ Agent CLI loaded successfully (ignoring argument errors)"
else
  echo "✅ Agent CLI ran successfully"
fi

echo "Terminal-bench agent check passed."
16 changes: 13 additions & 3 deletions src/node/utils/main/workerPool.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Worker } from "node:worker_threads";
import { join, dirname, sep } from "node:path";
import { join, dirname, sep, extname } from "node:path";

interface WorkerRequest {
messageId: number;
Expand Down Expand Up @@ -37,15 +37,25 @@ const hasDist = pathParts.includes("dist");
const srcIndex = pathParts.lastIndexOf("src");

let workerDir: string;
if (srcIndex !== -1 && !hasDist) {
let workerFile = "tokenizer.worker.js";

// Check if we're running under Bun (not Node with ts-jest)
// ts-jest transpiles .ts files but runs them via Node, which can't load .ts workers
const isBun = !!(process as unknown as { isBun?: boolean }).isBun;

if (isBun && extname(__filename) === ".ts") {
// Running from source via Bun - use .ts worker directly
workerDir = currentDir;
workerFile = "tokenizer.worker.ts";
} else if (srcIndex !== -1 && !hasDist) {
// Replace 'src' with 'dist' in the path (only if not already in dist)
pathParts[srcIndex] = "dist";
workerDir = pathParts.join(sep);
} else {
workerDir = currentDir;
}

const workerPath = join(workerDir, "tokenizer.worker.js");
const workerPath = join(workerDir, workerFile);
const worker = new Worker(workerPath);

// Handle messages from worker
Expand Down