diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml
new file mode 100644
index 000000000..666330d52
--- /dev/null
+++ b/.github/workflows/tests_lightweight.yml
@@ -0,0 +1,425 @@
+# Lightweight test suite: runs all tests with -m "not llm_call" across 8 fixed per-test shards.
+# For the full test suite (with LLM calls, matrix parallelism, cache consolidation),
+# use tests.yml via workflow_dispatch.
+name: Tests (lightweight)
+run-name: >-
+ ${{
+ github.event.head_commit.message ||
+ github.event.pull_request.title
+ }}
+
+on:
+ pull_request:
+ branches:
+ - main
+ - staging
+
+jobs:
+ pytest:
+ name: pytest (lightweight shard ${{ matrix.shard }}/8)
+ runs-on: ubuntu-latest-8-cores
+ environment: unity-testing
+ timeout-minutes: 130
+ strategy:
+ fail-fast: false
+ matrix:
+ shard: [1, 2, 3, 4, 5, 6, 7, 8]
+ env:
+ LIGHTWEIGHT_SHARD_COUNT: "8"
+ UNIFY_TESTS_RAND_PROJ: "false"
+ UNIFY_TESTS_DELETE_PROJ_ON_START: "false"
+ UNIFY_TESTS_DELETE_PROJ_ON_EXIT: "false"
+ UNILLM_CACHE: "true"
+ ORCHESTRA_REPO_PATH: ${{ github.workspace }}/orchestra
+ ORCHESTRA_URL: ${{ vars.ORCHESTRA_URL }}
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ UNITY_COMMS_URL: ${{ vars.UNITY_COMMS_URL }}
+ GCP_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }}
+ GCP_LOCATION: ${{ vars.GCP_LOCATION }}
+ ORCHESTRA_GCP_BUCKET_NAME: ${{ vars.GCP_BUCKET_LOGS }}
+ ORCHESTRA_GCP_ASSISTANT_MEDIA_BUCKET_NAME: ${{ vars.GCP_BUCKET_ASSISTANT_IMAGES }}
+ ORCHESTRA_GCP_ASSISTANT_CALL_RECORDINGS_BUCKET_NAME: ${{ vars.GCP_BUCKET_RECORDINGS }}
+ TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }}
+ UNIFY_KEY: ${{ secrets.UNIFY_KEY }}
+ PYTHONPATH: ${{ github.workspace }}/unillm:${{ github.workspace }}/unify
+ steps:
+ - uses: actions/checkout@v6
+ with:
+ fetch-depth: 1
+
+ - name: Clone orchestra repo for local deployment
+ uses: actions/checkout@v6
+ with:
+ repository: unifyai/orchestra
+        ref: ${{ vars.LOCAL_ORCHESTRA_BRANCH || (github.base_ref == 'main' && 'main' || 'staging') }}
+ path: orchestra
+ token: ${{ secrets.CLONE_TOKEN }}
+ fetch-depth: 1
+
+ - name: Clone unify repo
+ uses: actions/checkout@v6
+ with:
+ repository: unifyai/unify
+        ref: ${{ github.base_ref == 'main' && 'main' || 'staging' }}
+ path: unify
+ token: ${{ secrets.CLONE_TOKEN }}
+ fetch-depth: 1
+
+ - name: Clone unillm repo
+ uses: actions/checkout@v6
+ with:
+ repository: unifyai/unillm
+        ref: ${{ github.base_ref == 'main' && 'main' || 'staging' }}
+ path: unillm
+ token: ${{ secrets.CLONE_TOKEN }}
+ fetch-depth: 1
+
+ - name: Authenticate to Google Cloud
+ uses: google-github-actions/auth@v2
+ with:
+ credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }}
+
+ - name: Get orchestra commit SHA
+ id: orchestra-sha
+ run: echo "sha=$(git -C orchestra rev-parse HEAD)" >> $GITHUB_OUTPUT
+
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: '3.12'
+
+ - name: Cache apt packages
+ uses: awalsh128/cache-apt-pkgs-action@latest
+ with:
+ packages: >-
+ tmux zsh portaudio19-dev libnss3-dev libatk-bridge2.0-dev libdrm2
+ libxkbcommon0 libgtk-3-dev libgbm-dev libgirepository1.0-dev
+ libasound2t64 libatspi2.0-0 libgtk-3-0 libnspr4 libwayland-client0
+ libxcomposite1 libxdamage1 libxrandr2 xvfb
+ version: 1.1
+
+ - name: Install uv and poetry
+ run: |
+ pip install uv
+ pip install poetry
+
+ - name: Cache uv dependencies
+ uses: actions/cache@v5
+ with:
+ path: |
+ ~/.cache/uv
+ .venv
+ key: uv-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'uv.lock', 'unillm/pyproject.toml', 'unify/pyproject.toml') }}
+ restore-keys: |
+ uv-${{ runner.os }}-
+
+ - name: Configure unify/unillm paths for CI
+ run: |
+ sed -i 's|path = "../unify"|path = "./unify"|g' pyproject.toml
+ sed -i 's|path = "../unillm"|path = "./unillm"|g' pyproject.toml
+
+ - name: Sync dependencies with uv
+ run: |
+ uv sync --all-groups
+
+ - name: Verify local packages installed correctly
+ run: |
+ uv run python -c "
+ import sys
+ print('Python path (first 5):')
+ for p in sys.path[:5]:
+ print(f' {p}')
+
+ from unillm import AsyncUnify, Unify
+ import unify
+
+ print(f'✓ unillm: {AsyncUnify.__module__}')
+ print(f'✓ unify: {unify.__file__}')
+ assert hasattr(unify, 'create_project'), f'Wrong unify package!'
+ print('✓ Local packages verified successfully')
+ "
+
+ - name: Cache Playwright assets
+ uses: actions/cache@v5
+ id: playwright-cache
+ with:
+ path: ~/.cache/ms-playwright
+ key: playwright-${{ runner.os }}-${{ hashFiles('pyproject.toml') }}
+ restore-keys: |
+ playwright-${{ runner.os }}-
+
+ - name: Install Playwright assets
+ if: steps.playwright-cache.outputs.cache-hit != 'true'
+ run: |
+ uv run playwright install --with-deps
+
+ - name: Cache orchestra poetry dependencies
+      uses: actions/cache@v5
+ with:
+ path: ~/.cache/pypoetry
+ key: poetry-orchestra-${{ runner.os }}-${{ steps.orchestra-sha.outputs.sha }}
+ restore-keys: |
+ poetry-orchestra-${{ runner.os }}-
+
+ - name: Install orchestra dependencies
+ run: |
+ cd "$ORCHESTRA_REPO_PATH"
+ poetry install --no-interaction
+
+ - name: Collect test nodes for this shard
+ id: collect-nodes
+ env:
+ LIGHTWEIGHT_SHARD: ${{ matrix.shard }}
+ SKIP_UNITY_TEST_INIT: "1"
+ run: |
+ SHARD_NODES_FILE="$(mktemp)"
+ echo "shard_nodes_file=$SHARD_NODES_FILE" >> "$GITHUB_OUTPUT"
+ export SHARD_NODES_FILE
+
+ uv run python - <<'PY'
+ import os
+ import subprocess
+ import sys
+ from pathlib import Path
+
+ cmd = [
+ sys.executable,
+ "-m",
+ "pytest",
+ "--collect-only",
+ "-q",
+ "-m",
+ "not llm_call",
+ "tests/",
+ "--ignore=tests/actor",
+ "--ignore=tests/conversation_manager",
+ "--ignore=tests/async_tool_loop",
+ "--ignore=tests/agent_service",
+ "--ignore=tests/demo_url_mapping",
+ ]
+ result = subprocess.run(cmd, text=True, capture_output=True)
+ if result.returncode != 0:
+ print(result.stdout)
+ print(result.stderr, file=sys.stderr)
+ raise SystemExit(result.returncode)
+
+ nodes = [line.strip() for line in result.stdout.splitlines() if "::" in line]
+ shard = int(os.environ["LIGHTWEIGHT_SHARD"])
+ shard_count = int(os.environ["LIGHTWEIGHT_SHARD_COUNT"])
+ shard_nodes = [node for index, node in enumerate(nodes) if index % shard_count == shard - 1]
+ Path(os.environ["SHARD_NODES_FILE"]).write_text("".join(f"{node}\n" for node in shard_nodes))
+
+ print(f"Collected {len(nodes)} lightweight nodes")
+ print(f"Shard {shard}/{shard_count}: {len(shard_nodes)} nodes")
+
+ if not shard_nodes:
+ raise SystemExit(f"Shard {shard}/{shard_count} has no collected tests")
+ PY
+
+ - name: Run tests
+ id: run-tests
+ run: |
+ set +e
+ SHARD_NODES_FILE='${{ steps.collect-nodes.outputs.shard_nodes_file }}'
+ SHARD_NODE_COUNT=$(wc -l < "$SHARD_NODES_FILE" | tr -d ' ')
+
+ Xvfb :99 -screen 0 1920x1080x24 &
+ XVFB_PID=$!
+ export DISPLAY=:99
+ trap "kill $XVFB_PID 2>/dev/null" EXIT
+
+ echo "Running shard ${{ matrix.shard }}/8 with $SHARD_NODE_COUNT explicit test nodes from $SHARD_NODES_FILE"
+ tests/parallel_run.sh --timeout 7200 --skip-collection --from-file "$SHARD_NODES_FILE"
+ TEST_EXIT_CODE=$?
+ echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT"
+ exit $TEST_EXIT_CODE
+
+ - name: Dump orchestra logs on failure
+ if: failure()
+ run: |
+ echo "=== Orchestra Server Logs ==="
+ if [ -f /tmp/orchestra-local-server.log ]; then
+ cat /tmp/orchestra-local-server.log
+ mkdir -p logs/orchestra
+ cp /tmp/orchestra-local-server.log logs/orchestra/server.log
+ echo "(Copied to logs/orchestra/server.log for artifact upload)"
+ else
+ echo "No orchestra log file found at /tmp/orchestra-local-server.log"
+ fi
+ echo ""
+ echo "=== Orchestra Server Process Status ==="
+ if [ -f /tmp/orchestra-local-server.pid ]; then
+ PID=$(cat /tmp/orchestra-local-server.pid)
+ echo "Expected PID: $PID"
+ if ps -p "$PID" > /dev/null 2>&1; then
+ echo "Process is still running"
+ else
+ echo "Process is NOT running"
+ fi
+ else
+ echo "No PID file found"
+ fi
+ echo ""
+ echo "=== Any orchestra-related processes ==="
+ ps aux | grep -E "(orchestra|uvicorn)" | grep -v grep || echo "None found"
+
+ - name: Generate failure summary
+ if: failure()
+ run: |
+ ARTIFACTS_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts"
+
+ TOTAL_COUNT=0
+ if [ -d "logs/pytest" ]; then
+ TOTAL_COUNT=$(find logs/pytest -name "*.txt" -type f 2>/dev/null | wc -l | tr -d ' ')
+ fi
+
+ FAILED_SESSIONS=""
+ FAIL_COUNT=0
+ for sock in /tmp/tmux-$(id -u)/unity*; do
+ if [ -S "$sock" ]; then
+ socket_name=$(basename "$sock")
+ FAILED_SESSIONS=$(tmux -L "$socket_name" ls 2>/dev/null | sed 's/: .*//' | grep '^f ' || true)
+            FAIL_COUNT=$(echo "$FAILED_SESSIONS" | grep -c '^f ' || true)
+ break
+ fi
+ done
+
+ PASS_COUNT=$((TOTAL_COUNT - FAIL_COUNT))
+
+ echo "=== Test Results ==="
+ echo "✅ $PASS_COUNT passed, ❌ $FAIL_COUNT failed"
+ echo ""
+ echo "Failed tests:"
+ echo "$FAILED_SESSIONS"
+ echo ""
+ echo "Download logs: $ARTIFACTS_URL"
+
+ echo "## ❌ Test Failures (lightweight shard ${{ matrix.shard }}/8)" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "**✅ $PASS_COUNT** passed, **❌ $FAIL_COUNT** failed — [view logs]($ARTIFACTS_URL)" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "### Failed tests:" >> $GITHUB_STEP_SUMMARY
+ echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+ echo "$FAILED_SESSIONS" >> $GITHUB_STEP_SUMMARY
+ echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+
+ DURATION_FILE=$(find logs/pytest -name "duration_summary.txt" 2>/dev/null | head -1)
+ if [ -n "$DURATION_FILE" ] && [ -f "$DURATION_FILE" ]; then
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+          echo "📊 Test Stats: Duration & Cache (fastest → slowest)" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+ cat "$DURATION_FILE" >> $GITHUB_STEP_SUMMARY
+ echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+ echo " " >> $GITHUB_STEP_SUMMARY
+ fi
+
+ - name: Generate success summary
+ if: success()
+ run: |
+ ARTIFACTS_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts"
+
+ log_count=0
+ if [ -d "logs/pytest" ]; then
+ log_count=$(find logs/pytest -name "*.txt" -type f 2>/dev/null | wc -l | tr -d ' ')
+ fi
+
+ if [ "$log_count" -eq 0 ]; then
+ echo "## ⚠️ No Tests Ran (lightweight shard ${{ matrix.shard }}/8)" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "**0 tests executed** — this likely indicates a problem with test discovery." >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "Check the 'Run tests' step logs for details." >> $GITHUB_STEP_SUMMARY
+ echo ""
+ echo "Error: No tests were executed. This indicates a problem with test discovery." >&2
+ echo "Check the 'Run tests' step output for pytest collection errors." >&2
+ exit 1
+ fi
+
+ echo "=== All tests passed ==="
+ echo "✅ $log_count passed"
+ echo ""
+ echo "View logs: $ARTIFACTS_URL"
+
+ echo "## ✅ All Tests Passed (lightweight shard ${{ matrix.shard }}/8)" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "**✅ $log_count** passed — [view logs]($ARTIFACTS_URL)" >> $GITHUB_STEP_SUMMARY
+
+ DURATION_FILE=$(find logs/pytest -name "duration_summary.txt" 2>/dev/null | head -1)
+ if [ -n "$DURATION_FILE" ] && [ -f "$DURATION_FILE" ]; then
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+          echo "📊 Test Stats: Duration & Cache (fastest → slowest)" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+ cat "$DURATION_FILE" >> $GITHUB_STEP_SUMMARY
+ echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+ echo " " >> $GITHUB_STEP_SUMMARY
+ fi
+
+ - name: Stop local orchestra (before log upload)
+ if: always()
+ continue-on-error: true
+ run: |
+ trap 'echo "Received SIGTERM, exiting gracefully"; exit 0' TERM
+ orchestra/scripts/local.sh stop || true
+
+ - name: Prepare logs for upload
+ if: always()
+ run: |
+ echo "=== Preparing logs for upload ==="
+ mkdir -p logs
+
+ if [ -d logs/pytest ]; then
+ file_count=$(find logs/pytest -type f 2>/dev/null | wc -l | tr -d ' ')
+ echo "✓ pytest logs: $file_count files"
+ else
+ echo "○ pytest logs: directory not found (skipping)"
+ fi
+
+ if [ -d logs/unillm ]; then
+ file_count=$(find logs/unillm -type f 2>/dev/null | wc -l | tr -d ' ')
+ echo "✓ unillm logs: $file_count files"
+ else
+ echo "○ unillm logs: directory not found (skipping)"
+ fi
+
+ if [ -d logs/orchestra ]; then
+ sanitized=0
+ while IFS= read -r -d '' file; do
+ newname=$(echo "$file" | tr ':' '_')
+ if mv "$file" "$newname" 2>/dev/null; then
+ ((sanitized++)) || true
+ fi
+ done < <(find logs/orchestra -type f -name '*:*' -print0 2>/dev/null || true)
+
+ file_count=$(find logs/orchestra -type f 2>/dev/null | wc -l | tr -d ' ')
+ if [ "$sanitized" -gt 0 ]; then
+ echo "✓ orchestra logs: $file_count files ($sanitized filenames sanitized)"
+ else
+ echo "✓ orchestra logs: $file_count files"
+ fi
+ else
+ echo "○ orchestra logs: directory not found (skipping)"
+ fi
+
+ echo "=== Log preparation complete ==="
+
+ - name: Upload logs
+ if: always()
+ uses: actions/upload-artifact@v7
+ with:
+ name: logs-lightweight-shard-${{ matrix.shard }}
+ path: logs/
+ if-no-files-found: ignore
+ retention-days: 90
+
+ - name: Cleanup on cancellation
+ if: cancelled()
+ run: |
+ echo "Workflow cancelled - cleaning up tmux sessions and orphaned processes..."
+ tests/kill_server.sh --all || true
+ orchestra/scripts/local.sh stop || true
+ echo "Cleanup complete."
diff --git a/tests/_parse_args.sh b/tests/_parse_args.sh
index a4e5be6d9..d1f7f8006 100644
--- a/tests/_parse_args.sh
+++ b/tests/_parse_args.sh
@@ -11,9 +11,10 @@
#
# After calling parse_test_args, these variables are populated:
# SERIAL, TIMEOUT, NAME_PATTERN, EVAL_ONLY, SYMBOLIC_ONLY,
-# REPEAT_COUNT, OVERWRITE_SCENARIOS, MAX_JOBS, ENV_OVERRIDES[],
-# TAGS[], PYTEST_EXTRA_ARGS[], PYTEST_COLLECTION_ARGS[],
-# POSITIONAL_ARGS[]
+# REPEAT_COUNT, OVERWRITE_SCENARIOS, SKIP_COLLECTION,
+# MAX_JOBS, ENV_OVERRIDES[],
+# TAGS[], FROM_FILE_PATHS[], PYTEST_EXTRA_ARGS[],
+# PYTEST_COLLECTION_ARGS[], POSITIONAL_ARGS[]
#
# Additional functions:
# resolve_test_paths REPO_ROOT - Validates paths in POSITIONAL_ARGS, sets RESOLVED_TEST_PATHS[]
@@ -42,11 +43,14 @@ parse_test_args() {
SYMBOLIC_ONLY=0
REPEAT_COUNT=1
OVERWRITE_SCENARIOS=0
+ SKIP_COLLECTION=0
MAX_JOBS=$_PARSE_ARGS_NUM_CORES
ENV_OVERRIDES=()
TAGS=()
+ FROM_FILE_PATHS=()
PYTEST_EXTRA_ARGS=()
PYTEST_COLLECTION_ARGS=()
+ PYTEST_IGNORE_PATHS=()
POSITIONAL_ARGS=()
while (( "$#" )); do
@@ -103,6 +107,10 @@ parse_test_args() {
OVERWRITE_SCENARIOS=1
shift
;;
+ --skip-collection)
+ SKIP_COLLECTION=1
+ shift
+ ;;
--tags)
if [[ -n "${2-}" ]]; then
# Split on comma and add each tag to TAGS array
@@ -116,6 +124,15 @@ parse_test_args() {
return 2
fi
;;
+ --from-file)
+ if [[ -n "${2-}" ]]; then
+ FROM_FILE_PATHS+=( "$2" )
+ shift 2
+ else
+ echo "Error: --from-file requires a file path." >&2
+ return 2
+ fi
+ ;;
-j|--jobs)
if [[ -z "${2-}" ]]; then
echo "Error: -j|--jobs requires an argument (e.g., --jobs 8, --jobs 0, --jobs none)." >&2
@@ -141,8 +158,10 @@ parse_test_args() {
--)
shift
PYTEST_EXTRA_ARGS=("$@")
- # Extract collection-relevant args (-k, -m) for use during test discovery
- # These filters affect which tests are collected, not just how they run
+ # Extract collection-relevant args for use during test discovery.
+ # These affect which tests are collected, not just how they run:
+ # -k/-m: filter by keyword/marker expression
+ # --ignore/--ignore-glob: exclude paths from collection
local _coll_i=0
while (( _coll_i < ${#PYTEST_EXTRA_ARGS[@]} )); do
local _coll_arg="${PYTEST_EXTRA_ARGS[_coll_i]}"
@@ -156,11 +175,32 @@ parse_test_args() {
((_coll_i++))
fi
;;
+ --ignore)
+ # Next arg is the path (e.g., --ignore tests/actor)
+ if (( _coll_i + 1 < ${#PYTEST_EXTRA_ARGS[@]} )); then
+ PYTEST_COLLECTION_ARGS+=( "$_coll_arg" "${PYTEST_EXTRA_ARGS[_coll_i+1]}" )
+ PYTEST_IGNORE_PATHS+=( "${PYTEST_EXTRA_ARGS[_coll_i+1]%/}" )
+ ((_coll_i+=2))
+ else
+ ((_coll_i++))
+ fi
+ ;;
-k=*|-m=*)
# Value is attached (e.g., -k="pattern")
PYTEST_COLLECTION_ARGS+=( "$_coll_arg" )
((_coll_i++))
;;
+ --ignore=*)
+ # Value is attached (e.g., --ignore=tests/actor)
+ PYTEST_COLLECTION_ARGS+=( "$_coll_arg" )
+ local _ival="${_coll_arg#--ignore=}"
+ PYTEST_IGNORE_PATHS+=( "${_ival%/}" )
+ ((_coll_i++))
+ ;;
+ --ignore-glob=*)
+ PYTEST_COLLECTION_ARGS+=( "$_coll_arg" )
+ ((_coll_i++))
+ ;;
--keyword=*|--markers=*)
# Long form with attached value
PYTEST_COLLECTION_ARGS+=( "$_coll_arg" )
@@ -236,12 +276,17 @@ reconstruct_parallel_run_args() {
(( SYMBOLIC_ONLY )) && args="$args --symbolic-only"
(( REPEAT_COUNT > 1 )) && args="$args --repeat $REPEAT_COUNT"
(( OVERWRITE_SCENARIOS )) && args="$args --overwrite-scenarios"
+ (( SKIP_COLLECTION )) && args="$args --skip-collection"
# Note: MAX_JOBS is not passed to CI (CI has its own resource limits)
for tag in "${TAGS[@]}"; do
args="$args --tags $(printf '%q' "$tag")"
done
+ for path in "${FROM_FILE_PATHS[@]}"; do
+ args="$args --from-file $(printf '%q' "$path")"
+ done
+
# Include --env flags if requested
if (( include_env )); then
for kv in "${ENV_OVERRIDES[@]}"; do
@@ -280,7 +325,9 @@ Options:
--eval-only Run only @pytest.mark.eval tests
--symbolic-only Run only non-eval tests
--repeat N Run each test N times
+ --skip-collection Trust explicit node ids instead of validating via collection
--tags TAG Tag runs for filtering (repeatable)
+ --from-file PATH Read test targets from a newline-delimited file
--overwrite-scenarios Delete and recreate test scenarios
-h, --help Show this help
-- Pass remaining args directly to pytest
@@ -292,6 +339,8 @@ Examples:
$script_name -s tests/ # Serial mode (per-file)
$script_name -j 8 tests/ # Limit to 8 concurrent
$script_name --eval-only tests/ # Only eval tests
+ $script_name --skip-collection tests/foo.py::test_bar
+ $script_name --from-file targets.txt # Read targets from file
$script_name -e UNILLM_CACHE=false tests/
$script_name tests/ -- -v --tb=short # Pass args to pytest
$script_name tests/ -- -k 'gpt-5' # Filter by test name pattern
diff --git a/tests/parallel_run.sh b/tests/parallel_run.sh
index a651cce60..cf3dc1a9e 100755
--- a/tests/parallel_run.sh
+++ b/tests/parallel_run.sh
@@ -188,8 +188,8 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd -P)"
# Parse arguments using shared helper
# Returns: 0=success, 1=help requested, 2=error
-parse_test_args "$@"
-_parse_result=$?
+_parse_result=0
+parse_test_args "$@" || _parse_result=$?
if (( _parse_result == 1 )); then
# Help requested
HELP_SCRIPT_NAME="parallel_run.sh"
@@ -575,8 +575,51 @@ build_env_exports() {
echo "$exports"
}
-# Reset positional parameters safely under nounset (only expand if set)
-set -- ${POSITIONAL_ARGS[@]+"${POSITIONAL_ARGS[@]}"}
+# Resolve an auxiliary input file path relative to the caller, tests/, or repo root.
+resolve_input_file_path() {
+ local path="$1"
+ if [[ -f "$path" ]]; then
+ printf "%s" "$path"
+ elif [[ -f "$SCRIPT_DIR/$path" ]]; then
+ printf "%s" "$SCRIPT_DIR/$path"
+ elif [[ -f "$REPO_ROOT/$path" ]]; then
+ printf "%s" "$REPO_ROOT/$path"
+ else
+ return 1
+ fi
+}
+
+expanded_targets=()
+if (( ${#POSITIONAL_ARGS[@]} > 0 )); then
+ expanded_targets=( "${POSITIONAL_ARGS[@]}" )
+fi
+
+had_explicit_target_source=0
+if (( ${#POSITIONAL_ARGS[@]} > 0 || ${#FROM_FILE_PATHS[@]} > 0 )); then
+ had_explicit_target_source=1
+fi
+
+if (( ${#FROM_FILE_PATHS[@]} > 0 )); then
+ for list_path in "${FROM_FILE_PATHS[@]}"; do
+ resolved_list_path=$(resolve_input_file_path "$list_path") || {
+ echo "Error: --from-file not found: $list_path" >&2
+ exit 2
+ }
+ while IFS= read -r raw_line || [[ -n "$raw_line" ]]; do
+ line="${raw_line%$'\r'}"
+ line="${line#"${line%%[![:space:]]*}"}"
+ line="${line%"${line##*[![:space:]]}"}"
+ [[ -z "$line" || "$line" == \#* ]] && continue
+ expanded_targets+=( "$line" )
+ done < "$resolved_list_path"
+ done
+fi
+
+if (( ${#expanded_targets[@]} > 0 )); then
+ set -- "${expanded_targets[@]}"
+else
+ set --
+fi
# Always operate from the repo root for discovery, regardless of where the script was invoked
cd "$REPO_ROOT"
@@ -979,6 +1022,10 @@ declare -a direct_files=()
declare -a direct_nodes=()
if (( $# == 0 )); then
+ if (( had_explicit_target_source )); then
+ echo "No valid directories, files, or tests provided." >&2
+ exit 1
+ fi
roots=( "." )
else
for arg in "$@"; do
@@ -1140,6 +1187,11 @@ validate_and_add_direct_nodes() {
return 0
fi
+ if (( SKIP_COLLECTION )); then
+ printf '%s\0' "${direct_nodes[@]}" >> "$tmp"
+ return 0
+ fi
+
# Extract unique base files from direct_nodes
local -a base_files=()
local seen_files=""
@@ -1154,15 +1206,19 @@ validate_and_add_direct_nodes() {
# Collect all valid nodes from those files (no marker filter — just checking existence)
local collected
collected=$(collect_nodes_batch "" "${base_files[@]}")
+ local collected_file
+ collected_file=$(mktemp)
+ printf '%s\n' "$collected" > "$collected_file"
# Validate each direct_node against collected output
for node in "${direct_nodes[@]}"; do
- if echo "$collected" | grep -qxF "$node"; then
+ if grep -qxF -- "$node" "$collected_file"; then
printf '%s\0' "$node" >> "$tmp"
else
echo "Error: Test node not found (skipping): $node" >&2
fi
done
+ rm -f "$collected_file"
}
# Gather recursive .py files from roots (NUL-delimited, sorted)
@@ -1174,6 +1230,41 @@ if (( ${#roots[@]} )); then
done < <(eval "$(build_find_cmd)")
fi
+# Filter out files under --ignore paths (pytest --ignore only affects directory
+# traversal, not explicitly listed targets, so we must filter before collection)
+if (( ${#PYTEST_IGNORE_PATHS[@]} > 0 && (${#found_files[@]} + ${#direct_files[@]}) > 0 )); then
+ _filter_ignored() {
+ local -a input=("$@")
+ local -a output=()
+ for f in "${input[@]}"; do
+ local _skip=0
+ for pfx in "${PYTEST_IGNORE_PATHS[@]}"; do
+ if [[ "$f" == "$pfx"/* || "$f" == "./$pfx"/* || "$f" == "$pfx" ]]; then
+ _skip=1
+ break
+ fi
+ done
+ (( _skip )) || output+=( "$f" )
+ done
+    if (( ${#output[@]} )); then printf '%s\0' "${output[@]}"; fi
+ }
+ if (( ${#found_files[@]} )); then
+ tmp_filtered=()
+ while IFS= read -r -d '' f; do
+ tmp_filtered+=( "$f" )
+ done < <(_filter_ignored "${found_files[@]}")
+ found_files=( "${tmp_filtered[@]}" )
+ fi
+ if (( ${#direct_files[@]} )); then
+ tmp_filtered=()
+ while IFS= read -r -d '' f; do
+ tmp_filtered+=( "$f" )
+ done < <(_filter_ignored "${direct_files[@]}")
+ direct_files=( "${tmp_filtered[@]}" )
+ fi
+ unset -f _filter_ignored
+fi
+
# Apply filename pattern filter (matches on basename) if provided
if [[ -n "$NAME_PATTERN" ]]; then
if (( ${#direct_files[@]} )); then
@@ -1310,6 +1401,7 @@ fi
# Print header before drip-feeding session creation
echo "Creating ${#files[@]} tmux sessions..."
+WALL_START=$(date +%s)
for target in "${files[@]}"; do
# Report any completions before creating new sessions
@@ -1526,8 +1618,17 @@ if (( total_tests > 0 )); then
else
duration_str="${total_duration}s"
fi
+ wall_duration=$(( $(date +%s) - WALL_START ))
+ if (( wall_duration >= 60 )); then
+ wall_mins=$((wall_duration / 60))
+ wall_secs=$((wall_duration % 60))
+ wall_str="${wall_mins}m ${wall_secs}s"
+ else
+ wall_str="${wall_duration}s"
+ fi
total_cache_rate=$(format_cache_rate "$total_hits" "$total_misses")
total_calls=$((total_hits + total_misses))
+ print_duration_line " Wall time: $wall_str"
print_duration_line " Serial duration: $duration_str"
print_duration_line " LLM cache: $total_cache_rate ($total_hits hits, $total_misses misses, $total_calls total)"
print_duration_line " LLM cost: \$$total_cost"