From 3d337936fe770bce29778daec9688660551175f8 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Tue, 7 Apr 2026 13:48:12 +0200 Subject: [PATCH 01/14] ci: restructure CI to auto-run tests on staging/main pushes and PRs to main Push to staging/main runs lightweight tests (marker "not llm_call", excluding actor/conversation_manager/async_tool_loop dirs). PRs to main run the full suite. Removes the opt-in [run-tests]/[parallel_run.sh] tag mechanism. Also fixes discover_test_paths.py which had a broken startswith("test") filter that didn't match any actual test directory names. --- .github/scripts/discover_test_paths.py | 87 +++++++-- .github/workflows/tests.yml | 249 ++++++------------------- 2 files changed, 126 insertions(+), 210 deletions(-) diff --git a/.github/scripts/discover_test_paths.py b/.github/scripts/discover_test_paths.py index 49e37e701..db203cab3 100644 --- a/.github/scripts/discover_test_paths.py +++ b/.github/scripts/discover_test_paths.py @@ -9,6 +9,9 @@ # Expand specific paths to their leaf directories python discover_test_paths.py tests/function_manager tests/actor + # Exclude directories from discovery + python discover_test_paths.py --exclude tests/actor --exclude tests/conversation_manager + When explicit paths are provided: - Files are kept as-is (no expansion) - Directories are expanded to their leaf sub-directories using Option A algorithm @@ -19,8 +22,8 @@ direct test files (space-separated), plus recursive jobs for subdirs """ +import argparse import os -import sys from pathlib import Path EXCLUDE_DIRS = { @@ -33,6 +36,14 @@ "venv", } +_exclude_prefixes: list[str] = [] + + +def _is_excluded(path: str | Path) -> bool: + """Return True if path starts with any --exclude prefix.""" + s = str(path) + return any(s == p or s.startswith(p + "/") for p in _exclude_prefixes) + def has_test_files(directory): """Check if directory has test_*.py files directly in it.""" @@ -46,9 +57,17 @@ def has_test_files(directory): def has_test_subdirs(directory): 
"""Check if directory has subdirectories that contain test files (recursively).""" for subdir in directory.iterdir(): - if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: + if ( + subdir.is_dir() + and subdir.name not in EXCLUDE_DIRS + and not _is_excluded(subdir) + ): for root, dirs, files in os.walk(subdir): - dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS] + dirs[:] = [ + d + for d in dirs + if d not in EXCLUDE_DIRS and not _is_excluded(os.path.join(root, d)) + ] if any(f.startswith("test_") and f.endswith(".py") for f in files): return True return False @@ -67,7 +86,7 @@ def get_direct_test_files(directory): def collect_paths(directory, paths): """Recursively collect test paths using Option A algorithm.""" - if not directory.is_dir(): + if not directory.is_dir() or _is_excluded(directory): return has_files = has_test_files(directory) @@ -81,12 +100,20 @@ def collect_paths(directory, paths): direct_files = get_direct_test_files(directory) paths.append(" ".join(str(f) for f in direct_files)) for subdir in sorted(directory.iterdir()): - if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: + if ( + subdir.is_dir() + and subdir.name not in EXCLUDE_DIRS + and not _is_excluded(subdir) + ): collect_paths(subdir, paths) elif has_subdirs: # No direct test files, but has subdirs with tests: just recurse for subdir in sorted(directory.iterdir()): - if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: + if ( + subdir.is_dir() + and subdir.name not in EXCLUDE_DIRS + and not _is_excluded(subdir) + ): collect_paths(subdir, paths) @@ -100,6 +127,9 @@ def expand_path(path_str): path = Path(path_str) paths = [] + if _is_excluded(path): + return [] + if not path.exists(): # Path doesn't exist - return as-is and let pytest handle the error return [path_str] @@ -122,12 +152,20 @@ def expand_path(path_str): direct_files = get_direct_test_files(path) paths.append(" ".join(str(f) for f in direct_files)) for subdir in sorted(path.iterdir()): - if subdir.is_dir() and 
subdir.name not in EXCLUDE_DIRS: + if ( + subdir.is_dir() + and subdir.name not in EXCLUDE_DIRS + and not _is_excluded(subdir) + ): collect_paths(subdir, paths) elif has_subdirs: # No direct test files, but has subdirs with tests: recurse for subdir in sorted(path.iterdir()): - if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: + if ( + subdir.is_dir() + and subdir.name not in EXCLUDE_DIRS + and not _is_excluded(subdir) + ): collect_paths(subdir, paths) else: # No test files at all - return as-is and let pytest handle it @@ -141,33 +179,50 @@ def discover_all(): test_root = Path("tests") paths = [] - # Handle test files directly in tests/ root (e.g., test_settings.py) for item in sorted(test_root.iterdir()): + if _is_excluded(item): + continue if ( item.is_file() and item.name.startswith("test_") and item.name.endswith(".py") ): paths.append(str(item)) - elif item.is_dir() and item.name.startswith("test"): + elif item.is_dir() and item.name not in EXCLUDE_DIRS: collect_paths(item, paths) return paths def main(): - if len(sys.argv) > 1: - # Explicit paths provided - expand each one + parser = argparse.ArgumentParser( + description="Discover test paths for CI parallelism", + ) + parser.add_argument( + "paths", + nargs="*", + help="Paths to expand (default: discover all)", + ) + parser.add_argument( + "--exclude", + action="append", + default=[], + help="Directory prefix to exclude (repeatable)", + ) + args = parser.parse_args() + + global _exclude_prefixes + _exclude_prefixes = [p.rstrip("/") for p in args.exclude] + + if args.paths: all_paths = [] - for arg in sys.argv[1:]: - expanded = expand_path(arg) + for p in args.paths: + expanded = expand_path(p) all_paths.extend(expanded) paths = all_paths else: - # No arguments - discover all from tests/ paths = discover_all() - # Output unique paths, sorted for p in sorted(set(paths)): print(p) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 47116f1f7..9d0a70a57 100644 --- 
a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,137 +36,48 @@ on: type: number default: 120 - # Push events - tests only run if commit message contains [run-tests] + # Push to staging/main → lightweight tests (not llm_call, excluded heavy dirs) push: branches: - - '**' # All branches + - staging + - main - # Pull request events - tests only run if PR title contains [run-tests] + # PR to main → full test suite pull_request: branches: - - '**' # All branches + - main jobs: - # Determine if tests should run based on trigger and commit/PR message + # Determine test mode based on trigger type. + # Branch filtering is handled by the on: triggers above, so all events here + # are guaranteed to be: push to staging/main, PR to main, or workflow_dispatch. should-run-tests: runs-on: ubuntu-latest outputs: - run_tests: ${{ steps.check.outputs.run_tests }} - parallel_run_cmd: ${{ steps.check.outputs.parallel_run_cmd }} + test_mode: ${{ steps.check.outputs.test_mode }} steps: - - name: Check if tests should run + - name: Determine test mode id: check env: - # Pass these via env to avoid shell interpretation of special chars - # (backticks, quotes, $() in commit messages would otherwise be executed) EVENT_NAME: ${{ github.event_name }} - COMMIT_MSG: ${{ github.event.head_commit.message }} - PR_TITLE: ${{ github.event.pull_request.title }} - PR_BASE_REF: ${{ github.event.pull_request.base.ref }} - PR_HEAD_REF: ${{ github.event.pull_request.head.ref }} run: | - # Helper function to extract [parallel_run.sh ...] content - # Matches ALL occurrences and combines their arguments - extract_parallel_cmd() { - local msg="$1" - local result="" - local remaining="$msg" - - # Loop through all matches of [parallel_run.sh ...] - while [[ "$remaining" =~ \[parallel_run\.sh[[:space:]]+([^\]]+)\] ]]; do - local match="${BASH_REMATCH[1]}" - # Skip literal "..." which is just documentation/example text - if [[ "$match" != "..." 
]]; then - if [[ -n "$result" ]]; then - result="$result $match" - else - result="$match" - fi - fi - # Remove the matched portion and continue searching - remaining="${remaining#*"${BASH_REMATCH[0]}"}" - done - - if [[ -n "$result" ]]; then - echo "$result" - fi - } - - # workflow_dispatch always runs tests if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then - echo "run_tests=true" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" - echo "Tests triggered by workflow_dispatch" - exit 0 - fi - - # For push events, check commit message - if [[ "$EVENT_NAME" == "push" ]]; then - # Check for [parallel_run.sh ...] first - PARALLEL_CMD=$(extract_parallel_cmd "$COMMIT_MSG") - if [[ -n "$PARALLEL_CMD" ]]; then - echo "run_tests=true" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=$PARALLEL_CMD" >> "$GITHUB_OUTPUT" - echo "Tests triggered by commit message: [parallel_run.sh $PARALLEL_CMD]" - exit 0 - fi - - # Fall back to [run-tests] - if [[ "$COMMIT_MSG" == *"[run-tests]"* ]]; then - echo "run_tests=true" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" - echo "Tests triggered by commit message containing [run-tests]" - else - echo "run_tests=false" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" - echo "Skipping tests - commit message does not contain [run-tests] or [parallel_run.sh ...]" - fi - exit 0 - fi - - # For pull_request events, check PR title - if [[ "$EVENT_NAME" == "pull_request" ]]; then - # Auto-run full suite for staging → main PRs (no tags required) - if [[ "$PR_BASE_REF" == "main" && "$PR_HEAD_REF" == "staging" ]]; then - echo "run_tests=true" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" - echo "Tests auto-triggered: staging → main PR" - exit 0 - fi - - # Check for [parallel_run.sh ...] 
first - PARALLEL_CMD=$(extract_parallel_cmd "$PR_TITLE") - if [[ -n "$PARALLEL_CMD" ]]; then - echo "run_tests=true" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=$PARALLEL_CMD" >> "$GITHUB_OUTPUT" - echo "Tests triggered by PR title: [parallel_run.sh $PARALLEL_CMD]" - exit 0 - fi - - # Fall back to [run-tests] - if [[ "$PR_TITLE" == *"[run-tests]"* ]]; then - echo "run_tests=true" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" - echo "Tests triggered by PR title containing [run-tests]" - else - echo "run_tests=false" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" - echo "Skipping tests - PR title does not contain [run-tests] or [parallel_run.sh ...]" - fi - exit 0 + echo "test_mode=manual" >> "$GITHUB_OUTPUT" + echo "Mode: manual (workflow_dispatch)" + elif [[ "$EVENT_NAME" == "push" ]]; then + echo "test_mode=lightweight" >> "$GITHUB_OUTPUT" + echo "Mode: lightweight (push to ${GITHUB_REF#refs/heads/})" + elif [[ "$EVENT_NAME" == "pull_request" ]]; then + echo "test_mode=full" >> "$GITHUB_OUTPUT" + echo "Mode: full (PR to main)" fi - # Default: don't run tests - echo "run_tests=false" >> "$GITHUB_OUTPUT" - echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" - echo "Skipping tests - unknown event type" - discover: runs-on: ubuntu-latest needs: should-run-tests - if: needs.should-run-tests.outputs.run_tests == 'true' outputs: test_dirs: ${{ steps.set-matrix.outputs.test_dirs }} + test_mode: ${{ needs.should-run-tests.outputs.test_mode }} parallel_run_args: ${{ steps.set-matrix.outputs.parallel_run_args }} job_timeout: ${{ steps.set-matrix.outputs.job_timeout }} # Project deletion flags (handled at workflow level to avoid race conditions) @@ -181,25 +92,15 @@ jobs: - id: set-matrix shell: bash run: | - # Parse test paths and flags, then create matrix entries for each path. - # Each unique path gets its own parallel GitHub runner. - # - # Sources (priority order): - # 1. [parallel_run.sh ...] 
from commit/PR → parse paths and flags - # 2. workflow_dispatch inputs → test_path + parallel_run_args - # 3. Default (no paths) → discover all leaf test folders + # Discover test paths and create matrix entries for parallel runners. # - # ALL paths (explicit or discovered) are expanded to leaf directories: - # - Files are kept as-is - # - Directories are expanded to their leaf sub-folders + # Sources: + # 1. workflow_dispatch inputs → test_path + parallel_run_args + # 2. Default → discover all leaf test folders (with mode-based exclusions) # - # Leaf discovery (Option A algorithm): - # - Leaf directories (test files, no test subdirs) → one job per directory - # - Mixed directories (test files AND test subdirs) → one bundled job for - # all direct test files (space-separated), plus recursive jobs for subdirs - # This maximizes parallelism regardless of how paths are specified. + # Paths are expanded to leaf directories for maximum parallelism. - PARALLEL_CMD="${{ needs.should-run-tests.outputs.parallel_run_cmd }}" + TEST_MODE="${{ needs.should-run-tests.outputs.test_mode }}" INPUT_PATH="${{ inputs.test_path }}" INPUT_ARGS="${{ inputs.parallel_run_args }}" @@ -208,31 +109,15 @@ jobs: declare -a PATHS=() declare -a FLAGS=() - if [[ -n "$PARALLEL_CMD" ]]; then - # Parse [parallel_run.sh ...] from commit/PR message - # Separate flags (--env, --eval-only, etc.) 
from paths - echo "Parsing [parallel_run.sh $PARALLEL_CMD]" >&2 - LAST_FLAG="" - for token in $PARALLEL_CMD; do - if [[ "$token" == --* ]]; then - # It's a flag - collect it and its value if needed - FLAGS+=("$token") - LAST_FLAG="$token" - elif [[ "$LAST_FLAG" == "--env" || "$LAST_FLAG" == "--tags" || "$LAST_FLAG" == "-j" || "$LAST_FLAG" == "--jobs" || "$LAST_FLAG" == "-t" || "$LAST_FLAG" == "--timeout" ]]; then - # Previous token was a flag that takes a value - FLAGS+=("$token") - LAST_FLAG="" - else - # It's a path - collect for expansion - RAW_PATHS+=("$token") - LAST_FLAG="" - fi - done - elif [[ -n "$INPUT_PATH" && "$INPUT_PATH" != "." ]]; then - # workflow_dispatch with specific path(s) - # Split space-separated paths into array + # Build --exclude args for lightweight mode + EXCLUDE_ARGS="" + if [[ "$TEST_MODE" == "lightweight" ]]; then + EXCLUDE_ARGS="--exclude tests/actor --exclude tests/conversation_manager --exclude tests/async_tool_loop" + echo "Lightweight mode: excluding tests/actor, tests/conversation_manager, tests/async_tool_loop" >&2 + fi + + if [[ "$TEST_MODE" == "manual" && -n "$INPUT_PATH" && "$INPUT_PATH" != "." ]]; then read -ra RAW_PATHS <<< "$INPUT_PATH" - # Flags come from parallel_run_args input if [[ -n "$INPUT_ARGS" ]]; then read -ra FLAGS <<< "$INPUT_ARGS" fi @@ -240,28 +125,21 @@ jobs: fi # Expand paths to leaf directories using discover_test_paths.py - # - If RAW_PATHS is empty, discover all test paths from tests/ - # - If RAW_PATHS has entries, expand each directory to its leaf sub-folders - # This ensures maximum parallelism regardless of how paths are specified. 
if (( ${#RAW_PATHS[@]} == 0 )); then - # No explicit paths - discover all while IFS= read -r path; do PATHS+=("$path") - done < <(python3 .github/scripts/discover_test_paths.py) - # Flags come from parallel_run_args input (if any) - if [[ -n "$INPUT_ARGS" ]]; then + done < <(python3 .github/scripts/discover_test_paths.py $EXCLUDE_ARGS) + if [[ "$TEST_MODE" == "manual" && -n "$INPUT_ARGS" ]]; then read -ra FLAGS <<< "$INPUT_ARGS" fi echo "Discovered ${#PATHS[@]} test paths (leaf dirs + individual files)" >&2 else - # Expand explicit paths to their leaf directories while IFS= read -r path; do PATHS+=("$path") - done < <(python3 .github/scripts/discover_test_paths.py "${RAW_PATHS[@]}") + done < <(python3 .github/scripts/discover_test_paths.py $EXCLUDE_ARGS "${RAW_PATHS[@]}") echo "Expanded ${#RAW_PATHS[@]} path(s) to ${#PATHS[@]} leaf test paths" >&2 fi - # Build JSON array of paths for matrix if (( ${#PATHS[@]} == 0 )); then echo "Error: No test paths found" >&2 exit 1 @@ -281,12 +159,9 @@ jobs: # to .env in the pytest job. We only parse it here for project mgmt flags. # ===================================================================== - # CI defaults for project management (can be overridden) - # With local orchestra (default), these are ignored - each job has fresh DB - # Only relevant when ORCHESTRA_URL is set to a non-local URL (staging/production) - DELETE_ON_START="false" # Default: no deletion (local orchestra has fresh DB) - DELETE_ON_EXIT="false" # Default: keep project after tests - RANDOM_PROJECTS="false" # Default: shared project mode + DELETE_ON_START="false" + DELETE_ON_EXIT="false" + RANDOM_PROJECTS="false" # Orchestra branch precedence (highest to lowest): # 1. 
--env LOCAL_ORCHESTRA_BRANCH=xxx (handled below in FLAGS parsing) @@ -308,11 +183,9 @@ jobs: fi # Parse env_file_content for project management flags only - # (actual env vars are written to .env in pytest job, not passed as --env args) ENV_FILE_CONTENT_RAW="${{ inputs.env_file_content }}" if [[ -n "$ENV_FILE_CONTENT_RAW" ]]; then echo "Parsing env_file_content for project management flags..." >&2 - # Try to decode as base64; if it fails or produces garbage, assume raw content if DECODED=$(echo "$ENV_FILE_CONTENT_RAW" | base64 -d 2>/dev/null) && [[ "$DECODED" == *$'\n'* || "$DECODED" == *"="* ]]; then ENV_FILE_CONTENT="$DECODED" else @@ -320,20 +193,13 @@ jobs: fi if [[ -n "$ENV_FILE_CONTENT" ]]; then while IFS= read -r line || [[ -n "$line" ]]; do - # Skip empty lines and comments [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue - - # Extract KEY=VALUE (handles quotes) if [[ "$line" =~ ^[[:space:]]*([A-Za-z_][A-Za-z0-9_]*)[[:space:]]*=[[:space:]]*(.*)[[:space:]]*$ ]]; then key="${BASH_REMATCH[1]}" value="${BASH_REMATCH[2]}" - - # Strip surrounding quotes if present if [[ "$value" =~ ^\"(.*)\"$ ]] || [[ "$value" =~ ^\'(.*)\'$ ]]; then value="${BASH_REMATCH[1]}" fi - - # Only check for project management flags (other vars handled via .env file) case "$key=$value" in UNIFY_TESTS_DELETE_PROJ_ON_START=true|UNIFY_TESTS_DELETE_PROJ_ON_START=True|UNIFY_TESTS_DELETE_PROJ_ON_START=1) DELETE_ON_START="true" @@ -370,7 +236,6 @@ jobs: fi # Process FLAGS (from parallel_run_args) - highest priority - # These are explicit --env args passed via commit message or workflow dispatch declare -a CLEANED_FLAGS=() skip_next=false @@ -383,7 +248,6 @@ jobs: flag="${FLAGS[$i]}" next_val="${FLAGS[$((i+1))]:-}" - # Check for --env with project management flags (strip them, handle at workflow level) if [[ "$flag" == "--env" ]]; then case "$next_val" in UNIFY_TESTS_DELETE_PROJ_ON_START=true|UNIFY_TESTS_DELETE_PROJ_ON_START=True|UNIFY_TESTS_DELETE_PROJ_ON_START=1) @@ -412,7 +276,7 @@ 
jobs: ;; UNIFY_TESTS_RAND_PROJ=true|UNIFY_TESTS_RAND_PROJ=True|UNIFY_TESTS_RAND_PROJ=1) RANDOM_PROJECTS="true" - CLEANED_FLAGS+=("$flag" "$next_val") # Keep for runners + CLEANED_FLAGS+=("$flag" "$next_val") skip_next=true echo " args: RANDOM_PROJECTS=true (override)" >&2 continue @@ -446,8 +310,6 @@ jobs: echo "random_projects=$RANDOM_PROJECTS" >> "$GITHUB_OUTPUT" echo "orchestra_branch=$ORCHESTRA_BRANCH" >> "$GITHUB_OUTPUT" - # Build parallel_run_args from explicit flags only (not env_file content) - # env_file content is handled separately by writing to .env on the runner if (( ${#CLEANED_FLAGS[@]} > 0 )); then flags_str="${CLEANED_FLAGS[*]}" echo "parallel_run_args=$flags_str" >> "$GITHUB_OUTPUT" @@ -472,7 +334,6 @@ jobs: needs: [should-run-tests, discover] # Only run if DELETE_ON_START is requested AND we're in shared project mode if: | - needs.should-run-tests.outputs.run_tests == 'true' && needs.discover.outputs.delete_on_start == 'true' && needs.discover.outputs.random_projects != 'true' env: @@ -516,7 +377,6 @@ jobs: needs: [should-run-tests, discover, setup] if: | always() && - needs.should-run-tests.outputs.run_tests == 'true' && (needs.setup.result == 'success' || needs.setup.result == 'skipped') strategy: fail-fast: false @@ -838,28 +698,30 @@ jobs: run: | set +e - # parallel_run_args comes from discover job (parsed from commit message or inputs) + TEST_MODE="${{ needs.discover.outputs.test_mode }}" EXTRA_ARGS="${{ needs.discover.outputs.parallel_run_args }}" TEST_PATH="${{ steps.normalize-path.outputs.test_args }}" TIMEOUT="${{ steps.normalize-path.outputs.timeout_seconds }}" - # Start Xvfb manually instead of using xvfb-run. - # xvfb-run doesn't propagate signals to child processes, making - # workflow cancellation ineffective. This approach ensures SIGTERM - # reaches parallel_run.sh directly. 
+ # Determine pytest marker args based on test mode + MARKER_ARGS=() + if [[ "$TEST_MODE" == "lightweight" ]]; then + MARKER_ARGS=(-- -m "not llm_call") + fi + Xvfb :99 -screen 0 1920x1080x24 & XVFB_PID=$! export DISPLAY=:99 - - # Ensure Xvfb is cleaned up on exit trap "kill $XVFB_PID 2>/dev/null" EXIT - # parallel_run.sh handles starting local orchestra automatically - # Note: TEST_PATH must come before EXTRA_ARGS because EXTRA_ARGS may contain - # "-- pytest-args" and everything after -- is treated as pytest arguments # TEST_PATH is unquoted to allow word splitting for bundled multi-file entries - echo "Running: parallel_run.sh --timeout $TIMEOUT $TEST_PATH $EXTRA_ARGS" - tests/parallel_run.sh --timeout "$TIMEOUT" $TEST_PATH $EXTRA_ARGS + if (( ${#MARKER_ARGS[@]} )); then + echo "Running: parallel_run.sh --timeout $TIMEOUT $TEST_PATH ${MARKER_ARGS[*]}" + tests/parallel_run.sh --timeout "$TIMEOUT" $TEST_PATH "${MARKER_ARGS[@]}" + else + echo "Running: parallel_run.sh --timeout $TIMEOUT $TEST_PATH $EXTRA_ARGS" + tests/parallel_run.sh --timeout "$TIMEOUT" $TEST_PATH $EXTRA_ARGS + fi TEST_EXIT_CODE=$? 
echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT" exit $TEST_EXIT_CODE @@ -1114,7 +976,6 @@ jobs: # Cancelled runs likely left partial test data that should be cleaned up if: | always() && - needs.should-run-tests.outputs.run_tests == 'true' && needs.discover.outputs.random_projects != 'true' && (needs.discover.outputs.delete_on_exit == 'true' || needs.pytest.result == 'cancelled') env: @@ -1151,7 +1012,7 @@ jobs: name: Consolidate and store cache runs-on: ubuntu-latest needs: [should-run-tests, pytest, cleanup] - if: always() && needs.should-run-tests.outputs.run_tests == 'true' + if: always() steps: - uses: actions/checkout@v4 From b36d378e6eff41ed078ab930fe8de6cc71bccd24 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 14:51:36 +0200 Subject: [PATCH 02/14] ci: enable lightweight tests when merging to staging --- .github/workflows/tests.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9d0a70a57..06586ce86 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -42,15 +42,16 @@ on: - staging - main - # PR to main → full test suite + # PR to main → full test suite; PR to staging → lightweight tests pull_request: branches: - main + - staging jobs: # Determine test mode based on trigger type. # Branch filtering is handled by the on: triggers above, so all events here - # are guaranteed to be: push to staging/main, PR to main, or workflow_dispatch. + # are guaranteed to be: push to staging/main, PR to main/staging, or workflow_dispatch. 
should-run-tests: runs-on: ubuntu-latest outputs: @@ -60,6 +61,7 @@ jobs: id: check env: EVENT_NAME: ${{ github.event_name }} + PR_BASE_REF: ${{ github.event.pull_request.base.ref }} run: | if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then echo "test_mode=manual" >> "$GITHUB_OUTPUT" @@ -68,8 +70,13 @@ jobs: echo "test_mode=lightweight" >> "$GITHUB_OUTPUT" echo "Mode: lightweight (push to ${GITHUB_REF#refs/heads/})" elif [[ "$EVENT_NAME" == "pull_request" ]]; then - echo "test_mode=full" >> "$GITHUB_OUTPUT" - echo "Mode: full (PR to main)" + if [[ "$PR_BASE_REF" == "main" ]]; then + echo "test_mode=full" >> "$GITHUB_OUTPUT" + echo "Mode: full (PR to main)" + else + echo "test_mode=lightweight" >> "$GITHUB_OUTPUT" + echo "Mode: lightweight (PR to $PR_BASE_REF)" + fi fi discover: From bb4d75b220dbe807e8ad014d61afbbc4f142f229 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 15:13:53 +0200 Subject: [PATCH 03/14] ci: disable tests.yml temporarily --- .github/workflows/tests.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 06586ce86..2ce2d9efc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,17 +36,6 @@ on: type: number default: 120 - # Push to staging/main → lightweight tests (not llm_call, excluded heavy dirs) - push: - branches: - - staging - - main - - # PR to main → full test suite; PR to staging → lightweight tests - pull_request: - branches: - - main - - staging jobs: # Determine test mode based on trigger type. 
From 4b82e7d779f7349666c7d8c90b667f420a6fdf4d Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 15:36:59 +0200 Subject: [PATCH 04/14] tests(parallel_run): add support for --ignore --- tests/_parse_args.sh | 28 ++++++++++++++++++++++++++-- tests/parallel_run.sh | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/tests/_parse_args.sh b/tests/_parse_args.sh index a4e5be6d9..5730f772f 100644 --- a/tests/_parse_args.sh +++ b/tests/_parse_args.sh @@ -47,6 +47,7 @@ parse_test_args() { TAGS=() PYTEST_EXTRA_ARGS=() PYTEST_COLLECTION_ARGS=() + PYTEST_IGNORE_PATHS=() POSITIONAL_ARGS=() while (( "$#" )); do @@ -141,8 +142,10 @@ parse_test_args() { --) shift PYTEST_EXTRA_ARGS=("$@") - # Extract collection-relevant args (-k, -m) for use during test discovery - # These filters affect which tests are collected, not just how they run + # Extract collection-relevant args for use during test discovery. + # These affect which tests are collected, not just how they run: + # -k/-m: filter by keyword/marker expression + # --ignore/--ignore-glob: exclude paths from collection local _coll_i=0 while (( _coll_i < ${#PYTEST_EXTRA_ARGS[@]} )); do local _coll_arg="${PYTEST_EXTRA_ARGS[_coll_i]}" @@ -156,11 +159,32 @@ parse_test_args() { ((_coll_i++)) fi ;; + --ignore) + # Next arg is the path (e.g., --ignore tests/actor) + if (( _coll_i + 1 < ${#PYTEST_EXTRA_ARGS[@]} )); then + PYTEST_COLLECTION_ARGS+=( "$_coll_arg" "${PYTEST_EXTRA_ARGS[_coll_i+1]}" ) + PYTEST_IGNORE_PATHS+=( "${PYTEST_EXTRA_ARGS[_coll_i+1]%/}" ) + ((_coll_i+=2)) + else + ((_coll_i++)) + fi + ;; -k=*|-m=*) # Value is attached (e.g., -k="pattern") PYTEST_COLLECTION_ARGS+=( "$_coll_arg" ) ((_coll_i++)) ;; + --ignore=*) + # Value is attached (e.g., --ignore=tests/actor) + PYTEST_COLLECTION_ARGS+=( "$_coll_arg" ) + local _ival="${_coll_arg#--ignore=}" + PYTEST_IGNORE_PATHS+=( "${_ival%/}" ) + ((_coll_i++)) + ;; + --ignore-glob=*) + PYTEST_COLLECTION_ARGS+=( "$_coll_arg" ) 
+ ((_coll_i++)) + ;; --keyword=*|--markers=*) # Long form with attached value PYTEST_COLLECTION_ARGS+=( "$_coll_arg" ) diff --git a/tests/parallel_run.sh b/tests/parallel_run.sh index a651cce60..0fed212d6 100755 --- a/tests/parallel_run.sh +++ b/tests/parallel_run.sh @@ -1174,6 +1174,41 @@ if (( ${#roots[@]} )); then done < <(eval "$(build_find_cmd)") fi +# Filter out files under --ignore paths (pytest --ignore only affects directory +# traversal, not explicitly listed targets, so we must filter before collection) +if (( ${#PYTEST_IGNORE_PATHS[@]} > 0 && (${#found_files[@]} + ${#direct_files[@]}) > 0 )); then + _filter_ignored() { + local -a input=("$@") + local -a output=() + for f in "${input[@]}"; do + local _skip=0 + for pfx in "${PYTEST_IGNORE_PATHS[@]}"; do + if [[ "$f" == "$pfx"/* || "$f" == "./$pfx"/* || "$f" == "$pfx" ]]; then + _skip=1 + break + fi + done + (( _skip )) || output+=( "$f" ) + done + printf '%s\0' "${output[@]}" + } + if (( ${#found_files[@]} )); then + tmp_filtered=() + while IFS= read -r -d '' f; do + tmp_filtered+=( "$f" ) + done < <(_filter_ignored "${found_files[@]}") + found_files=( "${tmp_filtered[@]}" ) + fi + if (( ${#direct_files[@]} )); then + tmp_filtered=() + while IFS= read -r -d '' f; do + tmp_filtered+=( "$f" ) + done < <(_filter_ignored "${direct_files[@]}") + direct_files=( "${tmp_filtered[@]}" ) + fi + unset -f _filter_ignored +fi + # Apply filename pattern filter (matches on basename) if provided if [[ -n "$NAME_PATTERN" ]]; then if (( ${#direct_files[@]} )); then From 8d95b2819b22a07cffcfc19556ac50a899d0cb81 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 15:38:17 +0200 Subject: [PATCH 05/14] add tests_lightweight.yml --- .github/workflows/tests_lightweight.yml | 406 ++++++++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100644 .github/workflows/tests_lightweight.yml diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml new file mode 100644 index 
000000000..fa934ecdc --- /dev/null +++ b/.github/workflows/tests_lightweight.yml @@ -0,0 +1,406 @@ +# Lightweight test suite: runs all tests with -m "not llm_call" as a single job. +# For the full test suite (with LLM calls, matrix parallelism, cache consolidation), +# use tests.yml via workflow_dispatch. +name: Tests (lightweight) +run-name: >- + ${{ + github.event.head_commit.message || + github.event.pull_request.title + }} + +on: + push: + branches: + - staging + - main + + pull_request: + branches: + - main + - staging + +jobs: + pytest: + name: pytest (lightweight) + runs-on: ubuntu-latest-8-cores + environment: unity-testing + timeout-minutes: 130 + env: + UNIFY_TESTS_RAND_PROJ: "false" + UNIFY_TESTS_DELETE_PROJ_ON_START: "false" + UNIFY_TESTS_DELETE_PROJ_ON_EXIT: "false" + UNILLM_CACHE: "true" + ORCHESTRA_REPO_PATH: ${{ github.workspace }}/orchestra + ORCHESTRA_URL: ${{ vars.ORCHESTRA_URL }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + UNITY_COMMS_URL: ${{ vars.UNITY_COMMS_URL }} + GCP_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }} + GCP_LOCATION: ${{ vars.GCP_LOCATION }} + ORCHESTRA_GCP_BUCKET_NAME: ${{ vars.GCP_BUCKET_LOGS }} + ORCHESTRA_GCP_ASSISTANT_MEDIA_BUCKET_NAME: ${{ vars.GCP_BUCKET_ASSISTANT_IMAGES }} + ORCHESTRA_GCP_ASSISTANT_CALL_RECORDINGS_BUCKET_NAME: ${{ vars.GCP_BUCKET_RECORDINGS }} + TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }} + UNIFY_KEY: ${{ secrets.UNIFY_KEY }} + PYTHONPATH: ${{ github.workspace }}/unillm:${{ github.workspace }}/unify + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 1 + + - name: Clone orchestra repo for local deployment + uses: actions/checkout@v6 + with: + repository: unifyai/orchestra + ref: ${{ vars.LOCAL_ORCHESTRA_BRANCH || (github.ref_name == 'main' && 'main' || 'staging') }} + path: orchestra + token: ${{ secrets.CLONE_TOKEN }} + fetch-depth: 1 + + - name: Clone unify repo + uses: actions/checkout@v6 + with: + repository: unifyai/unify + ref: ${{ 
github.ref_name == 'main' && 'main' || 'staging' }} + path: unify + token: ${{ secrets.CLONE_TOKEN }} + fetch-depth: 1 + + - name: Clone unillm repo + uses: actions/checkout@v6 + with: + repository: unifyai/unillm + ref: ${{ github.ref_name == 'main' && 'main' || 'staging' }} + path: unillm + token: ${{ secrets.CLONE_TOKEN }} + fetch-depth: 1 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }} + + - name: Get orchestra commit SHA + id: orchestra-sha + run: echo "sha=$(git -C orchestra rev-parse HEAD)" >> $GITHUB_OUTPUT + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Cache apt packages + uses: awalsh128/cache-apt-pkgs-action@latest + with: + packages: >- + tmux zsh portaudio19-dev libnss3-dev libatk-bridge2.0-dev libdrm2 + libxkbcommon0 libgtk-3-dev libgbm-dev libgirepository1.0-dev + libasound2t64 libatspi2.0-0 libgtk-3-0 libnspr4 libwayland-client0 + libxcomposite1 libxdamage1 libxrandr2 xvfb + version: 1.1 + + - name: Install uv and poetry + run: | + pip install uv + pip install poetry + + - name: Cache uv dependencies + uses: actions/cache@v5 + with: + path: | + ~/.cache/uv + .venv + key: uv-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'uv.lock', 'unillm/pyproject.toml', 'unify/pyproject.toml') }} + restore-keys: | + uv-${{ runner.os }}- + + - name: Configure unify/unillm paths for CI + run: | + sed -i 's|path = "../unify"|path = "./unify"|g' pyproject.toml + sed -i 's|path = "../unillm"|path = "./unillm"|g' pyproject.toml + + - name: Sync dependencies with uv + run: | + uv sync --all-groups + + - name: Verify local packages installed correctly + run: | + uv run python -c " + import sys + print('Python path (first 5):') + for p in sys.path[:5]: + print(f' {p}') + + from unillm import AsyncUnify, Unify + import unify + + print(f'✓ unillm: {AsyncUnify.__module__}') + print(f'✓ unify: {unify.__file__}') + assert 
hasattr(unify, 'create_project'), f'Wrong unify package!' + print('✓ Local packages verified successfully') + " + + - name: Cache Playwright assets + uses: actions/cache@v5 + id: playwright-cache + with: + path: ~/.cache/ms-playwright + key: playwright-${{ runner.os }}-${{ hashFiles('pyproject.toml') }} + restore-keys: | + playwright-${{ runner.os }}- + + - name: Install Playwright assets + if: steps.playwright-cache.outputs.cache-hit != 'true' + run: | + uv run playwright install --with-deps + + - name: Setup Node.js (agent-service tests) + uses: actions/setup-node@v4 + with: + node-version: '22' + + - name: Clone magnitude repo (agent-service tests) + uses: actions/checkout@v6 + with: + repository: unifyai/magnitude + ref: unity-modifications + path: magnitude + token: ${{ secrets.CLONE_TOKEN }} + fetch-depth: 1 + + - name: Build magnitude packages and install agent-service (agent-service tests) + run: | + cd magnitude/packages/magnitude-core + npm install + npm run build + echo "✓ magnitude-core built" + + cd ../magnitude-extract + npm install + npm run build + echo "✓ magnitude-extract built" + + cd ${{ github.workspace }}/agent-service + npm install + echo "✓ agent-service dependencies installed" + + npx patchright install chromium + echo "✓ Patchright Chromium installed" + + - name: Cache orchestra poetry dependencies + uses: actions/cache@v4 + with: + path: ~/.cache/pypoetry + key: poetry-orchestra-${{ runner.os }}-${{ steps.orchestra-sha.outputs.sha }} + restore-keys: | + poetry-orchestra-${{ runner.os }}- + + - name: Install orchestra dependencies + run: | + cd "$ORCHESTRA_REPO_PATH" + poetry install --no-interaction + + - name: Run tests + id: run-tests + run: | + set +e + + Xvfb :99 -screen 0 1920x1080x24 & + XVFB_PID=$! 
+ export DISPLAY=:99 + trap "kill $XVFB_PID 2>/dev/null" EXIT + + echo "Running: parallel_run.sh --timeout 7200 tests/ -- -m \"not llm_call\" --ignore=tests/actor --ignore=tests/conversation_manager --ignore=tests/async_tool_loop" + tests/parallel_run.sh --timeout 7200 tests/ -- -m "not llm_call" --ignore=tests/actor --ignore=tests/conversation_manager --ignore=tests/async_tool_loop + TEST_EXIT_CODE=$? + echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT" + exit $TEST_EXIT_CODE + + - name: Dump orchestra logs on failure + if: failure() + run: | + echo "=== Orchestra Server Logs ===" + if [ -f /tmp/orchestra-local-server.log ]; then + cat /tmp/orchestra-local-server.log + mkdir -p logs/orchestra + cp /tmp/orchestra-local-server.log logs/orchestra/server.log + echo "(Copied to logs/orchestra/server.log for artifact upload)" + else + echo "No orchestra log file found at /tmp/orchestra-local-server.log" + fi + echo "" + echo "=== Orchestra Server Process Status ===" + if [ -f /tmp/orchestra-local-server.pid ]; then + PID=$(cat /tmp/orchestra-local-server.pid) + echo "Expected PID: $PID" + if ps -p "$PID" > /dev/null 2>&1; then + echo "Process is still running" + else + echo "Process is NOT running" + fi + else + echo "No PID file found" + fi + echo "" + echo "=== Any orchestra-related processes ===" + ps aux | grep -E "(orchestra|uvicorn)" | grep -v grep || echo "None found" + + - name: Generate failure summary + if: failure() + run: | + ARTIFACTS_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts" + + TOTAL_COUNT=0 + if [ -d "logs/pytest" ]; then + TOTAL_COUNT=$(find logs/pytest -name "*.txt" -type f 2>/dev/null | wc -l | tr -d ' ') + fi + + FAILED_SESSIONS="" + FAIL_COUNT=0 + for sock in /tmp/tmux-$(id -u)/unity*; do + if [ -S "$sock" ]; then + socket_name=$(basename "$sock") + FAILED_SESSIONS=$(tmux -L "$socket_name" ls 2>/dev/null | sed 's/: .*//' | grep '^f ' || true) + FAIL_COUNT=$(echo "$FAILED_SESSIONS" | grep -c '^f ' 
|| echo 0) + break + fi + done + + PASS_COUNT=$((TOTAL_COUNT - FAIL_COUNT)) + + echo "=== Test Results ===" + echo "✅ $PASS_COUNT passed, ❌ $FAIL_COUNT failed" + echo "" + echo "Failed tests:" + echo "$FAILED_SESSIONS" + echo "" + echo "Download logs: $ARTIFACTS_URL" + + echo "## ❌ Test Failures (lightweight)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**✅ $PASS_COUNT** passed, **❌ $FAIL_COUNT** failed — [view logs]($ARTIFACTS_URL)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Failed tests:" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "$FAILED_SESSIONS" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + + DURATION_FILE=$(find logs/pytest -name "duration_summary.txt" 2>/dev/null | head -1) + if [ -n "$DURATION_FILE" ] && [ -f "$DURATION_FILE" ]; then + echo "" >> $GITHUB_STEP_SUMMARY + echo "
" >> $GITHUB_STEP_SUMMARY + echo "📊 Test Stats: Duration & Cache (fastest → slowest)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + cat "$DURATION_FILE" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "
" >> $GITHUB_STEP_SUMMARY + fi + + - name: Generate success summary + if: success() + run: | + ARTIFACTS_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts" + + log_count=0 + if [ -d "logs/pytest" ]; then + log_count=$(find logs/pytest -name "*.txt" -type f 2>/dev/null | wc -l | tr -d ' ') + fi + + if [ "$log_count" -eq 0 ]; then + echo "## ⚠️ No Tests Ran (lightweight)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**0 tests executed** — this likely indicates a problem with test discovery." >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Check the 'Run tests' step logs for details." >> $GITHUB_STEP_SUMMARY + echo "" + echo "Error: No tests were executed. This indicates a problem with test discovery." >&2 + echo "Check the 'Run tests' step output for pytest collection errors." >&2 + exit 1 + fi + + echo "=== All tests passed ===" + echo "✅ $log_count passed" + echo "" + echo "View logs: $ARTIFACTS_URL" + + echo "## ✅ All Tests Passed (lightweight)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**✅ $log_count** passed — [view logs]($ARTIFACTS_URL)" >> $GITHUB_STEP_SUMMARY + + DURATION_FILE=$(find logs/pytest -name "duration_summary.txt" 2>/dev/null | head -1) + if [ -n "$DURATION_FILE" ] && [ -f "$DURATION_FILE" ]; then + echo "" >> $GITHUB_STEP_SUMMARY + echo "
" >> $GITHUB_STEP_SUMMARY + echo "📊 Test Stats: Duration & Cache (fastest → slowest)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + cat "$DURATION_FILE" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "
" >> $GITHUB_STEP_SUMMARY + fi + + - name: Stop local orchestra (before log upload) + if: always() + continue-on-error: true + run: | + trap 'echo "Received SIGTERM, exiting gracefully"; exit 0' TERM + orchestra/scripts/local.sh stop || true + + - name: Prepare logs for upload + if: always() + run: | + echo "=== Preparing logs for upload ===" + mkdir -p logs + + if [ -d logs/pytest ]; then + file_count=$(find logs/pytest -type f 2>/dev/null | wc -l | tr -d ' ') + echo "✓ pytest logs: $file_count files" + else + echo "○ pytest logs: directory not found (skipping)" + fi + + if [ -d logs/unillm ]; then + file_count=$(find logs/unillm -type f 2>/dev/null | wc -l | tr -d ' ') + echo "✓ unillm logs: $file_count files" + else + echo "○ unillm logs: directory not found (skipping)" + fi + + if [ -d logs/orchestra ]; then + sanitized=0 + while IFS= read -r -d '' file; do + newname=$(echo "$file" | tr ':' '_') + if mv "$file" "$newname" 2>/dev/null; then + ((sanitized++)) || true + fi + done < <(find logs/orchestra -type f -name '*:*' -print0 2>/dev/null || true) + + file_count=$(find logs/orchestra -type f 2>/dev/null | wc -l | tr -d ' ') + if [ "$sanitized" -gt 0 ]; then + echo "✓ orchestra logs: $file_count files ($sanitized filenames sanitized)" + else + echo "✓ orchestra logs: $file_count files" + fi + else + echo "○ orchestra logs: directory not found (skipping)" + fi + + echo "=== Log preparation complete ===" + + - name: Upload logs + if: always() + uses: actions/upload-artifact@v7 + with: + name: logs-lightweight + path: logs/ + if-no-files-found: ignore + retention-days: 90 + + - name: Cleanup on cancellation + if: cancelled() + run: | + echo "Workflow cancelled - cleaning up tmux sessions and orphaned processes..." + tests/kill_server.sh --all || true + orchestra/scripts/local.sh stop || true + echo "Cleanup complete." 
From 109f3d48ed63e7a4dece3e47a3e9138a67d9e624 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 15:51:52 +0200 Subject: [PATCH 06/14] ci: fix magnitude package build --- .github/workflows/tests_lightweight.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml index fa934ecdc..2c772c9fb 100644 --- a/.github/workflows/tests_lightweight.yml +++ b/.github/workflows/tests_lightweight.yml @@ -170,13 +170,15 @@ jobs: - name: Build magnitude packages and install agent-service (agent-service tests) run: | - cd magnitude/packages/magnitude-core + cd magnitude npm install + echo "✓ magnitude root deps installed (turbo, etc.)" + + cd packages/magnitude-core npm run build echo "✓ magnitude-core built" cd ../magnitude-extract - npm install npm run build echo "✓ magnitude-extract built" From 97e499c760571fd6baff672ec2b0dc37cae275fa Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 15:59:51 +0200 Subject: [PATCH 07/14] ci(lightweight): exclude agent_service tests --- .github/workflows/tests_lightweight.yml | 39 ++----------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml index 2c772c9fb..449a3a616 100644 --- a/.github/workflows/tests_lightweight.yml +++ b/.github/workflows/tests_lightweight.yml @@ -154,41 +154,6 @@ jobs: run: | uv run playwright install --with-deps - - name: Setup Node.js (agent-service tests) - uses: actions/setup-node@v4 - with: - node-version: '22' - - - name: Clone magnitude repo (agent-service tests) - uses: actions/checkout@v6 - with: - repository: unifyai/magnitude - ref: unity-modifications - path: magnitude - token: ${{ secrets.CLONE_TOKEN }} - fetch-depth: 1 - - - name: Build magnitude packages and install agent-service (agent-service tests) - run: | - cd magnitude - npm install - echo "✓ magnitude root deps installed (turbo, 
etc.)" - - cd packages/magnitude-core - npm run build - echo "✓ magnitude-core built" - - cd ../magnitude-extract - npm run build - echo "✓ magnitude-extract built" - - cd ${{ github.workspace }}/agent-service - npm install - echo "✓ agent-service dependencies installed" - - npx patchright install chromium - echo "✓ Patchright Chromium installed" - - name: Cache orchestra poetry dependencies uses: actions/cache@v4 with: @@ -212,8 +177,8 @@ jobs: export DISPLAY=:99 trap "kill $XVFB_PID 2>/dev/null" EXIT - echo "Running: parallel_run.sh --timeout 7200 tests/ -- -m \"not llm_call\" --ignore=tests/actor --ignore=tests/conversation_manager --ignore=tests/async_tool_loop" - tests/parallel_run.sh --timeout 7200 tests/ -- -m "not llm_call" --ignore=tests/actor --ignore=tests/conversation_manager --ignore=tests/async_tool_loop + echo "Running: parallel_run.sh --timeout 7200 tests/ -- -m \"not llm_call\" --ignore=tests/actor --ignore=tests/conversation_manager --ignore=tests/async_tool_loop --ignore=tests/agent_service --ignore=tests/demo_url_mapping" + tests/parallel_run.sh --timeout 7200 tests/ -- -m "not llm_call" --ignore=tests/actor --ignore=tests/conversation_manager --ignore=tests/async_tool_loop --ignore=tests/agent_service --ignore=tests/demo_url_mapping TEST_EXIT_CODE=$? echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT" exit $TEST_EXIT_CODE From 49890686998ff20244fdc0bb403e33b67f83330e Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 19:31:23 +0200 Subject: [PATCH 08/14] ci(lightweight): split testing to 8 jobs --- .github/workflows/tests_lightweight.yml | 71 ++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml index 449a3a616..c85a10d78 100644 --- a/.github/workflows/tests_lightweight.yml +++ b/.github/workflows/tests_lightweight.yml @@ -1,4 +1,4 @@ -# Lightweight test suite: runs all tests with -m "not llm_call" as a single job. 
+# Lightweight test suite: runs all tests with -m "not llm_call" across 8 fixed per-test shards. # For the full test suite (with LLM calls, matrix parallelism, cache consolidation), # use tests.yml via workflow_dispatch. name: Tests (lightweight) @@ -21,11 +21,16 @@ on: jobs: pytest: - name: pytest (lightweight) + name: pytest (lightweight shard ${{ matrix.shard }}/8) runs-on: ubuntu-latest-8-cores environment: unity-testing timeout-minutes: 130 + strategy: + fail-fast: false + matrix: + shard: [1, 2, 3, 4, 5, 6, 7, 8] env: + LIGHTWEIGHT_SHARD_COUNT: "8" UNIFY_TESTS_RAND_PROJ: "false" UNIFY_TESTS_DELETE_PROJ_ON_START: "false" UNIFY_TESTS_DELETE_PROJ_ON_EXIT: "false" @@ -167,18 +172,68 @@ jobs: cd "$ORCHESTRA_REPO_PATH" poetry install --no-interaction + - name: Collect test nodes for this shard + id: collect-nodes + env: + LIGHTWEIGHT_SHARD: ${{ matrix.shard }} + run: | + SHARD_NODES_FILE="$(mktemp)" + echo "shard_nodes_file=$SHARD_NODES_FILE" >> "$GITHUB_OUTPUT" + export SHARD_NODES_FILE + + uv run python - <<'PY' + import os + import subprocess + import sys + from pathlib import Path + + cmd = [ + sys.executable, + "-m", + "pytest", + "--collect-only", + "-q", + "-m", + "not llm_call", + "tests/", + "--ignore=tests/actor", + "--ignore=tests/conversation_manager", + "--ignore=tests/async_tool_loop", + "--ignore=tests/agent_service", + "--ignore=tests/demo_url_mapping", + ] + result = subprocess.run(cmd, text=True, capture_output=True) + if result.returncode != 0: + print(result.stdout) + print(result.stderr, file=sys.stderr) + raise SystemExit(result.returncode) + + nodes = [line.strip() for line in result.stdout.splitlines() if "::" in line] + shard = int(os.environ["LIGHTWEIGHT_SHARD"]) + shard_count = int(os.environ["LIGHTWEIGHT_SHARD_COUNT"]) + shard_nodes = [node for index, node in enumerate(nodes) if index % shard_count == shard - 1] + Path(os.environ["SHARD_NODES_FILE"]).write_text("".join(f"{node}\n" for node in shard_nodes)) + + print(f"Collected 
{len(nodes)} lightweight nodes") + print(f"Shard {shard}/{shard_count}: {len(shard_nodes)} nodes") + + if not shard_nodes: + raise SystemExit(f"Shard {shard}/{shard_count} has no collected tests") + PY + - name: Run tests id: run-tests run: | set +e + mapfile -t SHARD_NODES < '${{ steps.collect-nodes.outputs.shard_nodes_file }}' Xvfb :99 -screen 0 1920x1080x24 & XVFB_PID=$! export DISPLAY=:99 trap "kill $XVFB_PID 2>/dev/null" EXIT - echo "Running: parallel_run.sh --timeout 7200 tests/ -- -m \"not llm_call\" --ignore=tests/actor --ignore=tests/conversation_manager --ignore=tests/async_tool_loop --ignore=tests/agent_service --ignore=tests/demo_url_mapping" - tests/parallel_run.sh --timeout 7200 tests/ -- -m "not llm_call" --ignore=tests/actor --ignore=tests/conversation_manager --ignore=tests/async_tool_loop --ignore=tests/agent_service --ignore=tests/demo_url_mapping + echo "Running shard ${{ matrix.shard }}/8 with ${#SHARD_NODES[@]} explicit test nodes" + tests/parallel_run.sh --timeout 7200 "${SHARD_NODES[@]}" TEST_EXIT_CODE=$? echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT" exit $TEST_EXIT_CODE @@ -243,7 +298,7 @@ jobs: echo "" echo "Download logs: $ARTIFACTS_URL" - echo "## ❌ Test Failures (lightweight)" >> $GITHUB_STEP_SUMMARY + echo "## ❌ Test Failures (lightweight shard ${{ matrix.shard }}/8)" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "**✅ $PASS_COUNT** passed, **❌ $FAIL_COUNT** failed — [view logs]($ARTIFACTS_URL)" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY @@ -275,7 +330,7 @@ jobs: fi if [ "$log_count" -eq 0 ]; then - echo "## ⚠️ No Tests Ran (lightweight)" >> $GITHUB_STEP_SUMMARY + echo "## ⚠️ No Tests Ran (lightweight shard ${{ matrix.shard }}/8)" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "**0 tests executed** — this likely indicates a problem with test discovery." 
>> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY @@ -291,7 +346,7 @@ jobs: echo "" echo "View logs: $ARTIFACTS_URL" - echo "## ✅ All Tests Passed (lightweight)" >> $GITHUB_STEP_SUMMARY + echo "## ✅ All Tests Passed (lightweight shard ${{ matrix.shard }}/8)" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "**✅ $log_count** passed — [view logs]($ARTIFACTS_URL)" >> $GITHUB_STEP_SUMMARY @@ -359,7 +414,7 @@ jobs: if: always() uses: actions/upload-artifact@v7 with: - name: logs-lightweight + name: logs-lightweight-shard-${{ matrix.shard }} path: logs/ if-no-files-found: ignore retention-days: 90 From cfd350d35bf1cae79a98a4e61edd99e95f3c8726 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 20:14:57 +0200 Subject: [PATCH 09/14] ci(lightweight): skip unity init on collect only --- .github/workflows/tests_lightweight.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml index c85a10d78..a99d3433b 100644 --- a/.github/workflows/tests_lightweight.yml +++ b/.github/workflows/tests_lightweight.yml @@ -176,6 +176,7 @@ jobs: id: collect-nodes env: LIGHTWEIGHT_SHARD: ${{ matrix.shard }} + SKIP_UNITY_TEST_INIT: "1" run: | SHARD_NODES_FILE="$(mktemp)" echo "shard_nodes_file=$SHARD_NODES_FILE" >> "$GITHUB_OUTPUT" From 30eb516ed170845225695341b3bcd59a86bf7fb3 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Wed, 8 Apr 2026 20:48:43 +0200 Subject: [PATCH 10/14] ci(lightweight): shard by test file counts instead of test nodes --- .github/workflows/tests_lightweight.yml | 80 +++++++++++++++++++------ 1 file changed, 62 insertions(+), 18 deletions(-) diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml index a99d3433b..90f66aebf 100644 --- a/.github/workflows/tests_lightweight.yml +++ b/.github/workflows/tests_lightweight.yml @@ -172,17 +172,18 @@ jobs: cd "$ORCHESTRA_REPO_PATH" poetry install --no-interaction - - name: Collect test nodes for 
this shard - id: collect-nodes + - name: Collect test files for this shard + id: collect-files env: LIGHTWEIGHT_SHARD: ${{ matrix.shard }} SKIP_UNITY_TEST_INIT: "1" run: | - SHARD_NODES_FILE="$(mktemp)" - echo "shard_nodes_file=$SHARD_NODES_FILE" >> "$GITHUB_OUTPUT" - export SHARD_NODES_FILE + SHARD_FILES_FILE="$(mktemp)" + echo "shard_files_file=$SHARD_FILES_FILE" >> "$GITHUB_OUTPUT" + export SHARD_FILES_FILE uv run python - <<'PY' + import math import os import subprocess import sys @@ -193,7 +194,7 @@ jobs: "-m", "pytest", "--collect-only", - "-q", + "-qq", "-m", "not llm_call", "tests/", @@ -209,32 +210,75 @@ jobs: print(result.stderr, file=sys.stderr) raise SystemExit(result.returncode) - nodes = [line.strip() for line in result.stdout.splitlines() if "::" in line] shard = int(os.environ["LIGHTWEIGHT_SHARD"]) shard_count = int(os.environ["LIGHTWEIGHT_SHARD_COUNT"]) - shard_nodes = [node for index, node in enumerate(nodes) if index % shard_count == shard - 1] - Path(os.environ["SHARD_NODES_FILE"]).write_text("".join(f"{node}\n" for node in shard_nodes)) - - print(f"Collected {len(nodes)} lightweight nodes") - print(f"Shard {shard}/{shard_count}: {len(shard_nodes)} nodes") - - if not shard_nodes: - raise SystemExit(f"Shard {shard}/{shard_count} has no collected tests") + file_counts = [] + for raw_line in result.stdout.splitlines(): + line = raw_line.strip() + if not line or line.startswith("="): + continue + file_path, separator, count_text = line.rpartition(":") + if separator != ":" or not file_path.startswith("tests/"): + continue + test_count = int(count_text.strip()) + file_counts.append((file_path, test_count)) + + if not file_counts: + raise SystemExit("Pytest did not report any lightweight test files") + + total_tests = sum(test_count for _, test_count in file_counts) + target_tests_per_shard = math.ceil(total_tests / shard_count) + shards = [[] for _ in range(shard_count)] + shard_test_counts = [0] * shard_count + current_shard = 0 + + for index, 
(file_path, test_count) in enumerate(file_counts): + shards[current_shard].append(file_path) + shard_test_counts[current_shard] += test_count + + remaining_files = len(file_counts) - index - 1 + remaining_shards = shard_count - current_shard - 1 + if ( + current_shard < shard_count - 1 + and shard_test_counts[current_shard] >= target_tests_per_shard + and remaining_files >= remaining_shards + ): + current_shard += 1 + + shard_files = shards[shard - 1] + shard_test_count = shard_test_counts[shard - 1] + Path(os.environ["SHARD_FILES_FILE"]).write_text("".join(f"{file_path}\n" for file_path in shard_files)) + + print(f"Collected {len(file_counts)} lightweight test files") + print(f"Collected {total_tests} lightweight tests") + print( + f"Target per shard: {target_tests_per_shard} tests " + f"across {shard_count} shards" + ) + print( + f"Shard {shard}/{shard_count}: " + f"{len(shard_files)} files, {shard_test_count} tests" + ) + for file_path in shard_files: + print(f" - {file_path}") + + if not shard_files: + raise SystemExit(f"Shard {shard}/{shard_count} has no collected test files") PY - name: Run tests id: run-tests run: | set +e - mapfile -t SHARD_NODES < '${{ steps.collect-nodes.outputs.shard_nodes_file }}' + mapfile -t SHARD_FILES < '${{ steps.collect-files.outputs.shard_files_file }}' Xvfb :99 -screen 0 1920x1080x24 & XVFB_PID=$! export DISPLAY=:99 trap "kill $XVFB_PID 2>/dev/null" EXIT - echo "Running shard ${{ matrix.shard }}/8 with ${#SHARD_NODES[@]} explicit test nodes" - tests/parallel_run.sh --timeout 7200 "${SHARD_NODES[@]}" + echo "Running shard ${{ matrix.shard }}/8 with ${#SHARD_FILES[@]} test files" + tests/parallel_run.sh --timeout 7200 "${SHARD_FILES[@]}" TEST_EXIT_CODE=$? 
echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT" exit $TEST_EXIT_CODE From 94067e5b2bc772e28e955af7456d8038165eb85a Mon Sep 17 00:00:00 2001 From: CatB1t Date: Thu, 9 Apr 2026 11:16:21 +0200 Subject: [PATCH 11/14] Revert "ci(lightweight): shard by test file counts instead of test nodes" This reverts commit 30eb516ed170845225695341b3bcd59a86bf7fb3. --- .github/workflows/tests_lightweight.yml | 80 ++++++------------------- 1 file changed, 18 insertions(+), 62 deletions(-) diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml index 90f66aebf..a99d3433b 100644 --- a/.github/workflows/tests_lightweight.yml +++ b/.github/workflows/tests_lightweight.yml @@ -172,18 +172,17 @@ jobs: cd "$ORCHESTRA_REPO_PATH" poetry install --no-interaction - - name: Collect test files for this shard - id: collect-files + - name: Collect test nodes for this shard + id: collect-nodes env: LIGHTWEIGHT_SHARD: ${{ matrix.shard }} SKIP_UNITY_TEST_INIT: "1" run: | - SHARD_FILES_FILE="$(mktemp)" - echo "shard_files_file=$SHARD_FILES_FILE" >> "$GITHUB_OUTPUT" - export SHARD_FILES_FILE + SHARD_NODES_FILE="$(mktemp)" + echo "shard_nodes_file=$SHARD_NODES_FILE" >> "$GITHUB_OUTPUT" + export SHARD_NODES_FILE uv run python - <<'PY' - import math import os import subprocess import sys @@ -194,7 +193,7 @@ jobs: "-m", "pytest", "--collect-only", - "-qq", + "-q", "-m", "not llm_call", "tests/", @@ -210,75 +209,32 @@ jobs: print(result.stderr, file=sys.stderr) raise SystemExit(result.returncode) + nodes = [line.strip() for line in result.stdout.splitlines() if "::" in line] shard = int(os.environ["LIGHTWEIGHT_SHARD"]) shard_count = int(os.environ["LIGHTWEIGHT_SHARD_COUNT"]) - file_counts = [] - for raw_line in result.stdout.splitlines(): - line = raw_line.strip() - if not line or line.startswith("="): - continue - file_path, separator, count_text = line.rpartition(":") - if separator != ":" or not file_path.startswith("tests/"): - continue - test_count = 
int(count_text.strip()) - file_counts.append((file_path, test_count)) - - if not file_counts: - raise SystemExit("Pytest did not report any lightweight test files") - - total_tests = sum(test_count for _, test_count in file_counts) - target_tests_per_shard = math.ceil(total_tests / shard_count) - shards = [[] for _ in range(shard_count)] - shard_test_counts = [0] * shard_count - current_shard = 0 - - for index, (file_path, test_count) in enumerate(file_counts): - shards[current_shard].append(file_path) - shard_test_counts[current_shard] += test_count - - remaining_files = len(file_counts) - index - 1 - remaining_shards = shard_count - current_shard - 1 - if ( - current_shard < shard_count - 1 - and shard_test_counts[current_shard] >= target_tests_per_shard - and remaining_files >= remaining_shards - ): - current_shard += 1 - - shard_files = shards[shard - 1] - shard_test_count = shard_test_counts[shard - 1] - Path(os.environ["SHARD_FILES_FILE"]).write_text("".join(f"{file_path}\n" for file_path in shard_files)) - - print(f"Collected {len(file_counts)} lightweight test files") - print(f"Collected {total_tests} lightweight tests") - print( - f"Target per shard: {target_tests_per_shard} tests " - f"across {shard_count} shards" - ) - print( - f"Shard {shard}/{shard_count}: " - f"{len(shard_files)} files, {shard_test_count} tests" - ) - for file_path in shard_files: - print(f" - {file_path}") - - if not shard_files: - raise SystemExit(f"Shard {shard}/{shard_count} has no collected test files") + shard_nodes = [node for index, node in enumerate(nodes) if index % shard_count == shard - 1] + Path(os.environ["SHARD_NODES_FILE"]).write_text("".join(f"{node}\n" for node in shard_nodes)) + + print(f"Collected {len(nodes)} lightweight nodes") + print(f"Shard {shard}/{shard_count}: {len(shard_nodes)} nodes") + + if not shard_nodes: + raise SystemExit(f"Shard {shard}/{shard_count} has no collected tests") PY - name: Run tests id: run-tests run: | set +e - mapfile -t SHARD_FILES < 
'${{ steps.collect-files.outputs.shard_files_file }}' + mapfile -t SHARD_NODES < '${{ steps.collect-nodes.outputs.shard_nodes_file }}' Xvfb :99 -screen 0 1920x1080x24 & XVFB_PID=$! export DISPLAY=:99 trap "kill $XVFB_PID 2>/dev/null" EXIT - echo "Running shard ${{ matrix.shard }}/8 with ${#SHARD_FILES[@]} test files" - tests/parallel_run.sh --timeout 7200 "${SHARD_FILES[@]}" + echo "Running shard ${{ matrix.shard }}/8 with ${#SHARD_NODES[@]} explicit test nodes" + tests/parallel_run.sh --timeout 7200 "${SHARD_NODES[@]}" TEST_EXIT_CODE=$? echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT" exit $TEST_EXIT_CODE From 7843fc82366124a9ac041b471a85c44cbd0be453 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Thu, 9 Apr 2026 11:34:47 +0200 Subject: [PATCH 12/14] ci(lighweight): add --from-file and --skip-collection to parallel_run, update lightweight tests to use it --- .github/workflows/tests_lightweight.yml | 7 ++- tests/_parse_args.sh | 31 +++++++++- tests/parallel_run.sh | 76 +++++++++++++++++++++++-- 3 files changed, 103 insertions(+), 11 deletions(-) diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml index a99d3433b..2c33e5c33 100644 --- a/.github/workflows/tests_lightweight.yml +++ b/.github/workflows/tests_lightweight.yml @@ -226,15 +226,16 @@ jobs: id: run-tests run: | set +e - mapfile -t SHARD_NODES < '${{ steps.collect-nodes.outputs.shard_nodes_file }}' + SHARD_NODES_FILE='${{ steps.collect-nodes.outputs.shard_nodes_file }}' + SHARD_NODE_COUNT=$(wc -l < "$SHARD_NODES_FILE" | tr -d ' ') Xvfb :99 -screen 0 1920x1080x24 & XVFB_PID=$! 
export DISPLAY=:99 trap "kill $XVFB_PID 2>/dev/null" EXIT - echo "Running shard ${{ matrix.shard }}/8 with ${#SHARD_NODES[@]} explicit test nodes" - tests/parallel_run.sh --timeout 7200 "${SHARD_NODES[@]}" + echo "Running shard ${{ matrix.shard }}/8 with $SHARD_NODE_COUNT explicit test nodes from $SHARD_NODES_FILE" + tests/parallel_run.sh --timeout 7200 --skip-collection --from-file "$SHARD_NODES_FILE" TEST_EXIT_CODE=$? echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT" exit $TEST_EXIT_CODE diff --git a/tests/_parse_args.sh b/tests/_parse_args.sh index 5730f772f..d1f7f8006 100644 --- a/tests/_parse_args.sh +++ b/tests/_parse_args.sh @@ -11,9 +11,10 @@ # # After calling parse_test_args, these variables are populated: # SERIAL, TIMEOUT, NAME_PATTERN, EVAL_ONLY, SYMBOLIC_ONLY, -# REPEAT_COUNT, OVERWRITE_SCENARIOS, MAX_JOBS, ENV_OVERRIDES[], -# TAGS[], PYTEST_EXTRA_ARGS[], PYTEST_COLLECTION_ARGS[], -# POSITIONAL_ARGS[] +# REPEAT_COUNT, OVERWRITE_SCENARIOS, SKIP_COLLECTION, +# MAX_JOBS, ENV_OVERRIDES[], +# TAGS[], FROM_FILE_PATHS[], PYTEST_EXTRA_ARGS[], +# PYTEST_COLLECTION_ARGS[], POSITIONAL_ARGS[] # # Additional functions: # resolve_test_paths REPO_ROOT - Validates paths in POSITIONAL_ARGS, sets RESOLVED_TEST_PATHS[] @@ -42,9 +43,11 @@ parse_test_args() { SYMBOLIC_ONLY=0 REPEAT_COUNT=1 OVERWRITE_SCENARIOS=0 + SKIP_COLLECTION=0 MAX_JOBS=$_PARSE_ARGS_NUM_CORES ENV_OVERRIDES=() TAGS=() + FROM_FILE_PATHS=() PYTEST_EXTRA_ARGS=() PYTEST_COLLECTION_ARGS=() PYTEST_IGNORE_PATHS=() @@ -104,6 +107,10 @@ parse_test_args() { OVERWRITE_SCENARIOS=1 shift ;; + --skip-collection) + SKIP_COLLECTION=1 + shift + ;; --tags) if [[ -n "${2-}" ]]; then # Split on comma and add each tag to TAGS array @@ -117,6 +124,15 @@ parse_test_args() { return 2 fi ;; + --from-file) + if [[ -n "${2-}" ]]; then + FROM_FILE_PATHS+=( "$2" ) + shift 2 + else + echo "Error: --from-file requires a file path." 
>&2 + return 2 + fi + ;; -j|--jobs) if [[ -z "${2-}" ]]; then echo "Error: -j|--jobs requires an argument (e.g., --jobs 8, --jobs 0, --jobs none)." >&2 @@ -260,12 +276,17 @@ reconstruct_parallel_run_args() { (( SYMBOLIC_ONLY )) && args="$args --symbolic-only" (( REPEAT_COUNT > 1 )) && args="$args --repeat $REPEAT_COUNT" (( OVERWRITE_SCENARIOS )) && args="$args --overwrite-scenarios" + (( SKIP_COLLECTION )) && args="$args --skip-collection" # Note: MAX_JOBS is not passed to CI (CI has its own resource limits) for tag in "${TAGS[@]}"; do args="$args --tags $(printf '%q' "$tag")" done + for path in "${FROM_FILE_PATHS[@]}"; do + args="$args --from-file $(printf '%q' "$path")" + done + # Include --env flags if requested if (( include_env )); then for kv in "${ENV_OVERRIDES[@]}"; do @@ -304,7 +325,9 @@ Options: --eval-only Run only @pytest.mark.eval tests --symbolic-only Run only non-eval tests --repeat N Run each test N times + --skip-collection Trust explicit node ids instead of validating via collection --tags TAG Tag runs for filtering (repeatable) + --from-file PATH Read test targets from a newline-delimited file --overwrite-scenarios Delete and recreate test scenarios -h, --help Show this help -- Pass remaining args directly to pytest @@ -316,6 +339,8 @@ Examples: $script_name -s tests/ # Serial mode (per-file) $script_name -j 8 tests/ # Limit to 8 concurrent $script_name --eval-only tests/ # Only eval tests + $script_name --skip-collection tests/foo.py::test_bar + $script_name --from-file targets.txt # Read targets from file $script_name -e UNILLM_CACHE=false tests/ $script_name tests/ -- -v --tb=short # Pass args to pytest $script_name tests/ -- -k 'gpt-5' # Filter by test name pattern diff --git a/tests/parallel_run.sh b/tests/parallel_run.sh index 0fed212d6..cf3dc1a9e 100755 --- a/tests/parallel_run.sh +++ b/tests/parallel_run.sh @@ -188,8 +188,8 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd -P)" # Parse arguments using shared helper # Returns: 0=success, 1=help requested, 2=error -parse_test_args "$@" -_parse_result=$? +_parse_result=0 +parse_test_args "$@" || _parse_result=$? if (( _parse_result == 1 )); then # Help requested HELP_SCRIPT_NAME="parallel_run.sh" @@ -575,8 +575,51 @@ build_env_exports() { echo "$exports" } -# Reset positional parameters safely under nounset (only expand if set) -set -- ${POSITIONAL_ARGS[@]+"${POSITIONAL_ARGS[@]}"} +# Resolve an auxiliary input file path relative to the caller, tests/, or repo root. +resolve_input_file_path() { + local path="$1" + if [[ -f "$path" ]]; then + printf "%s" "$path" + elif [[ -f "$SCRIPT_DIR/$path" ]]; then + printf "%s" "$SCRIPT_DIR/$path" + elif [[ -f "$REPO_ROOT/$path" ]]; then + printf "%s" "$REPO_ROOT/$path" + else + return 1 + fi +} + +expanded_targets=() +if (( ${#POSITIONAL_ARGS[@]} > 0 )); then + expanded_targets=( "${POSITIONAL_ARGS[@]}" ) +fi + +had_explicit_target_source=0 +if (( ${#POSITIONAL_ARGS[@]} > 0 || ${#FROM_FILE_PATHS[@]} > 0 )); then + had_explicit_target_source=1 +fi + +if (( ${#FROM_FILE_PATHS[@]} > 0 )); then + for list_path in "${FROM_FILE_PATHS[@]}"; do + resolved_list_path=$(resolve_input_file_path "$list_path") || { + echo "Error: --from-file not found: $list_path" >&2 + exit 2 + } + while IFS= read -r raw_line || [[ -n "$raw_line" ]]; do + line="${raw_line%$'\r'}" + line="${line#"${line%%[![:space:]]*}"}" + line="${line%"${line##*[![:space:]]}"}" + [[ -z "$line" || "$line" == \#* ]] && continue + expanded_targets+=( "$line" ) + done < "$resolved_list_path" + done +fi + +if (( ${#expanded_targets[@]} > 0 )); then + set -- "${expanded_targets[@]}" +else + set -- +fi # Always operate from the repo root for discovery, regardless of where the script was invoked cd "$REPO_ROOT" @@ -979,6 +1022,10 @@ declare -a direct_files=() declare -a direct_nodes=() if (( $# == 0 )); then + if (( had_explicit_target_source )); then + echo "No valid directories, files, or 
tests provided." >&2 + exit 1 + fi roots=( "." ) else for arg in "$@"; do @@ -1140,6 +1187,11 @@ validate_and_add_direct_nodes() { return 0 fi + if (( SKIP_COLLECTION )); then + printf '%s\0' "${direct_nodes[@]}" >> "$tmp" + return 0 + fi + # Extract unique base files from direct_nodes local -a base_files=() local seen_files="" @@ -1154,15 +1206,19 @@ validate_and_add_direct_nodes() { # Collect all valid nodes from those files (no marker filter — just checking existence) local collected collected=$(collect_nodes_batch "" "${base_files[@]}") + local collected_file + collected_file=$(mktemp) + printf '%s\n' "$collected" > "$collected_file" # Validate each direct_node against collected output for node in "${direct_nodes[@]}"; do - if echo "$collected" | grep -qxF "$node"; then + if grep -qxF -- "$node" "$collected_file"; then printf '%s\0' "$node" >> "$tmp" else echo "Error: Test node not found (skipping): $node" >&2 fi done + rm -f "$collected_file" } # Gather recursive .py files from roots (NUL-delimited, sorted) @@ -1345,6 +1401,7 @@ fi # Print header before drip-feeding session creation echo "Creating ${#files[@]} tmux sessions..." 
+WALL_START=$(date +%s) for target in "${files[@]}"; do # Report any completions before creating new sessions @@ -1561,8 +1618,17 @@ if (( total_tests > 0 )); then else duration_str="${total_duration}s" fi + wall_duration=$(( $(date +%s) - WALL_START )) + if (( wall_duration >= 60 )); then + wall_mins=$((wall_duration / 60)) + wall_secs=$((wall_duration % 60)) + wall_str="${wall_mins}m ${wall_secs}s" + else + wall_str="${wall_duration}s" + fi total_cache_rate=$(format_cache_rate "$total_hits" "$total_misses") total_calls=$((total_hits + total_misses)) + print_duration_line " Wall time: $wall_str" print_duration_line " Serial duration: $duration_str" print_duration_line " LLM cache: $total_cache_rate ($total_hits hits, $total_misses misses, $total_calls total)" print_duration_line " LLM cost: \$$total_cost" From c543d245cf05df0307d49a4c0aed044fee0b8126 Mon Sep 17 00:00:00 2001 From: CatB1t Date: Fri, 10 Apr 2026 12:11:52 +0200 Subject: [PATCH 13/14] revert changes to 'test.yml' --- .github/scripts/discover_test_paths.py | 87 ++------- .github/workflows/tests.yml | 249 +++++++++++++++++++------ 2 files changed, 212 insertions(+), 124 deletions(-) diff --git a/.github/scripts/discover_test_paths.py b/.github/scripts/discover_test_paths.py index db203cab3..49e37e701 100644 --- a/.github/scripts/discover_test_paths.py +++ b/.github/scripts/discover_test_paths.py @@ -9,9 +9,6 @@ # Expand specific paths to their leaf directories python discover_test_paths.py tests/function_manager tests/actor - # Exclude directories from discovery - python discover_test_paths.py --exclude tests/actor --exclude tests/conversation_manager - When explicit paths are provided: - Files are kept as-is (no expansion) - Directories are expanded to their leaf sub-directories using Option A algorithm @@ -22,8 +19,8 @@ direct test files (space-separated), plus recursive jobs for subdirs """ -import argparse import os +import sys from pathlib import Path EXCLUDE_DIRS = { @@ -36,14 +33,6 @@ "venv", } 
-_exclude_prefixes: list[str] = [] - - -def _is_excluded(path: str | Path) -> bool: - """Return True if path starts with any --exclude prefix.""" - s = str(path) - return any(s == p or s.startswith(p + "/") for p in _exclude_prefixes) - def has_test_files(directory): """Check if directory has test_*.py files directly in it.""" @@ -57,17 +46,9 @@ def has_test_files(directory): def has_test_subdirs(directory): """Check if directory has subdirectories that contain test files (recursively).""" for subdir in directory.iterdir(): - if ( - subdir.is_dir() - and subdir.name not in EXCLUDE_DIRS - and not _is_excluded(subdir) - ): + if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: for root, dirs, files in os.walk(subdir): - dirs[:] = [ - d - for d in dirs - if d not in EXCLUDE_DIRS and not _is_excluded(os.path.join(root, d)) - ] + dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS] if any(f.startswith("test_") and f.endswith(".py") for f in files): return True return False @@ -86,7 +67,7 @@ def get_direct_test_files(directory): def collect_paths(directory, paths): """Recursively collect test paths using Option A algorithm.""" - if not directory.is_dir() or _is_excluded(directory): + if not directory.is_dir(): return has_files = has_test_files(directory) @@ -100,20 +81,12 @@ def collect_paths(directory, paths): direct_files = get_direct_test_files(directory) paths.append(" ".join(str(f) for f in direct_files)) for subdir in sorted(directory.iterdir()): - if ( - subdir.is_dir() - and subdir.name not in EXCLUDE_DIRS - and not _is_excluded(subdir) - ): + if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: collect_paths(subdir, paths) elif has_subdirs: # No direct test files, but has subdirs with tests: just recurse for subdir in sorted(directory.iterdir()): - if ( - subdir.is_dir() - and subdir.name not in EXCLUDE_DIRS - and not _is_excluded(subdir) - ): + if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: collect_paths(subdir, paths) @@ -127,9 +100,6 @@ def 
expand_path(path_str): path = Path(path_str) paths = [] - if _is_excluded(path): - return [] - if not path.exists(): # Path doesn't exist - return as-is and let pytest handle the error return [path_str] @@ -152,20 +122,12 @@ def expand_path(path_str): direct_files = get_direct_test_files(path) paths.append(" ".join(str(f) for f in direct_files)) for subdir in sorted(path.iterdir()): - if ( - subdir.is_dir() - and subdir.name not in EXCLUDE_DIRS - and not _is_excluded(subdir) - ): + if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: collect_paths(subdir, paths) elif has_subdirs: # No direct test files, but has subdirs with tests: recurse for subdir in sorted(path.iterdir()): - if ( - subdir.is_dir() - and subdir.name not in EXCLUDE_DIRS - and not _is_excluded(subdir) - ): + if subdir.is_dir() and subdir.name not in EXCLUDE_DIRS: collect_paths(subdir, paths) else: # No test files at all - return as-is and let pytest handle it @@ -179,50 +141,33 @@ def discover_all(): test_root = Path("tests") paths = [] + # Handle test files directly in tests/ root (e.g., test_settings.py) for item in sorted(test_root.iterdir()): - if _is_excluded(item): - continue if ( item.is_file() and item.name.startswith("test_") and item.name.endswith(".py") ): paths.append(str(item)) - elif item.is_dir() and item.name not in EXCLUDE_DIRS: + elif item.is_dir() and item.name.startswith("test"): collect_paths(item, paths) return paths def main(): - parser = argparse.ArgumentParser( - description="Discover test paths for CI parallelism", - ) - parser.add_argument( - "paths", - nargs="*", - help="Paths to expand (default: discover all)", - ) - parser.add_argument( - "--exclude", - action="append", - default=[], - help="Directory prefix to exclude (repeatable)", - ) - args = parser.parse_args() - - global _exclude_prefixes - _exclude_prefixes = [p.rstrip("/") for p in args.exclude] - - if args.paths: + if len(sys.argv) > 1: + # Explicit paths provided - expand each one all_paths = [] - for p in 
args.paths: - expanded = expand_path(p) + for arg in sys.argv[1:]: + expanded = expand_path(arg) all_paths.extend(expanded) paths = all_paths else: + # No arguments - discover all from tests/ paths = discover_all() + # Output unique paths, sorted for p in sorted(set(paths)): print(p) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2ce2d9efc..47116f1f7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,44 +36,137 @@ on: type: number default: 120 + # Push events - tests only run if commit message contains [run-tests] + push: + branches: + - '**' # All branches + + # Pull request events - tests only run if PR title contains [run-tests] + pull_request: + branches: + - '**' # All branches jobs: - # Determine test mode based on trigger type. - # Branch filtering is handled by the on: triggers above, so all events here - # are guaranteed to be: push to staging/main, PR to main/staging, or workflow_dispatch. + # Determine if tests should run based on trigger and commit/PR message should-run-tests: runs-on: ubuntu-latest outputs: - test_mode: ${{ steps.check.outputs.test_mode }} + run_tests: ${{ steps.check.outputs.run_tests }} + parallel_run_cmd: ${{ steps.check.outputs.parallel_run_cmd }} steps: - - name: Determine test mode + - name: Check if tests should run id: check env: + # Pass these via env to avoid shell interpretation of special chars + # (backticks, quotes, $() in commit messages would otherwise be executed) EVENT_NAME: ${{ github.event_name }} + COMMIT_MSG: ${{ github.event.head_commit.message }} + PR_TITLE: ${{ github.event.pull_request.title }} PR_BASE_REF: ${{ github.event.pull_request.base.ref }} + PR_HEAD_REF: ${{ github.event.pull_request.head.ref }} run: | + # Helper function to extract [parallel_run.sh ...] 
content + # Matches ALL occurrences and combines their arguments + extract_parallel_cmd() { + local msg="$1" + local result="" + local remaining="$msg" + + # Loop through all matches of [parallel_run.sh ...] + while [[ "$remaining" =~ \[parallel_run\.sh[[:space:]]+([^\]]+)\] ]]; do + local match="${BASH_REMATCH[1]}" + # Skip literal "..." which is just documentation/example text + if [[ "$match" != "..." ]]; then + if [[ -n "$result" ]]; then + result="$result $match" + else + result="$match" + fi + fi + # Remove the matched portion and continue searching + remaining="${remaining#*"${BASH_REMATCH[0]}"}" + done + + if [[ -n "$result" ]]; then + echo "$result" + fi + } + + # workflow_dispatch always runs tests if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then - echo "test_mode=manual" >> "$GITHUB_OUTPUT" - echo "Mode: manual (workflow_dispatch)" - elif [[ "$EVENT_NAME" == "push" ]]; then - echo "test_mode=lightweight" >> "$GITHUB_OUTPUT" - echo "Mode: lightweight (push to ${GITHUB_REF#refs/heads/})" - elif [[ "$EVENT_NAME" == "pull_request" ]]; then - if [[ "$PR_BASE_REF" == "main" ]]; then - echo "test_mode=full" >> "$GITHUB_OUTPUT" - echo "Mode: full (PR to main)" + echo "run_tests=true" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" + echo "Tests triggered by workflow_dispatch" + exit 0 + fi + + # For push events, check commit message + if [[ "$EVENT_NAME" == "push" ]]; then + # Check for [parallel_run.sh ...] 
first + PARALLEL_CMD=$(extract_parallel_cmd "$COMMIT_MSG") + if [[ -n "$PARALLEL_CMD" ]]; then + echo "run_tests=true" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=$PARALLEL_CMD" >> "$GITHUB_OUTPUT" + echo "Tests triggered by commit message: [parallel_run.sh $PARALLEL_CMD]" + exit 0 + fi + + # Fall back to [run-tests] + if [[ "$COMMIT_MSG" == *"[run-tests]"* ]]; then + echo "run_tests=true" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" + echo "Tests triggered by commit message containing [run-tests]" else - echo "test_mode=lightweight" >> "$GITHUB_OUTPUT" - echo "Mode: lightweight (PR to $PR_BASE_REF)" + echo "run_tests=false" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" + echo "Skipping tests - commit message does not contain [run-tests] or [parallel_run.sh ...]" fi + exit 0 fi + # For pull_request events, check PR title + if [[ "$EVENT_NAME" == "pull_request" ]]; then + # Auto-run full suite for staging → main PRs (no tags required) + if [[ "$PR_BASE_REF" == "main" && "$PR_HEAD_REF" == "staging" ]]; then + echo "run_tests=true" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" + echo "Tests auto-triggered: staging → main PR" + exit 0 + fi + + # Check for [parallel_run.sh ...] 
first + PARALLEL_CMD=$(extract_parallel_cmd "$PR_TITLE") + if [[ -n "$PARALLEL_CMD" ]]; then + echo "run_tests=true" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=$PARALLEL_CMD" >> "$GITHUB_OUTPUT" + echo "Tests triggered by PR title: [parallel_run.sh $PARALLEL_CMD]" + exit 0 + fi + + # Fall back to [run-tests] + if [[ "$PR_TITLE" == *"[run-tests]"* ]]; then + echo "run_tests=true" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" + echo "Tests triggered by PR title containing [run-tests]" + else + echo "run_tests=false" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" + echo "Skipping tests - PR title does not contain [run-tests] or [parallel_run.sh ...]" + fi + exit 0 + fi + + # Default: don't run tests + echo "run_tests=false" >> "$GITHUB_OUTPUT" + echo "parallel_run_cmd=" >> "$GITHUB_OUTPUT" + echo "Skipping tests - unknown event type" + discover: runs-on: ubuntu-latest needs: should-run-tests + if: needs.should-run-tests.outputs.run_tests == 'true' outputs: test_dirs: ${{ steps.set-matrix.outputs.test_dirs }} - test_mode: ${{ needs.should-run-tests.outputs.test_mode }} parallel_run_args: ${{ steps.set-matrix.outputs.parallel_run_args }} job_timeout: ${{ steps.set-matrix.outputs.job_timeout }} # Project deletion flags (handled at workflow level to avoid race conditions) @@ -88,15 +181,25 @@ jobs: - id: set-matrix shell: bash run: | - # Discover test paths and create matrix entries for parallel runners. + # Parse test paths and flags, then create matrix entries for each path. + # Each unique path gets its own parallel GitHub runner. + # + # Sources (priority order): + # 1. [parallel_run.sh ...] from commit/PR → parse paths and flags + # 2. workflow_dispatch inputs → test_path + parallel_run_args + # 3. Default (no paths) → discover all leaf test folders # - # Sources: - # 1. workflow_dispatch inputs → test_path + parallel_run_args - # 2. 
Default → discover all leaf test folders (with mode-based exclusions) + # ALL paths (explicit or discovered) are expanded to leaf directories: + # - Files are kept as-is + # - Directories are expanded to their leaf sub-folders # - # Paths are expanded to leaf directories for maximum parallelism. + # Leaf discovery (Option A algorithm): + # - Leaf directories (test files, no test subdirs) → one job per directory + # - Mixed directories (test files AND test subdirs) → one bundled job for + # all direct test files (space-separated), plus recursive jobs for subdirs + # This maximizes parallelism regardless of how paths are specified. - TEST_MODE="${{ needs.should-run-tests.outputs.test_mode }}" + PARALLEL_CMD="${{ needs.should-run-tests.outputs.parallel_run_cmd }}" INPUT_PATH="${{ inputs.test_path }}" INPUT_ARGS="${{ inputs.parallel_run_args }}" @@ -105,15 +208,31 @@ jobs: declare -a PATHS=() declare -a FLAGS=() - # Build --exclude args for lightweight mode - EXCLUDE_ARGS="" - if [[ "$TEST_MODE" == "lightweight" ]]; then - EXCLUDE_ARGS="--exclude tests/actor --exclude tests/conversation_manager --exclude tests/async_tool_loop" - echo "Lightweight mode: excluding tests/actor, tests/conversation_manager, tests/async_tool_loop" >&2 - fi - - if [[ "$TEST_MODE" == "manual" && -n "$INPUT_PATH" && "$INPUT_PATH" != "." ]]; then + if [[ -n "$PARALLEL_CMD" ]]; then + # Parse [parallel_run.sh ...] from commit/PR message + # Separate flags (--env, --eval-only, etc.) 
from paths + echo "Parsing [parallel_run.sh $PARALLEL_CMD]" >&2 + LAST_FLAG="" + for token in $PARALLEL_CMD; do + if [[ "$token" == --* ]]; then + # It's a flag - collect it and its value if needed + FLAGS+=("$token") + LAST_FLAG="$token" + elif [[ "$LAST_FLAG" == "--env" || "$LAST_FLAG" == "--tags" || "$LAST_FLAG" == "-j" || "$LAST_FLAG" == "--jobs" || "$LAST_FLAG" == "-t" || "$LAST_FLAG" == "--timeout" ]]; then + # Previous token was a flag that takes a value + FLAGS+=("$token") + LAST_FLAG="" + else + # It's a path - collect for expansion + RAW_PATHS+=("$token") + LAST_FLAG="" + fi + done + elif [[ -n "$INPUT_PATH" && "$INPUT_PATH" != "." ]]; then + # workflow_dispatch with specific path(s) + # Split space-separated paths into array read -ra RAW_PATHS <<< "$INPUT_PATH" + # Flags come from parallel_run_args input if [[ -n "$INPUT_ARGS" ]]; then read -ra FLAGS <<< "$INPUT_ARGS" fi @@ -121,21 +240,28 @@ jobs: fi # Expand paths to leaf directories using discover_test_paths.py + # - If RAW_PATHS is empty, discover all test paths from tests/ + # - If RAW_PATHS has entries, expand each directory to its leaf sub-folders + # This ensures maximum parallelism regardless of how paths are specified. 
if (( ${#RAW_PATHS[@]} == 0 )); then + # No explicit paths - discover all while IFS= read -r path; do PATHS+=("$path") - done < <(python3 .github/scripts/discover_test_paths.py $EXCLUDE_ARGS) - if [[ "$TEST_MODE" == "manual" && -n "$INPUT_ARGS" ]]; then + done < <(python3 .github/scripts/discover_test_paths.py) + # Flags come from parallel_run_args input (if any) + if [[ -n "$INPUT_ARGS" ]]; then read -ra FLAGS <<< "$INPUT_ARGS" fi echo "Discovered ${#PATHS[@]} test paths (leaf dirs + individual files)" >&2 else + # Expand explicit paths to their leaf directories while IFS= read -r path; do PATHS+=("$path") - done < <(python3 .github/scripts/discover_test_paths.py $EXCLUDE_ARGS "${RAW_PATHS[@]}") + done < <(python3 .github/scripts/discover_test_paths.py "${RAW_PATHS[@]}") echo "Expanded ${#RAW_PATHS[@]} path(s) to ${#PATHS[@]} leaf test paths" >&2 fi + # Build JSON array of paths for matrix if (( ${#PATHS[@]} == 0 )); then echo "Error: No test paths found" >&2 exit 1 @@ -155,9 +281,12 @@ jobs: # to .env in the pytest job. We only parse it here for project mgmt flags. # ===================================================================== - DELETE_ON_START="false" - DELETE_ON_EXIT="false" - RANDOM_PROJECTS="false" + # CI defaults for project management (can be overridden) + # With local orchestra (default), these are ignored - each job has fresh DB + # Only relevant when ORCHESTRA_URL is set to a non-local URL (staging/production) + DELETE_ON_START="false" # Default: no deletion (local orchestra has fresh DB) + DELETE_ON_EXIT="false" # Default: keep project after tests + RANDOM_PROJECTS="false" # Default: shared project mode # Orchestra branch precedence (highest to lowest): # 1. 
--env LOCAL_ORCHESTRA_BRANCH=xxx (handled below in FLAGS parsing) @@ -179,9 +308,11 @@ jobs: fi # Parse env_file_content for project management flags only + # (actual env vars are written to .env in pytest job, not passed as --env args) ENV_FILE_CONTENT_RAW="${{ inputs.env_file_content }}" if [[ -n "$ENV_FILE_CONTENT_RAW" ]]; then echo "Parsing env_file_content for project management flags..." >&2 + # Try to decode as base64; if it fails or produces garbage, assume raw content if DECODED=$(echo "$ENV_FILE_CONTENT_RAW" | base64 -d 2>/dev/null) && [[ "$DECODED" == *$'\n'* || "$DECODED" == *"="* ]]; then ENV_FILE_CONTENT="$DECODED" else @@ -189,13 +320,20 @@ jobs: fi if [[ -n "$ENV_FILE_CONTENT" ]]; then while IFS= read -r line || [[ -n "$line" ]]; do + # Skip empty lines and comments [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue + + # Extract KEY=VALUE (handles quotes) if [[ "$line" =~ ^[[:space:]]*([A-Za-z_][A-Za-z0-9_]*)[[:space:]]*=[[:space:]]*(.*)[[:space:]]*$ ]]; then key="${BASH_REMATCH[1]}" value="${BASH_REMATCH[2]}" + + # Strip surrounding quotes if present if [[ "$value" =~ ^\"(.*)\"$ ]] || [[ "$value" =~ ^\'(.*)\'$ ]]; then value="${BASH_REMATCH[1]}" fi + + # Only check for project management flags (other vars handled via .env file) case "$key=$value" in UNIFY_TESTS_DELETE_PROJ_ON_START=true|UNIFY_TESTS_DELETE_PROJ_ON_START=True|UNIFY_TESTS_DELETE_PROJ_ON_START=1) DELETE_ON_START="true" @@ -232,6 +370,7 @@ jobs: fi # Process FLAGS (from parallel_run_args) - highest priority + # These are explicit --env args passed via commit message or workflow dispatch declare -a CLEANED_FLAGS=() skip_next=false @@ -244,6 +383,7 @@ jobs: flag="${FLAGS[$i]}" next_val="${FLAGS[$((i+1))]:-}" + # Check for --env with project management flags (strip them, handle at workflow level) if [[ "$flag" == "--env" ]]; then case "$next_val" in UNIFY_TESTS_DELETE_PROJ_ON_START=true|UNIFY_TESTS_DELETE_PROJ_ON_START=True|UNIFY_TESTS_DELETE_PROJ_ON_START=1) @@ -272,7 +412,7 @@ 
jobs: ;; UNIFY_TESTS_RAND_PROJ=true|UNIFY_TESTS_RAND_PROJ=True|UNIFY_TESTS_RAND_PROJ=1) RANDOM_PROJECTS="true" - CLEANED_FLAGS+=("$flag" "$next_val") + CLEANED_FLAGS+=("$flag" "$next_val") # Keep for runners skip_next=true echo " args: RANDOM_PROJECTS=true (override)" >&2 continue @@ -306,6 +446,8 @@ jobs: echo "random_projects=$RANDOM_PROJECTS" >> "$GITHUB_OUTPUT" echo "orchestra_branch=$ORCHESTRA_BRANCH" >> "$GITHUB_OUTPUT" + # Build parallel_run_args from explicit flags only (not env_file content) + # env_file content is handled separately by writing to .env on the runner if (( ${#CLEANED_FLAGS[@]} > 0 )); then flags_str="${CLEANED_FLAGS[*]}" echo "parallel_run_args=$flags_str" >> "$GITHUB_OUTPUT" @@ -330,6 +472,7 @@ jobs: needs: [should-run-tests, discover] # Only run if DELETE_ON_START is requested AND we're in shared project mode if: | + needs.should-run-tests.outputs.run_tests == 'true' && needs.discover.outputs.delete_on_start == 'true' && needs.discover.outputs.random_projects != 'true' env: @@ -373,6 +516,7 @@ jobs: needs: [should-run-tests, discover, setup] if: | always() && + needs.should-run-tests.outputs.run_tests == 'true' && (needs.setup.result == 'success' || needs.setup.result == 'skipped') strategy: fail-fast: false @@ -694,30 +838,28 @@ jobs: run: | set +e - TEST_MODE="${{ needs.discover.outputs.test_mode }}" + # parallel_run_args comes from discover job (parsed from commit message or inputs) EXTRA_ARGS="${{ needs.discover.outputs.parallel_run_args }}" TEST_PATH="${{ steps.normalize-path.outputs.test_args }}" TIMEOUT="${{ steps.normalize-path.outputs.timeout_seconds }}" - # Determine pytest marker args based on test mode - MARKER_ARGS="" - if [[ "$TEST_MODE" == "lightweight" ]]; then - MARKER_ARGS='-- -m "not llm_call"' - fi - + # Start Xvfb manually instead of using xvfb-run. + # xvfb-run doesn't propagate signals to child processes, making + # workflow cancellation ineffective. 
This approach ensures SIGTERM + # reaches parallel_run.sh directly. Xvfb :99 -screen 0 1920x1080x24 & XVFB_PID=$! export DISPLAY=:99 + + # Ensure Xvfb is cleaned up on exit trap "kill $XVFB_PID 2>/dev/null" EXIT + # parallel_run.sh handles starting local orchestra automatically + # Note: TEST_PATH must come before EXTRA_ARGS because EXTRA_ARGS may contain + # "-- pytest-args" and everything after -- is treated as pytest arguments # TEST_PATH is unquoted to allow word splitting for bundled multi-file entries - if [[ -n "$MARKER_ARGS" ]]; then - echo "Running: parallel_run.sh --timeout $TIMEOUT $TEST_PATH $MARKER_ARGS" - tests/parallel_run.sh --timeout "$TIMEOUT" $TEST_PATH $MARKER_ARGS - else - echo "Running: parallel_run.sh --timeout $TIMEOUT $TEST_PATH $EXTRA_ARGS" - tests/parallel_run.sh --timeout "$TIMEOUT" $TEST_PATH $EXTRA_ARGS - fi + echo "Running: parallel_run.sh --timeout $TIMEOUT $TEST_PATH $EXTRA_ARGS" + tests/parallel_run.sh --timeout "$TIMEOUT" $TEST_PATH $EXTRA_ARGS TEST_EXIT_CODE=$? 
echo "exit_code=$TEST_EXIT_CODE" >> "$GITHUB_OUTPUT" exit $TEST_EXIT_CODE @@ -972,6 +1114,7 @@ jobs: # Cancelled runs likely left partial test data that should be cleaned up if: | always() && + needs.should-run-tests.outputs.run_tests == 'true' && needs.discover.outputs.random_projects != 'true' && (needs.discover.outputs.delete_on_exit == 'true' || needs.pytest.result == 'cancelled') env: @@ -1008,7 +1151,7 @@ jobs: name: Consolidate and store cache runs-on: ubuntu-latest needs: [should-run-tests, pytest, cleanup] - if: always() + if: always() && needs.should-run-tests.outputs.run_tests == 'true' steps: - uses: actions/checkout@v4 From f6713d380b1c01d79e8bca5b05a2d72769cc0f2c Mon Sep 17 00:00:00 2001 From: CatB1t Date: Fri, 10 Apr 2026 12:12:50 +0200 Subject: [PATCH 14/14] ci(lightweight): disable checks on push, keep for PRs to staging & main --- .github/workflows/tests_lightweight.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/tests_lightweight.yml b/.github/workflows/tests_lightweight.yml index 2c33e5c33..666330d52 100644 --- a/.github/workflows/tests_lightweight.yml +++ b/.github/workflows/tests_lightweight.yml @@ -9,11 +9,6 @@ run-name: >- }} on: - push: - branches: - - staging - - main - pull_request: branches: - main