From c51c24fbc77cb7cc419e061b04b8ed951fdbb498 Mon Sep 17 00:00:00 2001 From: ahmedhammam Date: Fri, 29 Aug 2025 14:47:50 +0000 Subject: [PATCH 1/6] refactor: split CI workflows by trigger type fix linting remove changes ci job one ci workflow fix lint add on all triggers fix authorize job fixx authorize update name --- .github/workflows/ci-main.yml | 247 ++++++++++ .github/workflows/ci-pull-request-target.yml | 276 +++++++++++ .github/workflows/ci-pull-request.yml | 259 +++++++++++ .github/workflows/tests.yml | 460 ++++++++++++------- 4 files changed, 1075 insertions(+), 167 deletions(-) create mode 100644 .github/workflows/ci-main.yml create mode 100644 .github/workflows/ci-pull-request-target.yml create mode 100644 .github/workflows/ci-pull-request.yml diff --git a/.github/workflows/ci-main.yml b/.github/workflows/ci-main.yml new file mode 100644 index 00000000..c27c5e0c --- /dev/null +++ b/.github/workflows/ci-main.yml @@ -0,0 +1,247 @@ +name: CI - Push to Main + +on: + push: + branches: [main] + paths-ignore: + - '*.detignore' + - '**/*.detignore' + - '*.example' + - '**/*.example' + - '*.json' + - '**/*.json' + - '*.md' + - '**/*.md' + - '*.png' + - '**/*.png' + - '*.sample' + - '**/*.sample' + - '*.TAG' + - '**/*.TAG' + - '*.yaml' + - '**/*.yaml' + - '*.yml' + - '**/*.yml' + - '!/.github/workflows/**' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - 'changelog/**' + +permissions: + contents: read + packages: write + +concurrency: + group: ${{ github.workflow }}-${{ github.sha }} + cancel-in-progress: false # Don't cancel main branch builds + +env: + REGISTRY: ghcr.io + IMAGE_NAME: eval_framework_public + HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu + with: + github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} + - name: Run Pre-Commit + run: | + poetry run pre-commit
install + poetry run pre-commit run -a + - name: Run MyPy + run: | + poetry run mypy src tests utils + + hf-datasets-cache: + runs-on: cpu-runner-8c-32gb-01 # default runner runs out of disk space, unfortunately + container: derskythe/github-runner-base:ubuntu-noble # has the right python, sudo and curl:) + steps: + - name: Checkout Repository # the local setup-poetry action below is resolved from the checked-out workspace + uses: actions/checkout@v4 + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu + with: + github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} + - name: Huggingface datasets cache + uses: actions/cache@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} # <- use shared env + key: hf-datasets-${{ github.run_id }} + restore-keys: | + hf-datasets- + - name: Download datasets + run: | + export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} + export PATH=$HOME/.local/bin:$PATH + poetry run python -c "from eval_framework.task_names import make_sure_all_hf_datasets_are_in_cache; make_sure_all_hf_datasets_are_in_cache()" + + + build: + needs: [lint] + runs-on: cpu-runner-8c-32gb-01 + container: docker:dind + env: + REGISTRY: ghcr.io + IMAGE_NAME: eval_framework_public + REPO_OWNER_LC: aleph-alpha-research + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Registry Authentication + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup Docker BuildX + uses: docker/setup-buildx-action@v1 + + - name: Build and Push Image + uses: docker/build-push-action@v6 + with: + context: .
+ file: Dockerfile + push: true + tags: | + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest + + test-cpu: + runs-on: cpu-runner-8c-32gb-01 + container: derskythe/github-runner-base:ubuntu-noble + needs: [lint, hf-datasets-cache] + steps: + - uses: actions/checkout@v4 + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + - name: Run tests + run: | + export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} + export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} + export PATH=$HOME/.local/bin:$PATH + poetry run pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" + + test-cpu-slow: + runs-on: cpu-runner-8c-32gb-01 + container: derskythe/github-runner-base:ubuntu-noble + needs: [lint, hf-datasets-cache] + steps: + - uses: actions/checkout@v4 + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + - name: Run tests + run: | + export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} + export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} + export PATH=$HOME/.local/bin:$PATH + poetry run python -c "import nltk; nltk.download('punkt_tab')" + poetry run pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" + + test-gpu: + needs: [build, test-cpu, test-cpu-slow] + runs-on: EvalFrameworkGPURunner + if: ${{ always() + && needs.test-cpu.result == 'success' + && needs.test-cpu-slow.result == 'success' + && needs.build.result == 'success' }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Normalize repo owner to lowercase + run: | + echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV + + - 
name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Pull container image + run: | + docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" + + - name: Run GPU tests inside container with GPU access + run: | + timeout 20m docker run --rm --gpus all \ + -v ${{ github.workspace }}:/eval_framework \ + -w /eval_framework \ + -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ + -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ + bash -c 'poetry run pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm"' + + test-gpu-vllm: + needs: [build, test-cpu, test-cpu-slow, test-gpu] + runs-on: EvalFrameworkGPURunner + if: ${{ always() + && needs.test-cpu.result == 'success' + && needs.test-cpu-slow.result == 'success' + && needs.build.result == 'success' + && (needs.test-gpu.result == 'success' || needs.test-gpu.result == 'skipped') }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Normalize repo owner to lowercase + run: | + echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV + + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Pull container image + run: | + docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" + + - name: Run GPU tests inside container 
with GPU access + run: | + timeout 20m docker run --rm --gpus all \ + -v ${{ github.workspace }}:/eval_framework \ + -w /eval_framework \ + -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ + -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ + -e VLLM_LOGGING_LEVEL=DEBUG \ + -e VLLM_WORKER_MULTIPROC_METHOD=spawn \ + -e VLLM_USE_MODELSCOPE=False \ + -e VLLM_NCCL_SO_PATH="" \ + -e VLLM_USE_TRITON_FLASH_ATTN=0 \ + -e VLLM_DISABLE_CUSTOM_ALL_REDUCE=1 \ + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ + bash -c 'curl -sSL https://install.python-poetry.org | python3 - && export PATH="$HOME/.local/bin:$PATH" && poetry --version && poetry run pytest --log-cli-level=INFO -v -m "vllm"' diff --git a/.github/workflows/ci-pull-request-target.yml b/.github/workflows/ci-pull-request-target.yml new file mode 100644 index 00000000..41349fc7 --- /dev/null +++ b/.github/workflows/ci-pull-request-target.yml @@ -0,0 +1,276 @@ +name: CI - Pull Request Target + +on: + pull_request_target: + types: [opened, reopened, synchronize, labeled] + branches: [main] + paths-ignore: + - '*.detignore' + - '**/*.detignore' + - '*.example' + - '**/*.example' + - '*.json' + - '**/*.json' + - '*.md' + - '**/*.md' + - '*.png' + - '**/*.png' + - '*.sample' + - '**/*.sample' + - '*.TAG' + - '**/*.TAG' + - '*.yaml' + - '**/*.yaml' + - '*.yml' + - '**/*.yml' + - '!/.github/workflows/**' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - 'changelog/**' + +permissions: + contents: read + packages: write + pull-requests: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +env: + REGISTRY: ghcr.io + IMAGE_NAME: eval_framework_public + HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets + +jobs: + # Security check for pull_request_target + authorize-fork-pr: + runs-on: ubuntu-latest + outputs: + should_run: ${{ steps.check.outputs.should_run }} + steps: + - name: Check authorization + id: check + run: | + # Check 
if PR is from a fork + if [[ "${{ github.event.pull_request.head.repo.fork }}" == "false" ]]; then + echo "PR is from the same repository. Skipping pull_request_target workflow." + echo "should_run=false" >> $GITHUB_OUTPUT + exit 0 + fi + + # If from fork, check for trusted_contributor label + if [[ "${{ contains(github.event.pull_request.labels.*.name, 'trusted_contributor') }}" == "true" ]]; then + echo "PR is from a fork with 'trusted_contributor' label. Authorized!" + echo "should_run=true" >> $GITHUB_OUTPUT + exit 0 + fi + + # Otherwise, don't run + echo "PR is from a fork without 'trusted_contributor' label. Skipping." + echo "should_run=false" >> $GITHUB_OUTPUT + exit 0 + + + lint: + needs: authorize-fork-pr + if: ${{ needs.authorize-fork-pr.outputs.should_run == 'true' }} + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu + with: + github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} + - name: Run Pre-Commit + run: | + poetry run pre-commit install + poetry run pre-commit run -a + - name: Run MyPy + run: | + poetry run mypy src tests utils + + + build: + needs: [authorize-fork-pr, lint] + if: ${{ needs.authorize-fork-pr.outputs.should_run == 'true'}} + runs-on: cpu-runner-8c-32gb-01 + container: docker:dind + env: + REGISTRY: ghcr.io + IMAGE_NAME: eval_framework_public + REPO_OWNER_LC: aleph-alpha-research + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Registry Authentication + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup Docker BuildX + uses: docker/setup-buildx-action@v1 + + - name: Build and Push Image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: Dockerfile + push: true # published under a per-PR tag so fork builds never overwrite the :latest image from main + tags: | + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.pull_request.number }} + + test-cpu: + needs: [authorize-fork-pr, lint] + if: ${{ needs.authorize-fork-pr.outputs.should_run == 'true' }} + runs-on: cpu-runner-8c-32gb-01 + container: derskythe/github-runner-base:ubuntu-noble + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + - name: Run tests + run: | + export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} + export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} + export PATH=$HOME/.local/bin:$PATH + poetry run pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" + + test-cpu-slow: + needs: [authorize-fork-pr, lint] + if: ${{ needs.authorize-fork-pr.outputs.should_run == 'true' }} + runs-on: cpu-runner-8c-32gb-01 + container: derskythe/github-runner-base:ubuntu-noble + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + - name: Run tests + run: | + export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} + export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} + export PATH=$HOME/.local/bin:$PATH + poetry run python -c "import nltk; nltk.download('punkt_tab')" + poetry run pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" + + test-gpu: + needs: [authorize-fork-pr, build, test-cpu, test-cpu-slow] + runs-on: EvalFrameworkGPURunner + if: ${{ always() + &&
needs.authorize-fork-pr.outputs.should_run == 'true' + && needs.test-cpu.result == 'success' + && needs.test-cpu-slow.result == 'success' + && (needs.build.result == 'success' || needs.build.result == 'skipped') }} + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Normalize repo owner to lowercase + run: | + echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV + + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Pull container image # the per-PR image pushed by the build job + run: | + docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.pull_request.number }}" + + - name: Run GPU tests inside container with GPU access + run: | + timeout 20m docker run --rm --gpus all \ + -v ${{ github.workspace }}:/eval_framework \ + -w /eval_framework \ + -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ + -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.pull_request.number }} \ + bash -c 'poetry run pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm"' + + test-gpu-vllm: + needs: [authorize-fork-pr, build, test-cpu, test-cpu-slow, test-gpu] + runs-on: EvalFrameworkGPURunner + if: ${{ always() + && needs.authorize-fork-pr.outputs.should_run == 'true' + && needs.test-cpu.result == 'success' + && needs.test-cpu-slow.result == 'success' + && (needs.build.result == 'success' || needs.build.result == 'skipped') + && (needs.test-gpu.result == 'success' || needs.test-gpu.result == 'skipped') }} + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + ref: ${{
github.event.pull_request.head.sha }} + + - name: Normalize repo owner to lowercase + run: | + echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV + + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Pull container image # the per-PR image pushed by the build job + run: | + docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.pull_request.number }}" + + - name: Run GPU tests inside container with GPU access + run: | + timeout 20m docker run --rm --gpus all \ + -v ${{ github.workspace }}:/eval_framework \ + -w /eval_framework \ + -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ + -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ + -e VLLM_LOGGING_LEVEL=DEBUG \ + -e VLLM_WORKER_MULTIPROC_METHOD=spawn \ + -e VLLM_USE_MODELSCOPE=False \ + -e VLLM_NCCL_SO_PATH="" \ + -e VLLM_USE_TRITON_FLASH_ATTN=0 \ + -e VLLM_DISABLE_CUSTOM_ALL_REDUCE=1 \ + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.pull_request.number }} \ + bash -c 'curl -sSL https://install.python-poetry.org | python3 - && export PATH="$HOME/.local/bin:$PATH" && poetry --version && poetry run pytest --log-cli-level=INFO -v -m "vllm"' diff --git a/.github/workflows/ci-pull-request.yml b/.github/workflows/ci-pull-request.yml new file mode 100644 index 00000000..88843188 --- /dev/null +++ b/.github/workflows/ci-pull-request.yml @@ -0,0 +1,259 @@ +name: CI - Pull Request + +on: + push: + branches: [main] + paths-ignore: + - '**.md' + pull_request: + types: [opened, reopened, synchronize, labeled] + paths-ignore: + - '**.md' + # Manually trigger a workflow for a branch + workflow_dispatch: + # Merge queue trigger + merge_group: + +permissions: + contents: read + +concurrency: + group: ${{
github.workflow }}-${{ github.event.pull_request.number || github.ref }} # push, workflow_dispatch and merge_group runs have no PR number, so group them by ref + cancel-in-progress: true + +env: + REGISTRY: registry.gitlab.aleph-alpha.de + REPO_OWNER: research/public-registry + IMAGE_NAME: eval_framework + HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets # <- single source of truth + UV_LINK_MODE: symlink + UV_LOCKED: 1 + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Setup uv + uses: astral-sh/setup-uv@v6 + with: + version: "~=0.8.16" + + - name: Run Pre-Commit + run: uvx pre-commit run --all-files + + - name: Dependency check + run: ./utils/dependency_check.sh + + - name: Run MyPy + run: uv run --all-extras mypy + + hf-datasets-cache: + runs-on: cpu-runner-8c-32gb-01 # default runner runs out of disk space, unfortunately + steps: + - uses: actions/checkout@v4 + if: github.ref == 'refs/heads/main' + + - name: Setup uv + uses: astral-sh/setup-uv@v6 + if: github.ref == 'refs/heads/main' + with: + version: "~=0.8.16" + + - name: Huggingface datasets cache + uses: actions/cache@v4 + if: github.ref == 'refs/heads/main' + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} # <- use shared env + key: hf-datasets-${{ github.run_id }} + restore-keys: | + hf-datasets- + - name: Download datasets + if: github.ref == 'refs/heads/main' + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + run: uv run --extra=comet --extra=openai python -c "from eval_framework.tasks.task_names import make_sure_all_hf_datasets_are_in_cache; make_sure_all_hf_datasets_are_in_cache()" + + tag: + # Set Docker Tag and Image Name for Docker Build and Push (GPU Runs) + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.set-tag.outputs.tag }} + image: ${{ steps.set-tag.outputs.image }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set Tag + id: set-tag + run: | + if [ "${{ github.ref }}" == "refs/heads/main" ]; then + TAG='latest' + else + # head_ref is the correct branch name for PRs + BRANCH_NAME=${{
github.head_ref || github.ref_name }} + # Convert slashes with hyphens and ensure valid Docker tag format + TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20) + fi + echo "tag=$TAG" >> $GITHUB_OUTPUT + echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT + + - name: Output Docker Tag + run: | + echo "Docker Tag: ${{ steps.set-tag.outputs.tag }}" + echo "Docker image: ${{ steps.set-tag.outputs.image }}" + + build: + # Build and Push Docker Image (GPU Runs) + needs: [lint, tag] + runs-on: cpu-runner-8c-32gb-01 + container: docker:dind + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Registry Authentication + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: token + password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }} + + - name: Setup Docker BuildX + uses: docker/setup-buildx-action@v1 + + - name: Build and Push Image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: Dockerfile + push: true + tags: ${{ needs.tag.outputs.image }} + + test-extras: + # Test uv installs (CPU) + runs-on: ubuntu-latest + needs: [lint] + strategy: + fail-fast: false + matrix: + extras: ['', 'determined', 'api', 'openai', 'transformers', 'accelerate', 'comet', 'optional'] + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Setup uv + uses: astral-sh/setup-uv@v6 + with: + version: "~=0.8.16" + + - name: Verify install and functionality via uv --exact + run: | + if [ "${{ matrix.extras }}" != "" ]; then + echo "Testing extra: ${{ matrix.extras }}" + uv run --exact --extra ${{ matrix.extras }} pytest -v --noconftest tests/tests_eval_framework/installs/test_${{ matrix.extras }}.py + else + echo "Testing core install" + uv run --exact pytest --noconftest -v tests/tests_eval_framework/installs/test_core.py + fi + + test-cpu: + runs-on: cpu-runner-8c-32gb-01 + container: derskythe/github-runner-base:ubuntu-noble + needs: [hf-datasets-cache, test-extras] + steps: + - uses: actions/checkout@v4 + + - name: Setup uv + uses: astral-sh/setup-uv@v6 + with: + version: "~=0.8.16" + + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + + - name: Run tests + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + run: uv run --all-extras pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" + + test-cpu-slow: + runs-on: cpu-runner-8c-32gb-01 + container: derskythe/github-runner-base:ubuntu-noble + needs: [hf-datasets-cache, test-extras] + steps: + - uses: actions/checkout@v4 + + - name: Setup uv + uses: astral-sh/setup-uv@v6 + with: + version: "~=0.8.16" + + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + + - name: Run tests + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + run: | + uv run --all-extras python -c "import nltk; 
nltk.download('punkt_tab')" # otherwise there's a race condition in ntltk + uv run --all-extras pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" + + test-docker-gpu: + # Run full test suite in Docker Container with GPU + runs-on: EvalFrameworkGPURunner + needs: [tag, build, test-cpu, test-cpu-slow] + container: + image: "${{ needs.tag.outputs.image }}" + credentials: + username: token + password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }} + options: --gpus all + defaults: + run: + working-directory: /eval_framework + steps: + - name: Verify GPU installs via uv --exact + run: | + set -e # fail fast if any test fails + + echo "Testing vllm extra" + uv run --exact --extra vllm pytest -v --noconftest tests/tests_eval_framework/installs/test_vllm.py + + echo "Testing mistral extra" + uv run --exact --extra mistral pytest -v --noconftest tests/tests_eval_framework/installs/test_mistral.py + + echo "Testing all extras together" + uv run --exact --all-extras pytest -v --noconftest tests/tests_eval_framework/installs/ + + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + + - name: Test GPU + timeout-minutes: 20 + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + run: pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm" + + - name: Test VLLM + timeout-minutes: 20 + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + VLLM_LOGGING_LEVEL: DEBUG + VLLM_WORKER_MULTIPROC_METHOD: spawn + VLLM_USE_MODELSCOPE: False + VLLM_NCCL_SO_PATH: "" + VLLM_USE_TRITON_FLASH_ATTN: 0 + VLLM_DISABLE_CUSTOM_ALL_REDUCE: 1 + run: pytest --log-cli-level=INFO -v -m "vllm" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3ec704d6..8e070659 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,124 +2,219 @@ name: CI on: push: - branches: [main] + branches: [main, 
ci/split-workflows] # TEMPORARY: Remove ci/split-workflows before merging paths-ignore: - - '**.md' + - '*.detignore' + - '**/*.detignore' + - '*.example' + - '**/*.example' + - '*.json' + - '**/*.json' + - '*.md' + - '**/*.md' + - '*.png' + - '**/*.png' + - '*.sample' + - '**/*.sample' + - '*.TAG' + - '**/*.TAG' + - '*.yaml' + - '**/*.yaml' + - '*.yml' + - '**/*.yml' + - '!/.github/workflows/**' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - 'changelog/**' pull_request: types: [opened, reopened, synchronize, labeled] + branches: [main, ci/split-workflows] # TEMPORARY: Remove ci/split-workflows before merging paths-ignore: - - '**.md' - # Manually trigger a workflow for a branch - workflow_dispatch: - # Merge queue trigger - merge_group: + - '*.detignore' + - '**/*.detignore' + - '*.example' + - '**/*.example' + - '*.json' + - '**/*.json' + - '*.md' + - '**/*.md' + - '*.png' + - '**/*.png' + - '*.sample' + - '**/*.sample' + - '*.TAG' + - '**/*.TAG' + - '*.yaml' + - '**/*.yaml' + - '*.yml' + - '**/*.yml' + - '!/.github/workflows/**' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - 'changelog/**' + pull_request_target: + types: [opened, reopened, synchronize, labeled] + branches: [main, ci/split-workflows] # TEMPORARY: Remove ci/split-workflows before merging + paths-ignore: + - '*.detignore' + - '**/*.detignore' + - '*.example' + - '**/*.example' + - '*.json' + - '**/*.json' + - '*.md' + - '**/*.md' + - '*.png' + - '**/*.png' + - '*.sample' + - '**/*.sample' + - '*.TAG' + - '**/*.TAG' + - '*.yaml' + - '**/*.yaml' + - '*.yml' + - '**/*.yml' + - '!/.github/workflows/**' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - 'changelog/**' permissions: contents: read + packages: write + pull-requests: read concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: ${{ github.event_name != 'push' || 
github.ref != 'refs/heads/main' }} env: - REGISTRY: registry.gitlab.aleph-alpha.de - REPO_OWNER: research/public-registry - IMAGE_NAME: eval_framework - HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets # <- single source of truth - UV_LINK_MODE: symlink - UV_LOCKED: 1 + REGISTRY: ghcr.io + IMAGE_NAME: eval_framework_public + HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets jobs: + # Security check for pull_request_target + authorize-fork-pr: + runs-on: ubuntu-latest + outputs: + should_run: ${{ steps.check.outputs.should_run }} + steps: + - name: Check authorization + id: check + run: | + # For push events, always run + if [[ "${{ github.event_name }}" == "push" ]]; then + echo "Event is push. Running workflow." + echo "should_run=true" >> $GITHUB_OUTPUT + exit 0 + fi + + # For pull_request events, check if it's from a fork + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + # GitHub doesn't always populate fork info for pull_request events + # So we'll check if the head repo name differs from the base repo name + if [[ "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.event.pull_request.base.repo.full_name }}" ]]; then + echo "PR is from the same repository. Running workflow." + echo "should_run=true" >> $GITHUB_OUTPUT + exit 0 + else + echo "PR is from a fork. Skipping pull_request workflow." + echo "should_run=false" >> $GITHUB_OUTPUT + exit 0 + fi + fi + + # For pull_request_target, check if it's from a fork + if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then + # Check if PR is from a fork + if [[ "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.event.pull_request.base.repo.full_name }}" ]]; then + echo "PR is from the same repository. Skipping pull_request_target workflow." 
+ echo "should_run=false" >> $GITHUB_OUTPUT + exit 0 + fi + + # If from fork, check for trusted_contributor label + if [[ "${{ contains(github.event.pull_request.labels.*.name, 'trusted_contributor') }}" == "true" ]]; then + echo "PR is from a fork with 'trusted_contributor' label. Authorized!" + echo "should_run=true" >> $GITHUB_OUTPUT + exit 0 + fi + + # Otherwise, don't run + echo "PR is from a fork without 'trusted_contributor' label. Skipping." + echo "should_run=false" >> $GITHUB_OUTPUT + exit 0 + fi + lint: - runs-on: ubuntu-latest # default runner runs out of disk space due to hf cache + runs-on: ubuntu-latest + needs: [authorize-fork-pr] + if: needs.authorize-fork-pr.outputs.should_run == 'true' steps: - name: Checkout Repository uses: actions/checkout@v4 - - - name: Setup uv - uses: astral-sh/setup-uv@v6 with: - version: "~=0.8.16" - + ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu + with: + github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} - name: Run Pre-Commit - run: uvx pre-commit run --all-files - - - name: Dependency check - run: ./utils/dependency_check.sh - + run: | + poetry run pre-commit install + poetry run pre-commit run -a - name: Run MyPy - run: uv run --all-extras mypy + run: | + poetry run mypy src tests utils hf-datasets-cache: - runs-on: cpu-runner-8c-32gb-01 # default runner runs out of disk space, unfortunately + runs-on: cpu-runner-8c-32gb-01 + container: derskythe/github-runner-base:ubuntu-noble + needs: [authorize-fork-pr] + if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.authorize-fork-pr.outputs.should_run == 'true' steps: - - uses: actions/checkout@v4 - if: github.ref == 'refs/heads/main' - - - name: Setup uv - uses: astral-sh/setup-uv@v6 - if: github.ref == 'refs/heads/main' + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu with: - 
version: "~=0.8.16" - + github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} - name: Huggingface datasets cache uses: actions/cache@v4 - if: github.ref == 'refs/heads/main' with: - path: ${{ env.HF_DATASET_CACHE_DIR }} # <- use shared env + path: ${{ env.HF_DATASET_CACHE_DIR }} key: hf-datasets-${{ github.run_id }} restore-keys: | hf-datasets- - - name: Download datasets - if: github.ref == 'refs/heads/main' - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - run: uv run --extra=comet --extra=openai python -c "from eval_framework.tasks.task_names import make_sure_all_hf_datasets_are_in_cache; make_sure_all_hf_datasets_are_in_cache()" - - tag: - # Set Docker Tag and Image Name for Docker Build and Push (GPU Runs) - runs-on: ubuntu-latest - outputs: - tag: ${{ steps.set-tag.outputs.tag }} - image: ${{ steps.set-tag.outputs.image }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set Tag - id: set-tag - run: | - if [ "${{ github.ref }}" == "refs/heads/main" ]; then - TAG='latest' - else - # head_ref is the correct branch name for PRs - BRANCH_NAME=${{ github.head_ref || github.ref_name }} - # Convert slashes with hyphens and ensure valid Docker tag format - TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20) - fi - echo "tag=$TAG" >> $GITHUB_OUTPUT - echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT - - - name: Output Docker Tag - run: | - echo "Docker Tag: ${{ steps.set-tag.outputs.tag }}" - echo "Docker image: ${{ steps.set-tag.outputs.image }}" + run: | + export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} + export PATH=$HOME/.local/bin:$PATH + poetry run python -c "from eval_framework.task_names import make_sure_all_hf_datasets_are_in_cache; make_sure_all_hf_datasets_are_in_cache()" build: - # Build and Push Docker Image (GPU Runs) - needs: [lint, tag] + needs: [authorize-fork-pr, lint] runs-on: cpu-runner-8c-32gb-01 container: docker:dind + if: 
needs.authorize-fork-pr.outputs.should_run == 'true' && needs.lint.result == 'success' + env: + REGISTRY: ghcr.io + IMAGE_NAME: eval_framework_public + REPO_OWNER_LC: aleph-alpha-research steps: - name: Checkout Repository uses: actions/checkout@v4 + with: + ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - name: Registry Authentication - uses: docker/login-action@v3 + uses: docker/login-action@v2 with: registry: ${{ env.REGISTRY }} - username: token - password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - name: Setup Docker BuildX uses: docker/setup-buildx-action@v1 @@ -130,130 +225,161 @@ jobs: context: . file: Dockerfile push: true - tags: ${{ needs.tag.outputs.image }} - - test-extras: - # Test uv installs (CPU) - runs-on: ubuntu-latest - needs: [lint] - strategy: - fail-fast: false - matrix: - extras: ['', 'determined', 'api', 'openai', 'transformers', 'accelerate', 'comet', 'optional'] - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Setup uv - uses: astral-sh/setup-uv@v6 - with: - version: "~=0.8.16" - - - name: Verify install and functionality via uv --exact - run: | - if [ "${{ matrix.extras }}" != "" ]; then - echo "Testing extra: ${{ matrix.extras }}" - uv run --exact --extra ${{ matrix.extras }} pytest -v --noconftest tests/tests_eval_framework/installs/test_${{ matrix.extras }}.py - else - echo "Testing core install" - uv run --exact pytest --noconftest -v tests/tests_eval_framework/installs/test_core.py - fi + tags: | + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest test-cpu: runs-on: cpu-runner-8c-32gb-01 container: derskythe/github-runner-base:ubuntu-noble - needs: [hf-datasets-cache, test-extras] + needs: [authorize-fork-pr, lint, hf-datasets-cache] + if: | + always() && + needs.authorize-fork-pr.outputs.should_run == 'true' && + needs.lint.result == 
'success' && + (needs.hf-datasets-cache.result == 'success' || needs.hf-datasets-cache.result == 'skipped') steps: - - uses: actions/checkout@v4 - - - name: Setup uv - uses: astral-sh/setup-uv@v6 + - name: Checkout Repository + uses: actions/checkout@v4 with: - version: "~=0.8.16" - + ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu - name: Huggingface datasets cache uses: actions/cache/restore@v4 with: - path: ${{ env.HF_DATASET_CACHE_DIR }} # <- shared path + path: ${{ env.HF_DATASET_CACHE_DIR }} key: hf-datasets- - - name: Run tests - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - run: uv run --all-extras pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" + run: | + export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} + export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} + export PATH=$HOME/.local/bin:$PATH + poetry run pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" test-cpu-slow: runs-on: cpu-runner-8c-32gb-01 container: derskythe/github-runner-base:ubuntu-noble - needs: [hf-datasets-cache, test-extras] + needs: [authorize-fork-pr, lint, hf-datasets-cache] + if: | + always() && + needs.authorize-fork-pr.outputs.should_run == 'true' && + needs.lint.result == 'success' && + (needs.hf-datasets-cache.result == 'success' || needs.hf-datasets-cache.result == 'skipped') steps: - - uses: actions/checkout@v4 - - - name: Setup uv - uses: astral-sh/setup-uv@v6 + - name: Checkout Repository + uses: actions/checkout@v4 with: - version: "~=0.8.16" - + ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} + - name: Setup poetry + uses: ./.github/actions/setup-poetry-on-ubuntu - name: Huggingface datasets cache uses: actions/cache/restore@v4 with: - path: ${{ env.HF_DATASET_CACHE_DIR }} # <- shared path + path: ${{ 
env.HF_DATASET_CACHE_DIR }} key: hf-datasets- - - name: Run tests - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} run: | - uv run --all-extras python -c "import nltk; nltk.download('punkt_tab')" # otherwise there's a race condition in ntltk - uv run --all-extras pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" - - test-docker-gpu: - # Run full test suite in Docker Container with GPU + export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} + export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} + export PATH=$HOME/.local/bin:$PATH + poetry run python -c "import nltk; nltk.download('punkt_tab')" + poetry run pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" + + test-gpu: + needs: [authorize-fork-pr, build, test-cpu, test-cpu-slow] runs-on: EvalFrameworkGPURunner - needs: [tag, build, test-cpu, test-cpu-slow] - container: - image: "${{ needs.tag.outputs.image }}" - credentials: - username: token - password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }} - options: --gpus all - defaults: - run: - working-directory: /eval_framework + if: | + always() && + needs.authorize-fork-pr.outputs.should_run == 'true' && + needs.test-cpu.result == 'success' && + needs.test-cpu-slow.result == 'success' && + (needs.build.result == 'success' || (github.event_name != 'push' && needs.build.result == 'skipped')) steps: - - name: Verify GPU installs via uv --exact + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} + + - name: Normalize repo owner to lowercase run: | - set -e # fail fast if any test fails + echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV + + - name: Huggingface datasets cache + uses: actions/cache/restore@v4 + with: + path: ${{ env.HF_DATASET_CACHE_DIR }} + key: hf-datasets- + 
+ - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - echo "Testing vllm extra" - uv run --exact --extra vllm pytest -v --noconftest tests/tests_eval_framework/installs/test_vllm.py + - name: Pull container image + run: | + docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" - echo "Testing mistral extra" - uv run --exact --extra mistral pytest -v --noconftest tests/tests_eval_framework/installs/test_mistral.py + - name: Run GPU tests inside container with GPU access + run: | + timeout 20m docker run --rm --gpus all \ + -v ${{ github.workspace }}:/eval_framework \ + -w /eval_framework \ + -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ + -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ + bash -c 'poetry run pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm"' + + test-gpu-vllm: + needs: [authorize-fork-pr, build, test-cpu, test-cpu-slow, test-gpu] + runs-on: EvalFrameworkGPURunner + if: | + always() && + needs.authorize-fork-pr.outputs.should_run == 'true' && + needs.test-cpu.result == 'success' && + needs.test-cpu-slow.result == 'success' && + (needs.build.result == 'success' || (github.event_name != 'push' && needs.build.result == 'skipped')) && + (needs.test-gpu.result == 'success' || needs.test-gpu.result == 'skipped') + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - echo "Testing all extras together" - uv run --exact --all-extras pytest -v --noconftest tests/tests_eval_framework/installs/ + - name: Normalize repo owner to lowercase + run: | + echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> 
$GITHUB_ENV - name: Huggingface datasets cache uses: actions/cache/restore@v4 with: - path: ${{ env.HF_DATASET_CACHE_DIR }} # <- shared path + path: ${{ env.HF_DATASET_CACHE_DIR }} key: hf-datasets- - - name: Test GPU - timeout-minutes: 20 - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - run: pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm" + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Pull container image + run: | + docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" - - name: Test VLLM - timeout-minutes: 20 - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - VLLM_LOGGING_LEVEL: DEBUG - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: False - VLLM_NCCL_SO_PATH: "" - VLLM_USE_TRITON_FLASH_ATTN: 0 - VLLM_DISABLE_CUSTOM_ALL_REDUCE: 1 - run: pytest --log-cli-level=INFO -v -m "vllm" + - name: Run GPU tests inside container with GPU access + run: | + timeout 20m docker run --rm --gpus all \ + -v ${{ github.workspace }}:/eval_framework \ + -w /eval_framework \ + -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ + -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ + -e VLLM_LOGGING_LEVEL=DEBUG \ + -e VLLM_WORKER_MULTIPROC_METHOD=spawn \ + -e VLLM_USE_MODELSCOPE=False \ + -e VLLM_NCCL_SO_PATH="" \ + -e VLLM_USE_TRITON_FLASH_ATTN=0 \ + -e VLLM_DISABLE_CUSTOM_ALL_REDUCE=1 \ + ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ + bash -c 'curl -sSL https://install.python-poetry.org | python3 - && export PATH="$HOME/.local/bin:$PATH" && poetry --version && poetry run pytest --log-cli-level=INFO -v -m "vllm"' From e4c831a8b3fb76b6a526b491db13966dd4cb92cf Mon Sep 17 00:00:00 2001 From: ahmedhammam Date: Mon, 1 Dec 2025 09:59:03 +0000 Subject: [PATCH 2/6] ci: add label-based authorization for fork PRs --- 
.github/workflows/ci-main.yml | 245 ---------- .github/workflows/ci-pull-request-target.yml | 276 ----------- .github/workflows/ci-pull-request.yml | 259 ---------- .github/workflows/tests.yml | 468 ++++++++----------- 4 files changed, 200 insertions(+), 1048 deletions(-) delete mode 100644 .github/workflows/ci-main.yml delete mode 100644 .github/workflows/ci-pull-request-target.yml delete mode 100644 .github/workflows/ci-pull-request.yml diff --git a/.github/workflows/ci-main.yml b/.github/workflows/ci-main.yml deleted file mode 100644 index c27c5e0c..00000000 --- a/.github/workflows/ci-main.yml +++ /dev/null @@ -1,245 +0,0 @@ -name: CI - Push to Main - -on: - push: - branches: [main] - paths-ignore: - - '*.detignore' - - '**/*.detignore' - - '*.example' - - '**/*.example' - - '*.json' - - '**/*.json' - - '*.md' - - '**/*.md' - - '*.png' - - '**/*.png' - - '*.sample' - - '**/*.sample' - - '*.TAG' - - '**/*.TAG' - - '*.yaml' - - '**/*.yaml' - - '*.yml' - - '**/*.yml' - - '!/.github/workflows/**' - - 'docs/**' - - 'LICENSE' - - '.gitignore' - - 'changelog/**' - -permissions: - contents: read - packages: write - -concurrency: - group: ${{ github.workflow }}-${{ github.sha }} - cancel-in-progress: false # Don't cancel main branch builds - -env: - REGISTRY: ghcr.io - IMAGE_NAME: eval_framework_public - HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu - with: - github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} - - name: Run Pre-Commit - run: | - poetry run pre-commit install - poetry run pre-commit run -a - - name: Run MyPy - run: | - poetry run mypy src tests utils - - hf-datasets-cache: - runs-on: cpu-runner-8c-32gb-01 # default runner runs out of disk space, unfortunately - container: derskythe/github-runner-base:ubuntu-noble # has the right python, sudo and curl:) - steps: - - 
name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu - with: - github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} - - name: Huggingface datasets cache - uses: actions/cache@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} # <- use shared env - key: hf-datasets-${{ github.run_id }} - restore-keys: | - hf-datasets- - - name: Download datasets - run: | - export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} - export PATH=$HOME/.local/bin:$PATH - poetry run python -c "from eval_framework.task_names import make_sure_all_hf_datasets_are_in_cache; make_sure_all_hf_datasets_are_in_cache()" - - - build: - needs: [lint] - runs-on: cpu-runner-8c-32gb-01 - container: docker:dind - env: - REGISTRY: ghcr.io - IMAGE_NAME: eval_framework_public - REPO_OWNER_LC: aleph-alpha-research - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Registry Authentication - uses: docker/login-action@v2 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup Docker BuildX - uses: docker/setup-buildx-action@v1 - - - name: Build and Push Image - uses: docker/build-push-action@v6 - with: - context: . 
- file: Dockerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest - - test-cpu: - runs-on: cpu-runner-8c-32gb-01 - container: derskythe/github-runner-base:ubuntu-noble - needs: [lint, hf-datasets-cache] - steps: - - uses: actions/checkout@v4 - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - name: Run tests - run: | - export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} - export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} - export PATH=$HOME/.local/bin:$PATH - poetry run pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" - - test-cpu-slow: - runs-on: cpu-runner-8c-32gb-01 - container: derskythe/github-runner-base:ubuntu-noble - needs: [lint, hf-datasets-cache] - steps: - - uses: actions/checkout@v4 - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - name: Run tests - run: | - export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} - export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} - export PATH=$HOME/.local/bin:$PATH - poetry run python -c "import nltk; nltk.download('punkt_tab')" - poetry run pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" - - test-gpu: - needs: [build, test-cpu, test-cpu-slow] - runs-on: EvalFrameworkGPURunner - if: ${{ always() - && needs.test-cpu.result == 'success' - && needs.test-cpu-slow.result == 'success' - && needs.build.result == 'success' }} - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Normalize repo owner to lowercase - run: | - echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV - - - 
name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull container image - run: | - docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" - - - name: Run GPU tests inside container with GPU access - run: | - timeout 20m docker run --rm --gpus all \ - -v ${{ github.workspace }}:/eval_framework \ - -w /eval_framework \ - -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ - -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ - bash -c 'poetry run pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm"' - - test-gpu-vllm: - needs: [build, test-cpu, test-cpu-slow, test-gpu] - runs-on: EvalFrameworkGPURunner - if: ${{ always() - && needs.test-cpu.result == 'success' - && needs.test-cpu-slow.result == 'success' - && needs.build.result == 'success' - && (needs.test-gpu.result == 'success' || needs.test-gpu.result == 'skipped') }} - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Normalize repo owner to lowercase - run: | - echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV - - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull container image - run: | - docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" - - - name: Run GPU tests inside container 
with GPU access - run: | - timeout 20m docker run --rm --gpus all \ - -v ${{ github.workspace }}:/eval_framework \ - -w /eval_framework \ - -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ - -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ - -e VLLM_LOGGING_LEVEL=DEBUG \ - -e VLLM_WORKER_MULTIPROC_METHOD=spawn \ - -e VLLM_USE_MODELSCOPE=False \ - -e VLLM_NCCL_SO_PATH="" \ - -e VLLM_USE_TRITON_FLASH_ATTN=0 \ - -e VLLM_DISABLE_CUSTOM_ALL_REDUCE=1 \ - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ - bash -c 'curl -sSL https://install.python-poetry.org | python3 - && export PATH="$HOME/.local/bin:$PATH" && poetry --version && poetry run pytest --log-cli-level=INFO -v -m "vllm"' diff --git a/.github/workflows/ci-pull-request-target.yml b/.github/workflows/ci-pull-request-target.yml deleted file mode 100644 index 41349fc7..00000000 --- a/.github/workflows/ci-pull-request-target.yml +++ /dev/null @@ -1,276 +0,0 @@ -name: CI - Pull Request Target - -on: - pull_request_target: - types: [opened, reopened, synchronize, labeled] - branches: [main] - paths-ignore: - - '*.detignore' - - '**/*.detignore' - - '*.example' - - '**/*.example' - - '*.json' - - '**/*.json' - - '*.md' - - '**/*.md' - - '*.png' - - '**/*.png' - - '*.sample' - - '**/*.sample' - - '*.TAG' - - '**/*.TAG' - - '*.yaml' - - '**/*.yaml' - - '*.yml' - - '**/*.yml' - - '!/.github/workflows/**' - - 'docs/**' - - 'LICENSE' - - '.gitignore' - - 'changelog/**' - -permissions: - contents: read - packages: write - pull-requests: read - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number }} - cancel-in-progress: true - -env: - REGISTRY: ghcr.io - IMAGE_NAME: eval_framework_public - HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets - -jobs: - # Security check for pull_request_target - authorize-fork-pr: - runs-on: ubuntu-latest - outputs: - should_run: ${{ steps.check.outputs.should_run }} - steps: - - name: Check authorization - id: check - run: | - # 
Check if PR is from a fork - if [[ "${{ github.event.pull_request.head.repo.fork }}" == "false" ]]; then - echo "PR is from the same repository. Skipping pull_request_target workflow." - echo "should_run=false" >> $GITHUB_OUTPUT - exit 0 - fi - - # If from fork, check for trusted_contributor label - if [[ "${{ contains(github.event.pull_request.labels.*.name, 'trusted_contributor') }}" == "true" ]]; then - echo "PR is from a fork with 'trusted_contributor' label. Authorized!" - echo "should_run=true" >> $GITHUB_OUTPUT - exit 0 - fi - - # Otherwise, don't run - echo "PR is from a fork without 'trusted_contributor' label. Skipping." - echo "should_run=false" >> $GITHUB_OUTPUT - exit 0 - - - lint: - needs: authorize-fork-pr - if: ${{ needs.authorize-fork-pr.outputs.should_run == 'true' }} - runs-on: ubuntu-latest - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu - with: - github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} - - name: Run Pre-Commit - run: | - poetry run pre-commit install - poetry run pre-commit run -a - - name: Run MyPy - run: | - poetry run mypy src tests utils - - - build: - needs: [authorize-fork-pr, lint] - if: ${{ needs.authorize-fork-pr.outputs.should_run == 'true'}} - runs-on: cpu-runner-8c-32gb-01 - container: docker:dind - env: - REGISTRY: ghcr.io - IMAGE_NAME: eval_framework_public - REPO_OWNER_LC: aleph-alpha-research - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: Registry Authentication - uses: docker/login-action@v2 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup Docker BuildX - uses: docker/setup-buildx-action@v1 - - - name: Build and Push Image - uses: docker/build-push-action@v6 - with: - context: . 
- file: Dockerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest - - test-cpu: - needs: [authorize-fork-pr, lint] - if: ${{ needs.authorize-fork-pr.outputs.should_run == 'true' }} - runs-on: cpu-runner-8c-32gb-01 - container: derskythe/github-runner-base:ubuntu-noble - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - name: Run tests - run: | - export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} - export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} - export PATH=$HOME/.local/bin:$PATH - poetry run pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" - - test-cpu-slow: - needs: [authorize-fork-pr, lint] - if: ${{ needs.authorize-fork-pr.outputs.should_run == 'true' }} - runs-on: cpu-runner-8c-32gb-01 - container: derskythe/github-runner-base:ubuntu-noble - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - name: Run tests - run: | - export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} - export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} - export PATH=$HOME/.local/bin:$PATH - poetry run python -c "import nltk; nltk.download('punkt_tab')" - poetry run pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" - - test-gpu: - needs: [authorize-fork-pr, build, test-cpu, test-cpu-slow] - runs-on: EvalFrameworkGPURunner - if: ${{ always() - && 
needs.authorize-fork-pr.outputs.should_run == 'true' - && needs.test-cpu.result == 'success' - && needs.test-cpu-slow.result == 'success' - && (needs.build.result == 'success' || needs.build.result == 'skipped') }} - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: Normalize repo owner to lowercase - run: | - echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV - - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull container image - run: | - docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" - - - name: Run GPU tests inside container with GPU access - run: | - timeout 20m docker run --rm --gpus all \ - -v ${{ github.workspace }}:/eval_framework \ - -w /eval_framework \ - -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ - -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ - bash -c 'poetry run pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm"' - - test-gpu-vllm: - needs: [authorize-fork-pr, build, test-cpu, test-cpu-slow, test-gpu] - runs-on: EvalFrameworkGPURunner - if: ${{ always() - && needs.authorize-fork-pr.outputs.should_run == 'true' - && needs.test-cpu.result == 'success' - && needs.test-cpu-slow.result == 'success' - && (needs.build.result == 'success' || needs.build.result == 'skipped') - && (needs.test-gpu.result == 'success' || needs.test-gpu.result == 'skipped') }} - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - with: - ref: ${{ 
github.event.pull_request.head.sha }} - - - name: Normalize repo owner to lowercase - run: | - echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV - - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull container image - run: | - docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" - - - name: Run GPU tests inside container with GPU access - run: | - timeout 20m docker run --rm --gpus all \ - -v ${{ github.workspace }}:/eval_framework \ - -w /eval_framework \ - -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ - -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ - -e VLLM_LOGGING_LEVEL=DEBUG \ - -e VLLM_WORKER_MULTIPROC_METHOD=spawn \ - -e VLLM_USE_MODELSCOPE=False \ - -e VLLM_NCCL_SO_PATH="" \ - -e VLLM_USE_TRITON_FLASH_ATTN=0 \ - -e VLLM_DISABLE_CUSTOM_ALL_REDUCE=1 \ - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ - bash -c 'curl -sSL https://install.python-poetry.org | python3 - && export PATH="$HOME/.local/bin:$PATH" && poetry --version && poetry run pytest --log-cli-level=INFO -v -m "vllm"' diff --git a/.github/workflows/ci-pull-request.yml b/.github/workflows/ci-pull-request.yml deleted file mode 100644 index 88843188..00000000 --- a/.github/workflows/ci-pull-request.yml +++ /dev/null @@ -1,259 +0,0 @@ -name: CI - Pull Request - -on: - push: - branches: [main] - paths-ignore: - - '**.md' - pull_request: - types: [opened, reopened, synchronize, labeled] - paths-ignore: - - '**.md' - # Manually trigger a workflow for a branch - workflow_dispatch: - # Merge queue trigger - merge_group: - -permissions: - contents: read - -concurrency: - group: ${{ 
github.workflow }}-${{ github.event.pull_request.number }} - cancel-in-progress: true - -env: - REGISTRY: registry.gitlab.aleph-alpha.de - REPO_OWNER: research/public-registry - IMAGE_NAME: eval_framework - HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets # <- single source of truth - UV_LINK_MODE: symlink - UV_LOCKED: 1 - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Setup uv - uses: astral-sh/setup-uv@v6 - with: - version: "~=0.8.16" - - - name: Run Pre-Commit - run: uvx pre-commit run --all-files - - - name: Dependency check - run: ./utils/dependency_check.sh - - - name: Run MyPy - run: uv run --all-extras mypy - - hf-datasets-cache: - runs-on: cpu-runner-8c-32gb-01 # default runner runs out of disk space, unfortunately - steps: - - uses: actions/checkout@v4 - if: github.ref == 'refs/heads/main' - - - name: Setup uv - uses: astral-sh/setup-uv@v6 - if: github.ref == 'refs/heads/main' - with: - version: "~=0.8.16" - - - name: Huggingface datasets cache - uses: actions/cache@v4 - if: github.ref == 'refs/heads/main' - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} # <- use shared env - key: hf-datasets-${{ github.run_id }} - restore-keys: | - hf-datasets- - - - name: Download datasets - if: github.ref == 'refs/heads/main' - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - run: uv run --extra=comet --extra=openai python -c "from eval_framework.tasks.task_names import make_sure_all_hf_datasets_are_in_cache; make_sure_all_hf_datasets_are_in_cache()" - - tag: - # Set Docker Tag and Image Name for Docker Build and Push (GPU Runs) - runs-on: ubuntu-latest - outputs: - tag: ${{ steps.set-tag.outputs.tag }} - image: ${{ steps.set-tag.outputs.image }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set Tag - id: set-tag - run: | - if [ "${{ github.ref }}" == "refs/heads/main" ]; then - TAG='latest' - else - # head_ref is the correct branch name for PRs - BRANCH_NAME=${{ 
github.head_ref || github.ref_name }} - # Convert slashes with hyphens and ensure valid Docker tag format - TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20) - fi - echo "tag=$TAG" >> $GITHUB_OUTPUT - echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT - - - name: Output Docker Tag - run: | - echo "Docker Tag: ${{ steps.set-tag.outputs.tag }}" - echo "Docker image: ${{ steps.set-tag.outputs.image }}" - - build: - # Build and Push Docker Image (GPU Runs) - needs: [lint, tag] - runs-on: cpu-runner-8c-32gb-01 - container: docker:dind - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Registry Authentication - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: token - password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }} - - - name: Setup Docker BuildX - uses: docker/setup-buildx-action@v1 - - - name: Build and Push Image - uses: docker/build-push-action@v6 - with: - context: . 
- file: Dockerfile - push: true - tags: ${{ needs.tag.outputs.image }} - - test-extras: - # Test uv installs (CPU) - runs-on: ubuntu-latest - needs: [lint] - strategy: - fail-fast: false - matrix: - extras: ['', 'determined', 'api', 'openai', 'transformers', 'accelerate', 'comet', 'optional'] - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Setup uv - uses: astral-sh/setup-uv@v6 - with: - version: "~=0.8.16" - - - name: Verify install and functionality via uv --exact - run: | - if [ "${{ matrix.extras }}" != "" ]; then - echo "Testing extra: ${{ matrix.extras }}" - uv run --exact --extra ${{ matrix.extras }} pytest -v --noconftest tests/tests_eval_framework/installs/test_${{ matrix.extras }}.py - else - echo "Testing core install" - uv run --exact pytest --noconftest -v tests/tests_eval_framework/installs/test_core.py - fi - - test-cpu: - runs-on: cpu-runner-8c-32gb-01 - container: derskythe/github-runner-base:ubuntu-noble - needs: [hf-datasets-cache, test-extras] - steps: - - uses: actions/checkout@v4 - - - name: Setup uv - uses: astral-sh/setup-uv@v6 - with: - version: "~=0.8.16" - - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - - name: Run tests - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - run: uv run --all-extras pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" - - test-cpu-slow: - runs-on: cpu-runner-8c-32gb-01 - container: derskythe/github-runner-base:ubuntu-noble - needs: [hf-datasets-cache, test-extras] - steps: - - uses: actions/checkout@v4 - - - name: Setup uv - uses: astral-sh/setup-uv@v6 - with: - version: "~=0.8.16" - - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - - name: Run tests - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - run: | - uv run --all-extras python -c "import nltk; 
nltk.download('punkt_tab')" # otherwise there's a race condition in ntltk - uv run --all-extras pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" - - test-docker-gpu: - # Run full test suite in Docker Container with GPU - runs-on: EvalFrameworkGPURunner - needs: [tag, build, test-cpu, test-cpu-slow] - container: - image: "${{ needs.tag.outputs.image }}" - credentials: - username: token - password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }} - options: --gpus all - defaults: - run: - working-directory: /eval_framework - steps: - - name: Verify GPU installs via uv --exact - run: | - set -e # fail fast if any test fails - - echo "Testing vllm extra" - uv run --exact --extra vllm pytest -v --noconftest tests/tests_eval_framework/installs/test_vllm.py - - echo "Testing mistral extra" - uv run --exact --extra mistral pytest -v --noconftest tests/tests_eval_framework/installs/test_mistral.py - - echo "Testing all extras together" - uv run --exact --all-extras pytest -v --noconftest tests/tests_eval_framework/installs/ - - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: hf-datasets- - - - name: Test GPU - timeout-minutes: 20 - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - run: pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm" - - - name: Test VLLM - timeout-minutes: 20 - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} - VLLM_LOGGING_LEVEL: DEBUG - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: False - VLLM_NCCL_SO_PATH: "" - VLLM_USE_TRITON_FLASH_ATTN: 0 - VLLM_DISABLE_CUSTOM_ALL_REDUCE: 1 - run: pytest --log-cli-level=INFO -v -m "vllm" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8e070659..12ff7126 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,103 +2,46 @@ name: CI on: push: - branches: [main, ci/split-workflows] # 
TEMPORARY: Remove ci/split-workflows before merging + branches: [main] paths-ignore: - - '*.detignore' - - '**/*.detignore' - - '*.example' - - '**/*.example' - - '*.json' - - '**/*.json' - - '*.md' - - '**/*.md' - - '*.png' - - '**/*.png' - - '*.sample' - - '**/*.sample' - - '*.TAG' - - '**/*.TAG' - - '*.yaml' - - '**/*.yaml' - - '*.yml' - - '**/*.yml' - - '!/.github/workflows/**' - - 'docs/**' - - 'LICENSE' - - '.gitignore' - - 'changelog/**' + - '**.md' pull_request: types: [opened, reopened, synchronize, labeled] - branches: [main, ci/split-workflows] # TEMPORARY: Remove ci/split-workflows before merging paths-ignore: - - '*.detignore' - - '**/*.detignore' - - '*.example' - - '**/*.example' - - '*.json' - - '**/*.json' - - '*.md' - - '**/*.md' - - '*.png' - - '**/*.png' - - '*.sample' - - '**/*.sample' - - '*.TAG' - - '**/*.TAG' - - '*.yaml' - - '**/*.yaml' - - '*.yml' - - '**/*.yml' - - '!/.github/workflows/**' - - 'docs/**' - - 'LICENSE' - - '.gitignore' - - 'changelog/**' + - '**.md' pull_request_target: types: [opened, reopened, synchronize, labeled] - branches: [main, ci/split-workflows] # TEMPORARY: Remove ci/split-workflows before merging paths-ignore: - - '*.detignore' - - '**/*.detignore' - - '*.example' - - '**/*.example' - - '*.json' - - '**/*.json' - - '*.md' - - '**/*.md' - - '*.png' - - '**/*.png' - - '*.sample' - - '**/*.sample' - - '*.TAG' - - '**/*.TAG' - - '*.yaml' - - '**/*.yaml' - - '*.yml' - - '**/*.yml' - - '!/.github/workflows/**' - - 'docs/**' - - 'LICENSE' - - '.gitignore' - - 'changelog/**' + - '**.md' + # Manually trigger a workflow for a branch + workflow_dispatch: + # Merge queue trigger + merge_group: permissions: contents: read - packages: write pull-requests: read concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} - cancel-in-progress: ${{ github.event_name != 'push' || github.ref != 'refs/heads/main' }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || 
github.ref }} + cancel-in-progress: true env: - REGISTRY: ghcr.io - IMAGE_NAME: eval_framework_public - HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets + REGISTRY: registry.gitlab.aleph-alpha.de + REPO_OWNER: research/public-registry + IMAGE_NAME: eval_framework + HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets # <- single source of truth + UV_LINK_MODE: symlink + UV_LOCKED: 1 + # Authorization context + EVENT: ${{ github.event_name }} + HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name || 'N/A' }} + BASE_REPO: ${{ github.event.pull_request.base.repo.full_name || 'N/A' }} + HAS_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'trusted_contributor') }} jobs: - # Security check for pull_request_target - authorize-fork-pr: + # Security check for pull_request_target (fork PRs) + authorize: runs-on: ubuntu-latest outputs: should_run: ${{ steps.check.outputs.should_run }} @@ -106,103 +49,115 @@ jobs: - name: Check authorization id: check run: | - # For push events, always run - if [[ "${{ github.event_name }}" == "push" ]]; then - echo "Event is push. Running workflow." - echo "should_run=true" >> $GITHUB_OUTPUT - exit 0 - fi + echo "Event: $EVENT" + echo "Head repo: $HEAD_REPO" + echo "Base repo: $BASE_REPO" + echo "Has trusted_contributor label: $HAS_LABEL" - # For pull_request events, check if it's from a fork - if [[ "${{ github.event_name }}" == "pull_request" ]]; then - # GitHub doesn't always populate fork info for pull_request events - # So we'll check if the head repo name differs from the base repo name - if [[ "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.event.pull_request.base.repo.full_name }}" ]]; then - echo "PR is from the same repository. Running workflow." - echo "should_run=true" >> $GITHUB_OUTPUT - exit 0 - else - echo "PR is from a fork. Skipping pull_request workflow." 
- echo "should_run=false" >> $GITHUB_OUTPUT - exit 0 - fi + if [[ "$EVENT" == "push" || "$EVENT" == "workflow_dispatch" || "$EVENT" == "merge_group" ]]; then + SHOULD_RUN=true + elif [[ "$EVENT" == "pull_request" && "$HEAD_REPO" == "$BASE_REPO" ]]; then + SHOULD_RUN=true + elif [[ "$EVENT" == "pull_request_target" && "$HEAD_REPO" != "$BASE_REPO" && "$HAS_LABEL" == "true" ]]; then + SHOULD_RUN=true + else + SHOULD_RUN=false fi - # For pull_request_target, check if it's from a fork - if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then - # Check if PR is from a fork - if [[ "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.event.pull_request.base.repo.full_name }}" ]]; then - echo "PR is from the same repository. Skipping pull_request_target workflow." - echo "should_run=false" >> $GITHUB_OUTPUT - exit 0 - fi - - # If from fork, check for trusted_contributor label - if [[ "${{ contains(github.event.pull_request.labels.*.name, 'trusted_contributor') }}" == "true" ]]; then - echo "PR is from a fork with 'trusted_contributor' label. Authorized!" - echo "should_run=true" >> $GITHUB_OUTPUT - exit 0 - fi - - # Otherwise, don't run - echo "PR is from a fork without 'trusted_contributor' label. Skipping." 
- echo "should_run=false" >> $GITHUB_OUTPUT - exit 0 - fi + echo "Decision: should_run=$SHOULD_RUN" + echo "should_run=$SHOULD_RUN" >> $GITHUB_OUTPUT lint: - runs-on: ubuntu-latest - needs: [authorize-fork-pr] - if: needs.authorize-fork-pr.outputs.should_run == 'true' + runs-on: ubuntu-latest # default runner runs out of disk space due to hf cache + needs: [authorize] + if: needs.authorize.outputs.should_run == 'true' steps: - name: Checkout Repository uses: actions/checkout@v4 with: ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu + + - name: Setup uv + uses: astral-sh/setup-uv@v6 with: - github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} + version: "~=0.8.16" + - name: Run Pre-Commit - run: | - poetry run pre-commit install - poetry run pre-commit run -a + run: uvx pre-commit run --all-files + + - name: Dependency check + run: ./utils/dependency_check.sh + - name: Run MyPy - run: | - poetry run mypy src tests utils + run: uv run --all-extras mypy hf-datasets-cache: - runs-on: cpu-runner-8c-32gb-01 - container: derskythe/github-runner-base:ubuntu-noble - needs: [authorize-fork-pr] - if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.authorize-fork-pr.outputs.should_run == 'true' + runs-on: cpu-runner-8c-32gb-01 # default runner runs out of disk space, unfortunately + needs: [authorize] + if: needs.authorize.outputs.should_run == 'true' steps: - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu + - uses: actions/checkout@v4 + if: github.ref == 'refs/heads/main' + with: + ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} + + - name: Setup uv + uses: astral-sh/setup-uv@v6 + if: github.ref == 'refs/heads/main' with: - github-repo-token: ${{ secrets.TOKEN_AA_GITHUB_REPO_READER }} + version: "~=0.8.16" + - name: Huggingface 
datasets cache uses: actions/cache@v4 + if: github.ref == 'refs/heads/main' with: - path: ${{ env.HF_DATASET_CACHE_DIR }} + path: ${{ env.HF_DATASET_CACHE_DIR }} # <- use shared env key: hf-datasets-${{ github.run_id }} restore-keys: | hf-datasets- + - name: Download datasets + if: github.ref == 'refs/heads/main' + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + run: uv run --extra=comet --extra=openai python -c "from eval_framework.tasks.task_names import make_sure_all_hf_datasets_are_in_cache; make_sure_all_hf_datasets_are_in_cache()" + + tag: + # Set Docker Tag and Image Name for Docker Build and Push (GPU Runs) + runs-on: ubuntu-latest + needs: [authorize] + if: needs.authorize.outputs.should_run == 'true' + outputs: + tag: ${{ steps.set-tag.outputs.tag }} + image: ${{ steps.set-tag.outputs.image }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set Tag + id: set-tag run: | - export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} - export PATH=$HOME/.local/bin:$PATH - poetry run python -c "from eval_framework.task_names import make_sure_all_hf_datasets_are_in_cache; make_sure_all_hf_datasets_are_in_cache()" + if [ "${{ github.ref }}" == "refs/heads/main" ]; then + TAG='latest' + else + # head_ref is the correct branch name for PRs + BRANCH_NAME=${{ github.head_ref || github.ref_name }} + # Convert slashes with hyphens and ensure valid Docker tag format + TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20) + fi + echo "tag=$TAG" >> $GITHUB_OUTPUT + echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT + + - name: Output Docker Tag + run: | + echo "Docker Tag: ${{ steps.set-tag.outputs.tag }}" + echo "Docker image: ${{ steps.set-tag.outputs.image }}" build: - needs: [authorize-fork-pr, lint] + # Build and Push Docker Image (GPU Runs) + needs: [authorize, lint, tag] + if: needs.authorize.outputs.should_run == 'true' runs-on: cpu-runner-8c-32gb-01 container: docker:dind - 
if: needs.authorize-fork-pr.outputs.should_run == 'true' && needs.lint.result == 'success' - env: - REGISTRY: ghcr.io - IMAGE_NAME: eval_framework_public - REPO_OWNER_LC: aleph-alpha-research steps: - name: Checkout Repository uses: actions/checkout@v4 @@ -210,11 +165,11 @@ jobs: ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - name: Registry Authentication - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + username: token + password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }} - name: Setup Docker BuildX uses: docker/setup-buildx-action@v1 @@ -225,161 +180,138 @@ jobs: context: . file: Dockerfile push: true - tags: | - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest + tags: ${{ needs.tag.outputs.image }} - test-cpu: - runs-on: cpu-runner-8c-32gb-01 - container: derskythe/github-runner-base:ubuntu-noble - needs: [authorize-fork-pr, lint, hf-datasets-cache] - if: | - always() && - needs.authorize-fork-pr.outputs.should_run == 'true' && - needs.lint.result == 'success' && - (needs.hf-datasets-cache.result == 'success' || needs.hf-datasets-cache.result == 'skipped') + test-extras: + # Test uv installs (CPU) + runs-on: ubuntu-latest + needs: [authorize, lint] + if: needs.authorize.outputs.should_run == 'true' + strategy: + fail-fast: false + matrix: + extras: ['', 'determined', 'api', 'openai', 'transformers', 'accelerate', 'comet', 'optional'] steps: - name: Checkout Repository uses: actions/checkout@v4 with: ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu - - name: Huggingface datasets cache - uses: actions/cache/restore@v4 + + - name: Setup uv + uses: astral-sh/setup-uv@v6 with: - path: ${{ env.HF_DATASET_CACHE_DIR }} - key: 
hf-datasets- - - name: Run tests + version: "~=0.8.16" + + - name: Verify install and functionality via uv --exact run: | - export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} - export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} - export PATH=$HOME/.local/bin:$PATH - poetry run pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" + if [ "${{ matrix.extras }}" != "" ]; then + echo "Testing extra: ${{ matrix.extras }}" + uv run --exact --extra ${{ matrix.extras }} pytest -v --noconftest tests/tests_eval_framework/installs/test_${{ matrix.extras }}.py + else + echo "Testing core install" + uv run --exact pytest --noconftest -v tests/tests_eval_framework/installs/test_core.py + fi - test-cpu-slow: + test-cpu: runs-on: cpu-runner-8c-32gb-01 container: derskythe/github-runner-base:ubuntu-noble - needs: [authorize-fork-pr, lint, hf-datasets-cache] - if: | - always() && - needs.authorize-fork-pr.outputs.should_run == 'true' && - needs.lint.result == 'success' && - (needs.hf-datasets-cache.result == 'success' || needs.hf-datasets-cache.result == 'skipped') + needs: [authorize, hf-datasets-cache, test-extras] + if: needs.authorize.outputs.should_run == 'true' steps: - - name: Checkout Repository - uses: actions/checkout@v4 + - uses: actions/checkout@v4 with: ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - - name: Setup poetry - uses: ./.github/actions/setup-poetry-on-ubuntu + + - name: Setup uv + uses: astral-sh/setup-uv@v6 + with: + version: "~=0.8.16" + - name: Huggingface datasets cache uses: actions/cache/restore@v4 with: - path: ${{ env.HF_DATASET_CACHE_DIR }} + path: ${{ env.HF_DATASET_CACHE_DIR }} # <- shared path key: hf-datasets- + - name: Run tests - run: | - export HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} - export HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} - export PATH=$HOME/.local/bin:$PATH - poetry run python -c "import nltk; nltk.download('punkt_tab')" - 
poetry run pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" - - test-gpu: - needs: [authorize-fork-pr, build, test-cpu, test-cpu-slow] - runs-on: EvalFrameworkGPURunner - if: | - always() && - needs.authorize-fork-pr.outputs.should_run == 'true' && - needs.test-cpu.result == 'success' && - needs.test-cpu-slow.result == 'success' && - (needs.build.result == 'success' || (github.event_name != 'push' && needs.build.result == 'skipped')) + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + run: uv run --all-extras pytest --durations=30 -v -m "not gpu and not cpu_slow and not external_api" + + test-cpu-slow: + runs-on: cpu-runner-8c-32gb-01 + container: derskythe/github-runner-base:ubuntu-noble + needs: [authorize, hf-datasets-cache, test-extras] + if: needs.authorize.outputs.should_run == 'true' steps: - - name: Checkout repository - uses: actions/checkout@v4 + - uses: actions/checkout@v4 with: ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - - name: Normalize repo owner to lowercase - run: | - echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV + - name: Setup uv + uses: astral-sh/setup-uv@v6 + with: + version: "~=0.8.16" - name: Huggingface datasets cache uses: actions/cache/restore@v4 with: - path: ${{ env.HF_DATASET_CACHE_DIR }} + path: ${{ env.HF_DATASET_CACHE_DIR }} # <- shared path key: hf-datasets- - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull container image + - name: Run tests + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} run: | - docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" + uv run --all-extras python -c "import nltk; nltk.download('punkt_tab')" + uv run --all-extras pytest -n auto 
--max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api" - - name: Run GPU tests inside container with GPU access - run: | - timeout 20m docker run --rm --gpus all \ - -v ${{ github.workspace }}:/eval_framework \ - -w /eval_framework \ - -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ - -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ - bash -c 'poetry run pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm"' - - test-gpu-vllm: - needs: [authorize-fork-pr, build, test-cpu, test-cpu-slow, test-gpu] + test-docker-gpu: runs-on: EvalFrameworkGPURunner - if: | - always() && - needs.authorize-fork-pr.outputs.should_run == 'true' && - needs.test-cpu.result == 'success' && - needs.test-cpu-slow.result == 'success' && - (needs.build.result == 'success' || (github.event_name != 'push' && needs.build.result == 'skipped')) && - (needs.test-gpu.result == 'success' || needs.test-gpu.result == 'skipped') + needs: [authorize, tag, build, test-cpu, test-cpu-slow] + if: needs.authorize.outputs.should_run == 'true' + container: + image: "${{ needs.tag.outputs.image }}" + credentials: + username: token + password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }} + options: --gpus all + defaults: + run: + working-directory: /eval_framework steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - - - name: Normalize repo owner to lowercase + - name: Verify GPU installs via uv --exact run: | - echo "REPO_OWNER_LC=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV + set -e # fail fast if any test fails + echo "Testing vllm extra" + uv run --exact --extra vllm pytest -v --noconftest tests/tests_eval_framework/installs/test_vllm.py + + echo "Testing mistral extra" + uv run --exact 
--extra mistral pytest -v --noconftest tests/tests_eval_framework/installs/test_mistral.py + + echo "Testing all extras together" + uv run --exact --all-extras pytest -v --noconftest tests/tests_eval_framework/installs/ - name: Huggingface datasets cache uses: actions/cache/restore@v4 with: - path: ${{ env.HF_DATASET_CACHE_DIR }} + path: ${{ env.HF_DATASET_CACHE_DIR }} # <- shared path key: hf-datasets- - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull container image - run: | - docker pull "${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest" + - name: Test GPU + timeout-minutes: 20 + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + run: pytest --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm" - - name: Run GPU tests inside container with GPU access - run: | - timeout 20m docker run --rm --gpus all \ - -v ${{ github.workspace }}:/eval_framework \ - -w /eval_framework \ - -e HF_TOKEN=${{ secrets.HUGGINGFACE_API_KEY }} \ - -e HF_DATASET_CACHE_DIR=${{ env.HF_DATASET_CACHE_DIR }} \ - -e VLLM_LOGGING_LEVEL=DEBUG \ - -e VLLM_WORKER_MULTIPROC_METHOD=spawn \ - -e VLLM_USE_MODELSCOPE=False \ - -e VLLM_NCCL_SO_PATH="" \ - -e VLLM_USE_TRITON_FLASH_ATTN=0 \ - -e VLLM_DISABLE_CUSTOM_ALL_REDUCE=1 \ - ${{ env.REGISTRY }}/${{ env.REPO_OWNER_LC }}/${{ env.IMAGE_NAME }}:latest \ - bash -c 'curl -sSL https://install.python-poetry.org | python3 - && export PATH="$HOME/.local/bin:$PATH" && poetry --version && poetry run pytest --log-cli-level=INFO -v -m "vllm"' + - name: Test VLLM + timeout-minutes: 20 + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }} + VLLM_LOGGING_LEVEL: DEBUG + VLLM_WORKER_MULTIPROC_METHOD: spawn + VLLM_USE_MODELSCOPE: False + VLLM_NCCL_SO_PATH: "" + VLLM_USE_TRITON_FLASH_ATTN: 0 + VLLM_DISABLE_CUSTOM_ALL_REDUCE: 1 + run: pytest --log-cli-level=INFO -v 
-m "vllm" From 74d947b3b42cba53dd416c2364fe36cfe490a813 Mon Sep 17 00:00:00 2001 From: ahmedhammam Date: Mon, 1 Dec 2025 10:24:06 +0000 Subject: [PATCH 3/6] fix tag ci job --- .github/workflows/tests.yml | 42 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 12ff7126..c23ed090 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -125,32 +125,30 @@ jobs: tag: # Set Docker Tag and Image Name for Docker Build and Push (GPU Runs) runs-on: ubuntu-latest - needs: [authorize] - if: needs.authorize.outputs.should_run == 'true' outputs: tag: ${{ steps.set-tag.outputs.tag }} image: ${{ steps.set-tag.outputs.image }} steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set Tag - id: set-tag - run: | - if [ "${{ github.ref }}" == "refs/heads/main" ]; then - TAG='latest' - else - # head_ref is the correct branch name for PRs - BRANCH_NAME=${{ github.head_ref || github.ref_name }} - # Convert slashes with hyphens and ensure valid Docker tag format - TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20) - fi - echo "tag=$TAG" >> $GITHUB_OUTPUT - echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT - - - name: Output Docker Tag - run: | - echo "Docker Tag: ${{ steps.set-tag.outputs.tag }}" - echo "Docker image: ${{ steps.set-tag.outputs.image }}" + - name: Checkout code + uses: actions/checkout@v4 + - name: Set Tag + id: set-tag + run: | + if [ "${{ github.ref }}" == "refs/heads/main" ]; then + TAG='latest' + else + # head_ref is the correct branch name for PRs + BRANCH_NAME=${{ github.head_ref || github.ref_name }} + # Convert slashes with hyphens and ensure valid Docker tag format + TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20) + fi + echo "tag=$TAG" >> $GITHUB_OUTPUT + echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ 
env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT + + - name: Output Docker Tag + run: | + echo "Docker Tag: ${{ steps.set-tag.outputs.tag }}" + echo "Docker image: ${{ steps.set-tag.outputs.image }}" build: # Build and Push Docker Image (GPU Runs) From fabb02a028a53cbe3a2d7d965b97b94529fad0ad Mon Sep 17 00:00:00 2001 From: ahmedhammam Date: Mon, 1 Dec 2025 10:36:47 +0000 Subject: [PATCH 4/6] add disclaimer on ci workflow for fork prs --- .github/workflows/tests.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c23ed090..48f02ed8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,7 +40,11 @@ env: HAS_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'trusted_contributor') }} jobs: - # Security check for pull_request_target (fork PRs) + # ============================================================================ + # SECURITY: This workflow uses pull_request_target for fork PR support. + # Fork PRs require 'trusted_contributor' label from maintainers. + # Maintainers MUST review fork code before adding the label. 
+ # ============================================================================ authorize: runs-on: ubuntu-latest outputs: @@ -125,12 +129,16 @@ jobs: tag: # Set Docker Tag and Image Name for Docker Build and Push (GPU Runs) runs-on: ubuntu-latest + needs: [authorize] + if: needs.authorize.outputs.should_run == 'true' outputs: tag: ${{ steps.set-tag.outputs.tag }} image: ${{ steps.set-tag.outputs.image }} steps: - name: Checkout code uses: actions/checkout@v4 + with: + ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }} - name: Set Tag id: set-tag run: | @@ -138,7 +146,7 @@ jobs: TAG='latest' else # head_ref is the correct branch name for PRs - BRANCH_NAME=${{ github.head_ref || github.ref_name }} + BRANCH_NAME="${{ github.head_ref || github.ref_name }}" # Convert slashes with hyphens and ensure valid Docker tag format TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20) fi From 8dfb8ace6016f97ca17dc3cc0ed61209fd8ba65f Mon Sep 17 00:00:00 2001 From: ahmedhammam Date: Wed, 3 Dec 2025 13:56:57 +0000 Subject: [PATCH 5/6] ci tag name change --- .github/workflows/tests.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 48f02ed8..366d9950 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -142,13 +142,20 @@ jobs: - name: Set Tag id: set-tag run: | - if [ "${{ github.ref }}" == "refs/heads/main" ]; then + if [ "${{ github.event_name }}" = "pull_request_target" ]; then + # SECURITY: Never use attacker-controlled branch names in pull_request_target + TAG="pr-${{ github.event.pull_request.number }}" + elif [ "${{ github.ref }}" = "refs/heads/main" ]; then TAG='latest' + elif [ "${{ github.event_name }}" = "pull_request" ]; then + TAG="pr-${{ github.event.pull_request.number }}" else - # head_ref is the correct branch name for PRs - BRANCH_NAME="${{ github.head_ref || 
github.ref_name }}" - # Convert slashes with hyphens and ensure valid Docker tag format + BRANCH_NAME="${{ github.ref_name }}" TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20) + # Prevent collision with protected tags + if [ "$TAG" = "latest" ] || [ "$TAG" = "main" ]; then + TAG="branch-${TAG}-${{ github.run_id }}" + fi fi echo "tag=$TAG" >> $GITHUB_OUTPUT echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT From c77a6a9063e8e73567450dec9382916e1ed61fea Mon Sep 17 00:00:00 2001 From: ahmedhammam Date: Fri, 12 Dec 2025 16:41:50 +0000 Subject: [PATCH 6/6] remove comment --- .github/workflows/tests.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 366d9950..6bd84712 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,11 +40,6 @@ env: HAS_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'trusted_contributor') }} jobs: - # ============================================================================ - # SECURITY: This workflow uses pull_request_target for fork PR support. - # Fork PRs require 'trusted_contributor' label from maintainers. - # Maintainers MUST review fork code before adding the label. - # ============================================================================ authorize: runs-on: ubuntu-latest outputs: