Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 78 additions & 12 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,18 @@
types: [opened, reopened, synchronize, labeled]
paths-ignore:
- '**.md'
pull_request_target:
types: [opened, reopened, synchronize, labeled]
paths-ignore:
- '**.md'
# Manually trigger a workflow for a branch
workflow_dispatch:
# Merge queue trigger
merge_group:

permissions:
contents: read
pull-requests: read

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
Expand All @@ -28,13 +33,48 @@
HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets # <- single source of truth
UV_LINK_MODE: symlink
UV_LOCKED: 1
# Authorization context
EVENT: ${{ github.event_name }}
HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name || 'N/A' }}
BASE_REPO: ${{ github.event.pull_request.base.repo.full_name || 'N/A' }}
HAS_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'trusted_contributor') }}

jobs:
# Gate job: decides whether the rest of the workflow may run for this event.
# Downstream jobs depend on it and check `needs.authorize.outputs.should_run`.
authorize:
  runs-on: ubuntu-latest
  # This job only inspects event metadata; it never checks out code or calls
  # the API, so it runs without any GITHUB_TOKEN scopes.
  permissions: {}
  outputs:
    # 'true' or 'false' (string), as written by the `check` step below.
    should_run: ${{ steps.check.outputs.should_run }}
  steps:
    - name: Check authorization
      id: check
      # EVENT, HEAD_REPO, BASE_REPO and HAS_LABEL are provided by the
      # workflow-level `env` block.
      run: |
        echo "Event: $EVENT"
        echo "Head repo: $HEAD_REPO"
        echo "Base repo: $BASE_REPO"
        echo "Has trusted_contributor label: $HAS_LABEL"

        # push / manual dispatch / merge queue: always allowed.
        if [[ "$EVENT" == "push" || "$EVENT" == "workflow_dispatch" || "$EVENT" == "merge_group" ]]; then
          SHOULD_RUN=true
        # pull_request: only when head and base are the same repository.
        elif [[ "$EVENT" == "pull_request" && "$HEAD_REPO" == "$BASE_REPO" ]]; then
          SHOULD_RUN=true
        # pull_request_target: only for cross-repository PRs that carry the
        # trusted_contributor label.
        # NOTE(review): the label is read from the PR at event time, so it is
        # still present on later `synchronize` events; commits pushed after
        # labelling would be authorized without a fresh review - confirm this
        # is intended.
        elif [[ "$EVENT" == "pull_request_target" && "$HEAD_REPO" != "$BASE_REPO" && "$HAS_LABEL" == "true" ]]; then
          SHOULD_RUN=true
        else
          SHOULD_RUN=false
        fi

        echo "Decision: should_run=$SHOULD_RUN"
        # Quote the output file path to avoid word splitting/globbing.
        echo "should_run=$SHOULD_RUN" >> "$GITHUB_OUTPUT"

lint:
runs-on: ubuntu-latest # default runner runs out of disk space due to hf cache
needs: [authorize]
if: needs.authorize.outputs.should_run == 'true'
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }}

- name: Setup uv
uses: astral-sh/setup-uv@v6
Expand All @@ -44,19 +84,23 @@
- name: Run Pre-Commit
run: uvx pre-commit run --all-files

- name: Dependency check
run: ./utils/dependency_check.sh

- name: Run MyPy

Check failure

Code scanning / CodeQL

Cache Poisoning via execution of untrusted code High test

Potential cache poisoning in the context of the default branch due to privilege checkout of untrusted code. (
pull_request_target
).
run: uv run --all-extras mypy

hf-datasets-cache:
runs-on: cpu-runner-8c-32gb-01 # default runner runs out of disk space, unfortunately
needs: [authorize]
if: needs.authorize.outputs.should_run == 'true'
steps:
- uses: actions/checkout@v4
if: github.ref == 'refs/heads/main'
with:
ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }}

- name: Setup uv

Check failure

Code scanning / CodeQL

Checkout of untrusted code in trusted context High test

Potential execution of untrusted code on a privileged workflow (
pull_request_target
)
uses: astral-sh/setup-uv@v6
if: github.ref == 'refs/heads/main'
with:
Expand All @@ -80,22 +124,33 @@
tag:
# Set Docker Tag and Image Name for Docker Build and Push (GPU Runs)
runs-on: ubuntu-latest
needs: [authorize]
if: needs.authorize.outputs.should_run == 'true'
outputs:
tag: ${{ steps.set-tag.outputs.tag }}
image: ${{ steps.set-tag.outputs.image }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }}
- name: Set Tag
id: set-tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
if [ "${{ github.event_name }}" = "pull_request_target" ]; then
# SECURITY: Never use attacker-controlled branch names in pull_request_target
TAG="pr-${{ github.event.pull_request.number }}"
elif [ "${{ github.ref }}" = "refs/heads/main" ]; then
TAG='latest'
elif [ "${{ github.event_name }}" = "pull_request" ]; then
TAG="pr-${{ github.event.pull_request.number }}"
else
# head_ref is the correct branch name for PRs
BRANCH_NAME=${{ github.head_ref || github.ref_name }}
# Replace slashes (and any other character not allowed in a Docker tag) with hyphens and truncate to 20 characters
BRANCH_NAME="${{ github.ref_name }}"
TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20)
# Prevent collision with protected tags
if [ "$TAG" = "latest" ] || [ "$TAG" = "main" ]; then
TAG="branch-${TAG}-${{ github.run_id }}"
fi
fi
echo "tag=$TAG" >> $GITHUB_OUTPUT
echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT
Expand All @@ -107,14 +162,17 @@

build:
# Build and Push Docker Image (GPU Runs)
needs: [lint, tag]
needs: [authorize, lint, tag]
if: needs.authorize.outputs.should_run == 'true'
runs-on: cpu-runner-8c-32gb-01
container: docker:dind
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }}

- name: Registry Authentication

Check failure

Code scanning / CodeQL

Checkout of untrusted code in trusted context High test

Potential execution of untrusted code on a privileged workflow (
pull_request_target
)
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
Expand All @@ -135,14 +193,17 @@
test-extras:
# Test uv installs (CPU)
runs-on: ubuntu-latest
needs: [lint]
needs: [authorize, lint]
if: needs.authorize.outputs.should_run == 'true'
strategy:
fail-fast: false
matrix:
extras: ['', 'determined', 'api', 'openai', 'transformers', 'accelerate', 'comet', 'optional']
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }}

- name: Setup uv
uses: astral-sh/setup-uv@v6
Expand All @@ -162,11 +223,14 @@
test-cpu:
runs-on: cpu-runner-8c-32gb-01
container: derskythe/github-runner-base:ubuntu-noble
needs: [hf-datasets-cache, test-extras]
needs: [authorize, hf-datasets-cache, test-extras]
if: needs.authorize.outputs.should_run == 'true'
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }}

- name: Setup uv

Check failure

Code scanning / CodeQL

Checkout of untrusted code in trusted context High test

Potential execution of untrusted code on a privileged workflow (
pull_request_target
)
uses: astral-sh/setup-uv@v6
with:
version: "~=0.8.16"
Expand All @@ -185,11 +249,14 @@
test-cpu-slow:
runs-on: cpu-runner-8c-32gb-01
container: derskythe/github-runner-base:ubuntu-noble
needs: [hf-datasets-cache, test-extras]
needs: [authorize, hf-datasets-cache, test-extras]
if: needs.authorize.outputs.should_run == 'true'
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || github.sha }}

- name: Setup uv

Check failure

Code scanning / CodeQL

Checkout of untrusted code in trusted context High test

Potential execution of untrusted code on a privileged workflow (
pull_request_target
)
uses: astral-sh/setup-uv@v6
with:
version: "~=0.8.16"
Expand All @@ -204,13 +271,13 @@
env:
HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
run: |
uv run --all-extras python -c "import nltk; nltk.download('punkt_tab')" # otherwise there's a race condition in nltk
uv run --all-extras python -c "import nltk; nltk.download('punkt_tab')"
uv run --all-extras pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api"

test-docker-gpu:
# Run full test suite in Docker Container with GPU
runs-on: EvalFrameworkGPURunner
needs: [tag, build, test-cpu, test-cpu-slow]
needs: [authorize, tag, build, test-cpu, test-cpu-slow]
if: needs.authorize.outputs.should_run == 'true'
container:
image: "${{ needs.tag.outputs.image }}"
credentials:
Expand All @@ -224,7 +291,6 @@
- name: Verify GPU installs via uv --exact
run: |
set -e # fail fast if any test fails

echo "Testing vllm extra"
uv run --exact --extra vllm pytest -v --noconftest tests/tests_eval_framework/installs/test_vllm.py

Expand Down