From 05ac2cdf0c594b8ce796e0af3b90b1cc1461bc09 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 13:12:30 +0200 Subject: [PATCH 01/51] Add initial dockerfiles --- Dockerfile.amd64 | 4 ++++ Dockerfile.arm64 | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 Dockerfile.amd64 create mode 100644 Dockerfile.arm64 diff --git a/Dockerfile.amd64 b/Dockerfile.amd64 new file mode 100644 index 0000000..bd52886 --- /dev/null +++ b/Dockerfile.amd64 @@ -0,0 +1,4 @@ +FROM vllm/vllm-openai:v0.10.2 + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system triton==3.2.0 diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 new file mode 100644 index 0000000..da7f9cb --- /dev/null +++ b/Dockerfile.arm64 @@ -0,0 +1,22 @@ +FROM python:3.12-slim AS triton-builder + +RUN apt-get update -qq && \ + apt-get install -qq -y git && \ + rm -rf /var/lib/apt/lists/* + +RUN git clone https://github.com/triton-lang/triton.git /tmp/triton \ + --depth 1 \ + --branch v3.2.0 + +RUN --mount=type=cache,target=/root/.cache/pip \ + cd /tmp/triton && \ + pip install ninja cmake wheel pybind11 && \ + pip wheel ./python --wheel-dir /tmp/wheels + +FROM vllm/vllm-openai:v0.10.2 AS final + +COPY --from=triton-builder /tmp/wheels/*.whl /tmp/wheels/ + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system /tmp/wheels/*.whl && \ + rm -rf /tmp/wheels From 6ab58a324c03f2307f05c6d3b82227b75bddd34d Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 13:12:37 +0200 Subject: [PATCH 02/51] Add initial workflow --- .github/workflows/test.yml | 61 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..a1f24fd --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,61 @@ +name: test-docker-image + +on: + pull_request: + branches: + - main + +env: + IMAGE: 
zappi/vllm-openai + +jobs: + build-amd64: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Prepare image metadata + id: metadata + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Test building of image + uses: docker/build-push-action@v6 + with: + cache-from: type=gha + cache-to: type=gha,mode=max + context: . + file: Dockerfile.amd64 + labels: ${{ steps.metadata.outputs.labels }} + platforms: linux/amd64 + push: false + tags: ${{ steps.metadata.outputs.tags }} + build-arm64: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Prepare image metadata + id: metadata + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Test building of image + uses: docker/build-push-action@v6 + with: + cache-from: type=gha + cache-to: type=gha,mode=max + context: . 
+ file: Dockerfile.arm64 + labels: ${{ steps.metadata.outputs.labels }} + platforms: linux/arm64 + push: false + tags: ${{ steps.metadata.outputs.tags }} From 1b7baa0cc13fec59224c87d5d20716671369258c Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 13:25:43 +0200 Subject: [PATCH 03/51] Prepare more disk space --- .github/workflows/test.yml | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a1f24fd..be8304f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,6 +12,25 @@ jobs: build-amd64: runs-on: ubuntu-latest steps: + - name: Prepare additional disk space + run: | + echo "Listing 100 largest packages" + dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 + df -h + echo "Removing large packages" + sudo apt-get remove -y '^ghc-8.*' + sudo apt-get remove -y '^dotnet-.*' + sudo apt-get remove -y '^llvm-.*' + sudo apt-get remove -y 'php.*' + sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel + sudo apt-get autoremove -y + sudo apt-get clean + df -h + echo "Removing large directories" + # deleting 15GB + rm -rf /usr/share/dotnet/ + rm -rf /opt/hostedtoolcache + df -h - name: Checkout uses: actions/checkout@v4 - name: Prepare image metadata @@ -37,6 +56,25 @@ jobs: build-arm64: runs-on: ubuntu-latest steps: + - name: Prepare additional disk space + run: | + echo "Listing 100 largest packages" + dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 + df -h + echo "Removing large packages" + sudo apt-get remove -y '^ghc-8.*' + sudo apt-get remove -y '^dotnet-.*' + sudo apt-get remove -y '^llvm-.*' + sudo apt-get remove -y 'php.*' + sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel + sudo apt-get autoremove -y + sudo apt-get clean + df -h + echo "Removing large directories" + # 
deleting 15GB + rm -rf /usr/share/dotnet/ + rm -rf /opt/hostedtoolcache + df -h - name: Checkout uses: actions/checkout@v4 - name: Prepare image metadata From 750631d4de223fe32ffcd932ab8848ac943546c0 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 14:12:29 +0200 Subject: [PATCH 04/51] Install additional dependencies --- Dockerfile.arm64 | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 index da7f9cb..acc4c24 100644 --- a/Dockerfile.arm64 +++ b/Dockerfile.arm64 @@ -1,13 +1,20 @@ FROM python:3.12-slim AS triton-builder RUN apt-get update -qq && \ - apt-get install -qq -y git && \ + apt-get install -qq -y \ + build-essential \ + clang \ + cmake \ + git \ + lld \ + llvm && \ rm -rf /var/lib/apt/lists/* RUN git clone https://github.com/triton-lang/triton.git /tmp/triton \ --depth 1 \ --branch v3.2.0 +ARG TRITON_BUILD_WITH_CLANG_LLD=true RUN --mount=type=cache,target=/root/.cache/pip \ cd /tmp/triton && \ pip install ninja cmake wheel pybind11 && \ From 69cb352d653e4f68e3814c911ca2e76a3659c6c3 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 14:15:16 +0200 Subject: [PATCH 05/51] Remove missing package from deletion --- .github/workflows/test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index be8304f..46e6c2f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,7 +18,6 @@ jobs: dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 df -h echo "Removing large packages" - sudo apt-get remove -y '^ghc-8.*' sudo apt-get remove -y '^dotnet-.*' sudo apt-get remove -y '^llvm-.*' sudo apt-get remove -y 'php.*' @@ -62,7 +61,6 @@ jobs: dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 df -h echo "Removing large packages" - sudo apt-get remove -y '^ghc-8.*' sudo apt-get remove -y '^dotnet-.*' sudo apt-get remove -y '^llvm-.*' sudo apt-get remove -y 'php.*' From 
7e1e408d4814dd2d07bfd48b57165bf771d03f7f Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 14:22:13 +0200 Subject: [PATCH 06/51] Update removals --- .github/workflows/test.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 46e6c2f..7454ea7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,21 +14,24 @@ jobs: steps: - name: Prepare additional disk space run: | - echo "Listing 100 largest packages" - dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 df -h echo "Removing large packages" sudo apt-get remove -y '^dotnet-.*' sudo apt-get remove -y '^llvm-.*' sudo apt-get remove -y 'php.*' - sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel + sudo apt-get remove -y azure-cli + sudo apt-get remove -y firefox + sudo apt-get remove -y powershell + sudo apt-get remove -y google-chrome-stable + sudo apt-get remove -y google-cloud-cli + sudo apt-get remove -y mono-devel sudo apt-get autoremove -y sudo apt-get clean df -h echo "Removing large directories" # deleting 15GB rm -rf /usr/share/dotnet/ - rm -rf /opt/hostedtoolcache + rm -rf /opt/hostedtoolcache/ df -h - name: Checkout uses: actions/checkout@v4 @@ -57,21 +60,24 @@ jobs: steps: - name: Prepare additional disk space run: | - echo "Listing 100 largest packages" - dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 df -h echo "Removing large packages" sudo apt-get remove -y '^dotnet-.*' sudo apt-get remove -y '^llvm-.*' sudo apt-get remove -y 'php.*' - sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel + sudo apt-get remove -y azure-cli + sudo apt-get remove -y firefox + sudo apt-get remove -y powershell + sudo apt-get remove -y google-chrome-stable + sudo apt-get remove -y google-cloud-cli + sudo apt-get remove -y mono-devel 
sudo apt-get autoremove -y sudo apt-get clean df -h echo "Removing large directories" # deleting 15GB rm -rf /usr/share/dotnet/ - rm -rf /opt/hostedtoolcache + rm -rf /opt/hostedtoolcache/ df -h - name: Checkout uses: actions/checkout@v4 From aa54d062571f7cc8758d3a417782f423347087aa Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 14:35:00 +0200 Subject: [PATCH 07/51] Move prepare to its own job --- .github/workflows/test.yml | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7454ea7..e7defb7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,10 +9,10 @@ env: IMAGE: zappi/vllm-openai jobs: - build-amd64: + prepare: runs-on: ubuntu-latest steps: - - name: Prepare additional disk space + - name: Free additional disk space run: | df -h echo "Removing large packages" @@ -33,6 +33,11 @@ jobs: rm -rf /usr/share/dotnet/ rm -rf /opt/hostedtoolcache/ df -h + + build-amd64: + runs-on: ubuntu-latest + needs: prepare + steps: - name: Checkout uses: actions/checkout@v4 - name: Prepare image metadata @@ -55,30 +60,11 @@ jobs: platforms: linux/amd64 push: false tags: ${{ steps.metadata.outputs.tags }} + build-arm64: runs-on: ubuntu-latest + needs: prepare steps: - - name: Prepare additional disk space - run: | - df -h - echo "Removing large packages" - sudo apt-get remove -y '^dotnet-.*' - sudo apt-get remove -y '^llvm-.*' - sudo apt-get remove -y 'php.*' - sudo apt-get remove -y azure-cli - sudo apt-get remove -y firefox - sudo apt-get remove -y powershell - sudo apt-get remove -y google-chrome-stable - sudo apt-get remove -y google-cloud-cli - sudo apt-get remove -y mono-devel - sudo apt-get autoremove -y - sudo apt-get clean - df -h - echo "Removing large directories" - # deleting 15GB - rm -rf /usr/share/dotnet/ - rm -rf /opt/hostedtoolcache/ - df -h - name: Checkout uses: actions/checkout@v4 - name: Prepare image metadata 
From 5b293dcf5087a959a27f5c148f0831d35b4afda4 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 14:45:38 +0200 Subject: [PATCH 08/51] Use concurrency to separate jobs onto two builders --- .github/workflows/test.yml | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e7defb7..8a4e23b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,35 +9,15 @@ env: IMAGE: zappi/vllm-openai jobs: - prepare: + build-amd64: runs-on: ubuntu-latest + concurrency: amd64 steps: - name: Free additional disk space run: | df -h - echo "Removing large packages" - sudo apt-get remove -y '^dotnet-.*' - sudo apt-get remove -y '^llvm-.*' - sudo apt-get remove -y 'php.*' - sudo apt-get remove -y azure-cli - sudo apt-get remove -y firefox - sudo apt-get remove -y powershell - sudo apt-get remove -y google-chrome-stable - sudo apt-get remove -y google-cloud-cli - sudo apt-get remove -y mono-devel - sudo apt-get autoremove -y - sudo apt-get clean - df -h - echo "Removing large directories" - # deleting 15GB - rm -rf /usr/share/dotnet/ rm -rf /opt/hostedtoolcache/ df -h - - build-amd64: - runs-on: ubuntu-latest - needs: prepare - steps: - name: Checkout uses: actions/checkout@v4 - name: Prepare image metadata @@ -63,8 +43,13 @@ jobs: build-arm64: runs-on: ubuntu-latest - needs: prepare + concurrency: arm64 steps: + - name: Free additional disk space + run: | + df -h + rm -rf /opt/hostedtoolcache/ + df -h - name: Checkout uses: actions/checkout@v4 - name: Prepare image metadata From 392be6162c4d37ef6226795c483e4ed4a06d0a9e Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 14:52:03 +0200 Subject: [PATCH 09/51] Remove more stuff --- .github/workflows/test.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8a4e23b..63ea5e2 100644 --- 
a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,14 @@ jobs: - name: Free additional disk space run: | df -h - rm -rf /opt/hostedtoolcache/ + echo "Removing android..." + rm -rf /usr/local/lib/android + echo "Removing dotnet..." + rm -rf /usr/share/dotnet + echo "Removing haskell" + rm -rf /opt/ghc + echo "Removing tool cache..." + rm -rf /opt/hostedtoolcache df -h - name: Checkout uses: actions/checkout@v4 @@ -48,7 +55,14 @@ jobs: - name: Free additional disk space run: | df -h - rm -rf /opt/hostedtoolcache/ + echo "Removing android..." + rm -rf /usr/local/lib/android + echo "Removing dotnet..." + rm -rf /usr/share/dotnet + echo "Removing haskell" + rm -rf /opt/ghc + echo "Removing tool cache..." + rm -rf /opt/hostedtoolcache df -h - name: Checkout uses: actions/checkout@v4 From 78e3ee52676beb9dca922f3c80893918c717a329 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 14:59:36 +0200 Subject: [PATCH 10/51] Sudo --- .github/workflows/test.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 63ea5e2..0c9ca59 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,13 +17,14 @@ jobs: run: | df -h echo "Removing android..." - rm -rf /usr/local/lib/android + sudo rm -rf /usr/local/lib/android echo "Removing dotnet..." - rm -rf /usr/share/dotnet + sudo rm -rf /usr/share/dotnet echo "Removing haskell" - rm -rf /opt/ghc + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/.ghcup echo "Removing tool cache..." - rm -rf /opt/hostedtoolcache + sudo rm -rf /opt/hostedtoolcache df -h - name: Checkout uses: actions/checkout@v4 @@ -56,13 +57,14 @@ jobs: run: | df -h echo "Removing android..." - rm -rf /usr/local/lib/android + sudo rm -rf /usr/local/lib/android echo "Removing dotnet..." 
- rm -rf /usr/share/dotnet + sudo rm -rf /usr/share/dotnet echo "Removing haskell" - rm -rf /opt/ghc + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/.ghcup echo "Removing tool cache..." - rm -rf /opt/hostedtoolcache + sudo rm -rf /opt/hostedtoolcache df -h - name: Checkout uses: actions/checkout@v4 From a7692f688647bba28feaeeab400cad0a3a20f3e1 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Wed, 1 Oct 2025 15:13:25 +0200 Subject: [PATCH 11/51] Disable caching --- .github/workflows/test.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0c9ca59..83249b4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,8 +40,6 @@ jobs: - name: Test building of image uses: docker/build-push-action@v6 with: - cache-from: type=gha - cache-to: type=gha,mode=max context: . file: Dockerfile.amd64 labels: ${{ steps.metadata.outputs.labels }} @@ -80,8 +78,6 @@ jobs: - name: Test building of image uses: docker/build-push-action@v6 with: - cache-from: type=gha - cache-to: type=gha,mode=max context: . 
file: Dockerfile.arm64 labels: ${{ steps.metadata.outputs.labels }} From 5080a2b9e0f54162f58d5c630b11f110d77b75a3 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 00:27:48 +0200 Subject: [PATCH 12/51] Try --- .github/workflows/test.yml | 25 +++++++++++++++++++++++++ Dockerfile.arm64 | 32 ++++---------------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 83249b4..3a053f1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,6 +9,25 @@ env: IMAGE: zappi/vllm-openai jobs: + build-triton-wheel-arm64: + runs-on: ubuntu-24.04-arm + concurrency: arm64 + steps: + - name: Build wheels + uses: pypa/cibuildwheel@v3.2.0 + env: + CIBW_BEFORE_ALL: "dnf install clang lld -y" + CIBW_BUILD: "cp312-manylinux_aarch64" + CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" + with: + package-dir: python + output-dir: wheelhouse + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: triton-wheels-linux-arm64 + path: ./wheelhouse/*.whl + build-amd64: runs-on: ubuntu-latest concurrency: amd64 @@ -49,6 +68,7 @@ jobs: build-arm64: runs-on: ubuntu-latest + needs: build-triton-wheel-arm64 concurrency: arm64 steps: - name: Free additional disk space @@ -66,6 +86,11 @@ jobs: df -h - name: Checkout uses: actions/checkout@v4 + - name: Download a single artifact + uses: actions/download-artifact@v5 + with: + name: triton-wheels-linux-arm64 + path: ./wheelhouse/ - name: Prepare image metadata id: metadata uses: docker/metadata-action@v5 diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 index acc4c24..43a6040 100644 --- a/Dockerfile.arm64 +++ b/Dockerfile.arm64 @@ -1,29 +1,5 @@ -FROM python:3.12-slim AS triton-builder +FROM vllm/vllm-openai:v0.10.2 -RUN apt-get update -qq && \ - apt-get install -qq -y \ - build-essential \ - clang \ - cmake \ - git \ - lld \ - llvm && \ - rm -rf /var/lib/apt/lists/* - -RUN git clone 
https://github.com/triton-lang/triton.git /tmp/triton \ - --depth 1 \ - --branch v3.2.0 - -ARG TRITON_BUILD_WITH_CLANG_LLD=true -RUN --mount=type=cache,target=/root/.cache/pip \ - cd /tmp/triton && \ - pip install ninja cmake wheel pybind11 && \ - pip wheel ./python --wheel-dir /tmp/wheels - -FROM vllm/vllm-openai:v0.10.2 AS final - -COPY --from=triton-builder /tmp/wheels/*.whl /tmp/wheels/ - -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system /tmp/wheels/*.whl && \ - rm -rf /tmp/wheels +RUN --mount=type=bind,source=./wheelhouse,target=/wheelhouse \ + --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system /wheelhouse/*.whl From c3d92ab3d68c2255aa17cd36b64ed7098d233fb5 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 00:36:12 +0200 Subject: [PATCH 13/51] Fetch triton --- .github/workflows/test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3a053f1..ae32a34 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,6 +13,11 @@ jobs: runs-on: ubuntu-24.04-arm concurrency: arm64 steps: + - name: Checkout + uses: actions/checkout@v4 + with: + repository: triton-lang/triton + ref: v3.2.0 - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: From d5c61210e7f94cb8f2fb4c3eecf07817996f868f Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 00:45:00 +0200 Subject: [PATCH 14/51] Try a thing --- .github/workflows/test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ae32a34..d8650b9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,6 +18,11 @@ jobs: with: repository: triton-lang/triton ref: v3.2.0 + - name: Patch setup.py + run: | + echo "" >> python/setup.cfg + echo "[build_ext]" >> python/setup.cfg + echo "base-dir=/project" >> python/setup.cfg - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: From 
deabaa09106f3289a4e9e2bb9dd0e62c2073b472 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 00:48:26 +0200 Subject: [PATCH 15/51] Use current directory as package dir --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d8650b9..6a50c77 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,7 +30,7 @@ jobs: CIBW_BUILD: "cp312-manylinux_aarch64" CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" with: - package-dir: python + package-dir: . output-dir: wheelhouse - name: Upload wheels uses: actions/upload-artifact@v4 From 883d0b3a09af7dc15facc788a7fd1594e338b3b6 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 00:53:12 +0200 Subject: [PATCH 16/51] Try ubuntu 22.04 --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6a50c77..15324f3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ env: jobs: build-triton-wheel-arm64: - runs-on: ubuntu-24.04-arm + runs-on: ubuntu-22.04-arm concurrency: arm64 steps: - name: Checkout @@ -30,7 +30,7 @@ jobs: CIBW_BUILD: "cp312-manylinux_aarch64" CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" with: - package-dir: . 
+ package-dir: python output-dir: wheelhouse - name: Upload wheels uses: actions/upload-artifact@v4 From 5703600317dd9ad07712a4401f370e343b466920 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 01:20:48 +0200 Subject: [PATCH 17/51] Test --- .github/workflows/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 15324f3..8321451 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,9 +26,8 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: - CIBW_BEFORE_ALL: "dnf install clang lld -y" CIBW_BUILD: "cp312-manylinux_aarch64" - CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" + CIBW_ENVIRONMENT: "MAX_JOBS=4" with: package-dir: python output-dir: wheelhouse From bd240b9ac6499dd6c6ef4e9f320985cb8c87e033 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 01:25:17 +0200 Subject: [PATCH 18/51] Test 3.4 --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8321451..a802911 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: uses: actions/checkout@v4 with: repository: triton-lang/triton - ref: v3.2.0 + ref: v3.4.0 - name: Patch setup.py run: | echo "" >> python/setup.cfg @@ -26,8 +26,9 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: + CIBW_BEFORE_ALL: "dnf install clang lld -y" CIBW_BUILD: "cp312-manylinux_aarch64" - CIBW_ENVIRONMENT: "MAX_JOBS=4" + CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" with: package-dir: python output-dir: wheelhouse From e57a996290a68d35c75076677a6084843a86f155 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 01:35:33 +0200 Subject: [PATCH 19/51] Yolo --- .github/workflows/test.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index a802911..c7161a5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,20 +18,23 @@ jobs: with: repository: triton-lang/triton ref: v3.4.0 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" - name: Patch setup.py run: | echo "" >> python/setup.cfg echo "[build_ext]" >> python/setup.cfg echo "base-dir=/project" >> python/setup.cfg - name: Build wheels - uses: pypa/cibuildwheel@v3.2.0 env: CIBW_BEFORE_ALL: "dnf install clang lld -y" CIBW_BUILD: "cp312-manylinux_aarch64" CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - with: - package-dir: python - output-dir: wheelhouse + run: | + python --version + python3 -m pip install cibuildwheel --user + python3 -m cibuildwheel python --output-dir wheelhouse - name: Upload wheels uses: actions/upload-artifact@v4 with: From 09ff98ba0daf7b3a5b4d50edff7c1bd4650551b7 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 01:36:26 +0200 Subject: [PATCH 20/51] Fix run --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c7161a5..3d4be64 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,10 +31,10 @@ jobs: CIBW_BEFORE_ALL: "dnf install clang lld -y" CIBW_BUILD: "cp312-manylinux_aarch64" CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - run: | - python --version - python3 -m pip install cibuildwheel --user - python3 -m cibuildwheel python --output-dir wheelhouse + run: | + python --version + python3 -m pip install cibuildwheel --user + python3 -m cibuildwheel python --output-dir wheelhouse - name: Upload wheels uses: actions/upload-artifact@v4 with: From 405bd6b5db8c1c7a59d69656ce12975fc051d8a5 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 01:38:35 +0200 Subject: [PATCH 21/51] Back to 3.2.0 --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3d4be64..c1aaaba 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: uses: actions/checkout@v4 with: repository: triton-lang/triton - ref: v3.4.0 + ref: v3.2.0 - uses: actions/setup-python@v5 with: python-version: "3.11" From fae44f5036e370d0a27049a3b1ee1add750e64dc Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 12:14:42 +0200 Subject: [PATCH 22/51] Test --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c1aaaba..a5bd6c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: uses: actions/checkout@v4 with: repository: triton-lang/triton - ref: v3.2.0 + ref: v3.4.0 - uses: actions/setup-python@v5 with: python-version: "3.11" @@ -34,7 +34,7 @@ jobs: run: | python --version python3 -m pip install cibuildwheel --user - python3 -m cibuildwheel python --output-dir wheelhouse + python3 -m cibuildwheel . 
--output-dir wheelhouse - name: Upload wheels uses: actions/upload-artifact@v4 with: From ea63a5970fe7f87e6cb34bc168c6f6236526530c Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 12:58:31 +0200 Subject: [PATCH 23/51] Try a thing --- .github/workflows/test.yml | 14 +- triton/patches/setup.cfg | 2 + triton/patches/setup.py | 761 +++++++++++++++++++++++++++++++++++++ 3 files changed, 771 insertions(+), 6 deletions(-) create mode 100644 triton/patches/setup.cfg create mode 100644 triton/patches/setup.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a5bd6c1..7214778 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,19 +13,21 @@ jobs: runs-on: ubuntu-22.04-arm concurrency: arm64 steps: + - name: Checkout + uses: actions/checkout@v4 - name: Checkout uses: actions/checkout@v4 with: repository: triton-lang/triton - ref: v3.4.0 + ref: v3.2.0 + path: triton - uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Patch setup.py + - name: Patch setup run: | - echo "" >> python/setup.cfg - echo "[build_ext]" >> python/setup.cfg - echo "base-dir=/project" >> python/setup.cfg + cp triton/patches/setup.cfg triton/python/setup.cfg + cp triton/patches/setup.py triton/python/setup.py - name: Build wheels env: CIBW_BEFORE_ALL: "dnf install clang lld -y" @@ -34,7 +36,7 @@ jobs: run: | python --version python3 -m pip install cibuildwheel --user - python3 -m cibuildwheel . 
--output-dir wheelhouse + python3 -m cibuildwheel triton/python --output-dir wheelhouse - name: Upload wheels uses: actions/upload-artifact@v4 with: diff --git a/triton/patches/setup.cfg b/triton/patches/setup.cfg new file mode 100644 index 0000000..d923951 --- /dev/null +++ b/triton/patches/setup.cfg @@ -0,0 +1,2 @@ +[build_ext] +base-dir=/project diff --git a/triton/patches/setup.py b/triton/patches/setup.py new file mode 100644 index 0000000..f9d78cc --- /dev/null +++ b/triton/patches/setup.py @@ -0,0 +1,761 @@ +import os +import platform +import re +import contextlib +import shlex +import shutil +import subprocess +import sys +import sysconfig +import tarfile +import zipfile +import urllib.request +import json +from io import BytesIO +from distutils.command.clean import clean +from pathlib import Path +from typing import List, NamedTuple, Optional + +from setuptools import Extension, setup +from setuptools.command.build_ext import build_ext +from setuptools.command.build_py import build_py +from dataclasses import dataclass + +from distutils.command.install import install +from setuptools.command.develop import develop +from setuptools.command.egg_info import egg_info +from wheel.bdist_wheel import bdist_wheel + +import pybind11 + + +@dataclass +class Backend: + name: str + package_data: List[str] + language_package_data: List[str] + src_dir: str + backend_dir: str + language_dir: Optional[str] + install_dir: str + is_external: bool + + +class BackendInstaller: + + @staticmethod + def prepare(backend_name: str, backend_src_dir: str = None, is_external: bool = False): + # Initialize submodule if there is one for in-tree backends. 
+ if not is_external: + root_dir = os.path.join(os.pardir, "third_party") + assert backend_name in os.listdir( + root_dir), f"{backend_name} is requested for install but not present in {root_dir}" + + try: + subprocess.run(["git", "submodule", "update", "--init", f"{backend_name}"], check=True, + stdout=subprocess.DEVNULL, cwd=root_dir) + except subprocess.CalledProcessError: + pass + except FileNotFoundError: + pass + + backend_src_dir = os.path.join(root_dir, backend_name) + + backend_path = os.path.abspath(os.path.join(backend_src_dir, "backend")) + assert os.path.exists(backend_path), f"{backend_path} does not exist!" + + language_dir = os.path.abspath(os.path.join(backend_src_dir, "language")) + if not os.path.exists(language_dir): + language_dir = None + + for file in ["compiler.py", "driver.py"]: + assert os.path.exists(os.path.join(backend_path, file)), f"${file} does not exist in ${backend_path}" + + install_dir = os.path.join(os.path.dirname(__file__), "triton", "backends", backend_name) + package_data = [f"{os.path.relpath(p, backend_path)}/*" for p, _, _, in os.walk(backend_path)] + + language_package_data = [] + if language_dir is not None: + language_package_data = [f"{os.path.relpath(p, language_dir)}/*" for p, _, _, in os.walk(language_dir)] + + return Backend(name=backend_name, package_data=package_data, language_package_data=language_package_data, + src_dir=backend_src_dir, backend_dir=backend_path, language_dir=language_dir, + install_dir=install_dir, is_external=is_external) + + # Copy all in-tree backends under triton/third_party. + @staticmethod + def copy(active): + return [BackendInstaller.prepare(backend) for backend in active] + + # Copy all external plugins provided by the `TRITON_PLUGIN_DIRS` env var. + # TRITON_PLUGIN_DIRS is a semicolon-separated list of paths to the plugins. 
+ # Expect to find the name of the backend under dir/backend/name.conf + @staticmethod + def copy_externals(): + backend_dirs = os.getenv("TRITON_PLUGIN_DIRS") + if backend_dirs is None: + return [] + backend_dirs = backend_dirs.strip().split(";") + backend_names = [Path(os.path.join(dir, "backend", "name.conf")).read_text().strip() for dir in backend_dirs] + return [ + BackendInstaller.prepare(backend_name, backend_src_dir=backend_src_dir, is_external=True) + for backend_name, backend_src_dir in zip(backend_names, backend_dirs) + ] + + +# Taken from https://github.com/pytorch/pytorch/blob/master/tools/setup_helpers/env.py +def check_env_flag(name: str, default: str = "") -> bool: + return os.getenv(name, default).upper() in ["ON", "1", "YES", "TRUE", "Y"] + + +def get_build_type(): + if check_env_flag("DEBUG"): + return "Debug" + elif check_env_flag("REL_WITH_DEB_INFO"): + return "RelWithDebInfo" + elif check_env_flag("TRITON_REL_BUILD_WITH_ASSERTS"): + return "TritonRelBuildWithAsserts" + elif check_env_flag("TRITON_BUILD_WITH_O1"): + return "TritonBuildWithO1" + else: + # TODO: change to release when stable enough + return "TritonRelBuildWithAsserts" + + +def get_env_with_keys(key: list): + for k in key: + if k in os.environ: + return os.environ[k] + return "" + + +def is_offline_build() -> bool: + """ + Downstream projects and distributions which bootstrap their own dependencies from scratch + and run builds in offline sandboxes + may set `TRITON_OFFLINE_BUILD` in the build environment to prevent any attempts at downloading + pinned dependencies from the internet or at using dependencies vendored in-tree. + + Dependencies must be defined using respective search paths (cf. `syspath_var_name` in `Package`). + Missing dependencies lead to an early abortion. + Dependencies' compatibility is not verified. + + Note that this flag isn't tested by the CI and does not provide any guarantees. 
+ """ + return check_env_flag("TRITON_OFFLINE_BUILD", "") + + +# --- third party packages ----- + + +class Package(NamedTuple): + package: str + name: str + url: str + include_flag: str + lib_flag: str + syspath_var_name: str + + +# json +def get_json_package_info(): + url = "https://github.com/nlohmann/json/releases/download/v3.11.3/include.zip" + return Package("json", "", url, "JSON_INCLUDE_DIR", "", "JSON_SYSPATH") + +def is_linux_os(id): + if os.path.exists("/etc/os-release"): + with open("/etc/os-release", "r") as f: + os_release_content = f.read() + return f'ID="{id}"' in os_release_content + return False + +# llvm +def get_llvm_package_info(): + system = platform.system() + try: + arch = {"x86_64": "x64", "arm64": "arm64", "aarch64": "arm64"}[platform.machine()] + except KeyError: + arch = platform.machine() + if system == "Darwin": + system_suffix = f"macos-{arch}" + elif system == "Linux": + if arch == 'arm64' and is_linux_os('almalinux'): + system_suffix = 'almalinux-arm64' + elif arch == 'arm64': + system_suffix = 'ubuntu-arm64' + elif arch == 'x64': + vglibc = tuple(map(int, platform.libc_ver()[1].split('.'))) + vglibc = vglibc[0] * 100 + vglibc[1] + if vglibc > 228: + # Ubuntu 24 LTS (v2.39) + # Ubuntu 22 LTS (v2.35) + # Ubuntu 20 LTS (v2.31) + system_suffix = "ubuntu-x64" + elif vglibc > 217: + # Manylinux_2.28 (v2.28) + # AlmaLinux 8 (v2.28) + system_suffix = "almalinux-x64" + else: + # Manylinux_2014 (v2.17) + # CentOS 7 (v2.17) + system_suffix = "centos-x64" + else: + print( + f"LLVM pre-compiled image is not available for {system}-{arch}. Proceeding with user-configured LLVM from source build." + ) + return Package("llvm", "LLVM-C.lib", "", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR", "LLVM_SYSPATH") + else: + print( + f"LLVM pre-compiled image is not available for {system}-{arch}. Proceeding with user-configured LLVM from source build." 
+ ) + return Package("llvm", "LLVM-C.lib", "", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR", "LLVM_SYSPATH") + # use_assert_enabled_llvm = check_env_flag("TRITON_USE_ASSERT_ENABLED_LLVM", "False") + # release_suffix = "assert" if use_assert_enabled_llvm else "release" + llvm_hash_path = os.path.join(get_base_dir(), "cmake", "llvm-hash.txt") + with open(llvm_hash_path, "r") as llvm_hash_file: + rev = llvm_hash_file.read(8) + name = f"llvm-{rev}-{system_suffix}" + url = f"https://oaitriton.blob.core.windows.net/public/llvm-builds/{name}.tar.gz" + return Package("llvm", name, url, "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR", "LLVM_SYSPATH") + + +def open_url(url): + user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0' + headers = { + 'User-Agent': user_agent, + } + request = urllib.request.Request(url, None, headers) + # Set timeout to 300 seconds to prevent the request from hanging forever. + return urllib.request.urlopen(request, timeout=300) + + +# ---- package data --- + + +def get_triton_cache_path(): + user_home = os.getenv("TRITON_HOME") + if not user_home: + user_home = os.getenv("HOME") or os.getenv("USERPROFILE") or os.getenv("HOMEPATH") or None + if not user_home: + raise RuntimeError("Could not find user home directory") + return os.path.join(user_home, ".triton") + + +def get_thirdparty_packages(packages: list): + triton_cache_path = get_triton_cache_path() + thirdparty_cmake_args = [] + for p in packages: + package_root_dir = os.path.join(triton_cache_path, p.package) + package_dir = os.path.join(package_root_dir, p.name) + if os.environ.get(p.syspath_var_name): + package_dir = os.environ[p.syspath_var_name] + version_file_path = os.path.join(package_dir, "version.txt") + + input_defined = p.syspath_var_name in os.environ + input_exists = os.path.exists(version_file_path) + input_compatible = input_exists and Path(version_file_path).read_text() == p.url + + if is_offline_build() and not input_defined: + raise 
RuntimeError(f"Requested an offline build but {p.syspath_var_name} is not set") + if not is_offline_build() and not input_defined and not input_compatible: + with contextlib.suppress(Exception): + shutil.rmtree(package_root_dir) + os.makedirs(package_root_dir, exist_ok=True) + print(f'downloading and extracting {p.url} ...') + with open_url(p.url) as response: + if p.url.endswith(".zip"): + file_bytes = BytesIO(response.read()) + with zipfile.ZipFile(file_bytes, "r") as file: + file.extractall(path=package_root_dir) + else: + with tarfile.open(fileobj=response, mode="r|*") as file: + file.extractall(path=package_root_dir) + # write version url to package_dir + with open(os.path.join(package_dir, "version.txt"), "w") as f: + f.write(p.url) + if p.include_flag: + thirdparty_cmake_args.append(f"-D{p.include_flag}={package_dir}/include") + if p.lib_flag: + thirdparty_cmake_args.append(f"-D{p.lib_flag}={package_dir}/lib") + return thirdparty_cmake_args + + +def download_and_copy(name, src_path, dst_path, variable, version, url_func): + if is_offline_build(): + return + triton_cache_path = get_triton_cache_path() + if variable in os.environ: + return + base_dir = os.path.dirname(__file__) + system = platform.system() + try: + arch = {"x86_64": "64", "arm64": "aarch64", "aarch64": "aarch64"}[platform.machine()] + except KeyError: + arch = platform.machine() + supported = {"Linux": "linux", "Darwin": "linux"} + url = url_func(supported[system], arch, version) + tmp_path = os.path.join(triton_cache_path, "nvidia", name) # path to cache the download + dst_path = os.path.join(base_dir, os.pardir, "third_party", "nvidia", "backend", dst_path) # final binary path + platform_name = "sbsa-linux" if arch == "aarch64" else "x86_64-linux" + src_path = src_path(platform_name, version) if callable(src_path) else src_path + src_path = os.path.join(tmp_path, src_path) + download = not os.path.exists(src_path) + if os.path.exists(dst_path) and system == "Linux" and shutil.which(dst_path) 
is not None: + curr_version = subprocess.check_output([dst_path, "--version"]).decode("utf-8").strip() + curr_version = re.search(r"V([.|\d]+)", curr_version).group(1) + download = download or curr_version != version + if download: + print(f'downloading and extracting {url} ...') + file = tarfile.open(fileobj=open_url(url), mode="r|*") + file.extractall(path=tmp_path) + os.makedirs(os.path.split(dst_path)[0], exist_ok=True) + print(f'copy {src_path} to {dst_path} ...') + if os.path.isdir(src_path): + shutil.copytree(src_path, dst_path, dirs_exist_ok=True) + else: + shutil.copy(src_path, dst_path) + + +# ---- cmake extension ---- + + +def get_base_dir(): + return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) + + +def get_cmake_dir(): + plat_name = sysconfig.get_platform() + python_version = sysconfig.get_python_version() + dir_name = f"cmake.{plat_name}-{sys.implementation.name}-{python_version}" + cmake_dir = Path(get_base_dir()) / "python" / "build" / dir_name + cmake_dir.mkdir(parents=True, exist_ok=True) + return cmake_dir + + +class CMakeClean(clean): + + def initialize_options(self): + clean.initialize_options(self) + self.build_temp = get_cmake_dir() + + +class CMakeBuildPy(build_py): + + def run(self) -> None: + self.run_command('build_ext') + return super().run() + + +class CMakeExtension(Extension): + + def __init__(self, name, path, sourcedir=""): + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) + self.path = path + + +class CMakeBuild(build_ext): + + user_options = build_ext.user_options + \ + [('base-dir=', None, 'base directory of Triton')] + + def initialize_options(self): + build_ext.initialize_options(self) + self.base_dir = get_base_dir() + + def finalize_options(self): + build_ext.finalize_options(self) + + def run(self): + try: + out = subprocess.check_output(["cmake", "--version"]) + except OSError: + raise RuntimeError("CMake must be installed to build the following extensions: " + 
+ ", ".join(e.name for e in self.extensions)) + + match = re.search(r"version\s*(?P<major>\d+)\.(?P<minor>\d+)([\d.]+)?", out.decode()) + cmake_major, cmake_minor = int(match.group("major")), int(match.group("minor")) + if (cmake_major, cmake_minor) < (3, 18): + raise RuntimeError("CMake >= 3.18.0 is required") + + for ext in self.extensions: + self.build_extension(ext) + + def get_pybind11_cmake_args(self): + pybind11_sys_path = get_env_with_keys(["PYBIND11_SYSPATH"]) + if pybind11_sys_path: + pybind11_include_dir = os.path.join(pybind11_sys_path, "include") + else: + pybind11_include_dir = pybind11.get_include() + return [f"-DPYBIND11_INCLUDE_DIR={pybind11_include_dir}"] + + def get_proton_cmake_args(self): + cmake_args = get_thirdparty_packages([get_json_package_info()]) + cmake_args += self.get_pybind11_cmake_args() + cupti_include_dir = get_env_with_keys(["TRITON_CUPTI_INCLUDE_PATH"]) + if cupti_include_dir == "": + cupti_include_dir = os.path.join(get_base_dir(), "third_party", "nvidia", "backend", "include") + cmake_args += ["-DCUPTI_INCLUDE_DIR=" + cupti_include_dir] + cupti_lib_dir = get_env_with_keys(["TRITON_CUPTI_LIB_PATH"]) + if cupti_lib_dir == "": + cupti_lib_dir = os.path.join(get_base_dir(), "third_party", "nvidia", "backend", "lib", "cupti") + cmake_args += ["-DCUPTI_LIB_DIR=" + cupti_lib_dir] + roctracer_include_dir = get_env_with_keys(["ROCTRACER_INCLUDE_PATH"]) + if roctracer_include_dir == "": + roctracer_include_dir = os.path.join(get_base_dir(), "third_party", "amd", "backend", "include") + cmake_args += ["-DROCTRACER_INCLUDE_DIR=" + roctracer_include_dir] + return cmake_args + + def build_extension(self, ext): + lit_dir = shutil.which('lit') + ninja_dir = shutil.which('ninja') + # lit is used by the test suite + thirdparty_cmake_args = get_thirdparty_packages([get_llvm_package_info()]) + thirdparty_cmake_args += self.get_pybind11_cmake_args() + extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path))) + # create build directories + if
not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + # python directories + python_include_dir = sysconfig.get_path("platinclude") + cmake_args = [ + "-G", "Ninja", # Ninja is much faster than make + "-DCMAKE_MAKE_PROGRAM=" + + ninja_dir, # Pass explicit path to ninja otherwise cmake may cache a temporary path + "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", "-DLLVM_ENABLE_WERROR=ON", + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, "-DTRITON_BUILD_TUTORIALS=OFF", + "-DTRITON_BUILD_PYTHON_MODULE=ON", "-DPython3_EXECUTABLE:FILEPATH=" + sys.executable, + "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", "-DPYTHON_INCLUDE_DIRS=" + python_include_dir, + "-DTRITON_CODEGEN_BACKENDS=" + ';'.join([b.name for b in backends if not b.is_external]), + "-DTRITON_PLUGIN_DIRS=" + ';'.join([b.src_dir for b in backends if b.is_external]) + ] + if lit_dir is not None: + cmake_args.append("-DLLVM_EXTERNAL_LIT=" + lit_dir) + cmake_args.extend(thirdparty_cmake_args) + + # configuration + cfg = get_build_type() + build_args = ["--config", cfg] + + if platform.system() == "Windows": + cmake_args += [f"-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"] + if sys.maxsize > 2**32: + cmake_args += ["-A", "x64"] + else: + cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg] + max_jobs = os.getenv("MAX_JOBS", str(2 * os.cpu_count())) + build_args += ['-j' + max_jobs] + + if check_env_flag("TRITON_BUILD_WITH_CLANG_LLD"): + cmake_args += [ + "-DCMAKE_C_COMPILER=clang", + "-DCMAKE_CXX_COMPILER=clang++", + "-DCMAKE_LINKER=lld", + "-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=lld", + "-DCMAKE_MODULE_LINKER_FLAGS=-fuse-ld=lld", + "-DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=lld", + ] + + # Note that asan doesn't work with binaries that use the GPU, so this is + # only useful for tools like triton-opt that don't run code on the GPU. + # + # I tried and gave up getting msan to work. It seems that libstdc++'s + # std::string does not play nicely with clang's msan (I didn't try + # gcc's). 
I was unable to configure clang to ignore the error, and I + # also wasn't able to get libc++ to work, but that doesn't mean it's + # impossible. :) + if check_env_flag("TRITON_BUILD_WITH_ASAN"): + cmake_args += [ + "-DCMAKE_C_FLAGS=-fsanitize=address", + "-DCMAKE_CXX_FLAGS=-fsanitize=address", + ] + + if check_env_flag("TRITON_BUILD_WITH_CCACHE"): + cmake_args += [ + "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", + ] + + if check_env_flag("TRITON_BUILD_PROTON", "ON"): # Default ON + cmake_args += self.get_proton_cmake_args() + else: + cmake_args += ["-DTRITON_BUILD_PROTON=OFF"] + + if is_offline_build(): + # unit test builds fetch googletests from GitHub + cmake_args += ["-DTRITON_BUILD_UT=OFF"] + + cmake_args_append = os.getenv("TRITON_APPEND_CMAKE_ARGS") + if cmake_args_append is not None: + cmake_args += shlex.split(cmake_args_append) + + env = os.environ.copy() + cmake_dir = get_cmake_dir() + subprocess.check_call(["cmake", self.base_dir] + cmake_args, cwd=cmake_dir, env=env) + subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=cmake_dir) + subprocess.check_call(["cmake", "--build", ".", "--target", "mlir-doc"], cwd=cmake_dir) + + +nvidia_version_path = os.path.join(get_base_dir(), "cmake", "nvidia-toolchain-version.json") +with open(nvidia_version_path, "r") as nvidia_version_file: + # parse this json file to get the version of the nvidia toolchain + NVIDIA_TOOLCHAIN_VERSION = json.load(nvidia_version_file) + + +def get_platform_dependent_src_path(subdir): + return lambda platform, version: ( + (lambda version_major, version_minor1, version_minor2, : f"targets/{platform}/{subdir}" + if int(version_major) >= 12 and int(version_minor1) >= 5 else subdir)(*version.split('.'))) + + +download_and_copy( + name="ptxas", src_path="bin/ptxas", dst_path="bin/ptxas", variable="TRITON_PTXAS_PATH", + version=NVIDIA_TOOLCHAIN_VERSION["ptxas"], url_func=lambda system, arch, version: + ((lambda version_major, version_minor1, version_minor2: + 
f"https://anaconda.org/nvidia/cuda-nvcc-tools/{version}/download/{system}-{arch}/cuda-nvcc-tools-{version}-0.tar.bz2" + if int(version_major) >= 12 and int(version_minor1) >= 5 else + f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/{system}-{arch}/cuda-nvcc-{version}-0.tar.bz2") + (*version.split('.')))) +download_and_copy( + name="cuobjdump", + src_path="bin/cuobjdump", + dst_path="bin/cuobjdump", + variable="TRITON_CUOBJDUMP_PATH", + version=NVIDIA_TOOLCHAIN_VERSION["cuobjdump"], + url_func=lambda system, arch, version: + f"https://anaconda.org/nvidia/cuda-cuobjdump/{version}/download/{system}-{arch}/cuda-cuobjdump-{version}-0.tar.bz2", +) +download_and_copy( + name="nvdisasm", + src_path="bin/nvdisasm", + dst_path="bin/nvdisasm", + variable="TRITON_NVDISASM_PATH", + version=NVIDIA_TOOLCHAIN_VERSION["nvdisasm"], + url_func=lambda system, arch, version: + f"https://anaconda.org/nvidia/cuda-nvdisasm/{version}/download/{system}-{arch}/cuda-nvdisasm-{version}-0.tar.bz2", +) +download_and_copy( + name="cudacrt", src_path=get_platform_dependent_src_path("include"), dst_path="include", + variable="TRITON_CUDACRT_PATH", version=NVIDIA_TOOLCHAIN_VERSION["cudacrt"], url_func=lambda system, arch, version: + ((lambda version_major, version_minor1, version_minor2: + f"https://anaconda.org/nvidia/cuda-crt-dev_{system}-{arch}/{version}/download/noarch/cuda-crt-dev_{system}-{arch}-{version}-0.tar.bz2" + if int(version_major) >= 12 and int(version_minor1) >= 5 else + f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/{system}-{arch}/cuda-nvcc-{version}-0.tar.bz2") + (*version.split('.')))) +download_and_copy( + name="cudart", src_path=get_platform_dependent_src_path("include"), dst_path="include", + variable="TRITON_CUDART_PATH", version=NVIDIA_TOOLCHAIN_VERSION["cudart"], url_func=lambda system, arch, version: + ((lambda version_major, version_minor1, version_minor2: + 
f"https://anaconda.org/nvidia/cuda-cudart-dev_{system}-{arch}/{version}/download/noarch/cuda-cudart-dev_{system}-{arch}-{version}-0.tar.bz2" + if int(version_major) >= 12 and int(version_minor1) >= 5 else + f"https://anaconda.org/nvidia/cuda-cudart-dev/{version}/download/{system}-{arch}/cuda-cudart-dev-{version}-0.tar.bz2" + )(*version.split('.')))) +download_and_copy( + name="cupti", src_path=get_platform_dependent_src_path("include"), dst_path="include", + variable="TRITON_CUPTI_INCLUDE_PATH", version=NVIDIA_TOOLCHAIN_VERSION["cupti"], + url_func=lambda system, arch, version: + ((lambda version_major, version_minor1, version_minor2: + f"https://anaconda.org/nvidia/cuda-cupti-dev/{version}/download/{system}-{arch}/cuda-cupti-dev-{version}-0.tar.bz2" + if int(version_major) >= 12 and int(version_minor1) >= 5 else + f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/{system}-{arch}/cuda-cupti-{version}-0.tar.bz2") + (*version.split('.')))) +download_and_copy( + name="cupti", src_path=get_platform_dependent_src_path("lib"), dst_path="lib/cupti", + variable="TRITON_CUPTI_LIB_PATH", version=NVIDIA_TOOLCHAIN_VERSION["cupti"], url_func=lambda system, arch, version: + ((lambda version_major, version_minor1, version_minor2: + f"https://anaconda.org/nvidia/cuda-cupti-dev/{version}/download/{system}-{arch}/cuda-cupti-dev-{version}-0.tar.bz2" + if int(version_major) >= 12 and int(version_minor1) >= 5 else + f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/{system}-{arch}/cuda-cupti-{version}-0.tar.bz2") + (*version.split('.')))) + +backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()] + + +def add_link_to_backends(): + for backend in backends: + if os.path.islink(backend.install_dir): + os.unlink(backend.install_dir) + if os.path.exists(backend.install_dir): + shutil.rmtree(backend.install_dir) + os.symlink(backend.backend_dir, backend.install_dir) + + if backend.language_dir: + # Link the contents of each backend's 
`language` directory into + # `triton.language.extra`. + extra_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "triton", "language", "extra")) + for x in os.listdir(backend.language_dir): + src_dir = os.path.join(backend.language_dir, x) + install_dir = os.path.join(extra_dir, x) + if os.path.islink(install_dir): + os.unlink(install_dir) + if os.path.exists(install_dir): + shutil.rmtree(install_dir) + os.symlink(src_dir, install_dir) + + +def add_link_to_proton(): + proton_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, "third_party", "proton", "proton")) + proton_install_dir = os.path.join(os.path.dirname(__file__), "triton", "profiler") + if os.path.islink(proton_install_dir): + os.unlink(proton_install_dir) + if os.path.exists(proton_install_dir): + shutil.rmtree(proton_install_dir) + os.symlink(proton_dir, proton_install_dir) + + +def add_links(): + add_link_to_backends() + if check_env_flag("TRITON_BUILD_PROTON", "ON"): # Default ON + add_link_to_proton() + + +class plugin_install(install): + + def run(self): + add_links() + install.run(self) + + +class plugin_develop(develop): + + def run(self): + add_links() + develop.run(self) + + +class plugin_bdist_wheel(bdist_wheel): + + def run(self): + add_links() + bdist_wheel.run(self) + + +class plugin_egginfo(egg_info): + + def run(self): + add_links() + egg_info.run(self) + + +package_data = { + "triton/tools": ["compile.h", "compile.c"], **{f"triton/backends/{b.name}": b.package_data + for b in backends}, "triton/language/extra": sum( + (b.language_package_data for b in backends), []) +} + + +def get_language_extra_packages(): + packages = [] + for backend in backends: + if backend.language_dir is None: + continue + + # Walk the `language` directory of each backend to enumerate + # any subpackages, which will be added to `triton.language.extra`. 
+ for dir, dirs, files in os.walk(backend.language_dir, followlinks=True): + if not any(f for f in files if f.endswith(".py")) or dir == backend.language_dir: + # Ignore directories with no python files. + # Also ignore the root directory which corresponds to + # "triton/language/extra". + continue + subpackage = os.path.relpath(dir, backend.language_dir) + package = os.path.join("triton/language/extra", subpackage) + packages.append(package) + + return list(packages) + + +def get_packages(): + packages = [ + "triton", + "triton/_C", + "triton/compiler", + "triton/language", + "triton/language/extra", + "triton/runtime", + "triton/backends", + "triton/tools", + ] + packages += [f'triton/backends/{backend.name}' for backend in backends] + packages += get_language_extra_packages() + if check_env_flag("TRITON_BUILD_PROTON", "ON"): # Default ON + packages += ["triton/profiler"] + + return packages + + +def get_entry_points(): + entry_points = {} + if check_env_flag("TRITON_BUILD_PROTON", "ON"): # Default ON + entry_points["console_scripts"] = [ + "proton-viewer = triton.profiler.viewer:main", + "proton = triton.profiler.proton:main", + ] + return entry_points + + +def get_git_commit_hash(length=8): + try: + cmd = ['git', 'rev-parse', f'--short={length}', 'HEAD'] + return "+git{}".format(subprocess.check_output(cmd).strip().decode('utf-8')) + except Exception: + return "" + + +setup( + name=os.environ.get("TRITON_WHEEL_NAME", "triton"), + version="3.2.0" + os.environ.get("TRITON_WHEEL_VERSION_SUFFIX", ""), + author="Philippe Tillet", + author_email="phil@openai.com", + description="A language and compiler for custom Deep Learning operations", + long_description="", + packages=get_packages(), + entry_points=get_entry_points(), + package_data=package_data, + include_package_data=True, + ext_modules=[CMakeExtension("triton", "triton/_C/")], + cmdclass={ + "build_ext": CMakeBuild, + "build_py": CMakeBuildPy, + "clean": CMakeClean, + "install": plugin_install, + "develop": 
plugin_develop, + "bdist_wheel": plugin_bdist_wheel, + "egg_info": plugin_egginfo, + }, + zip_safe=False, + # for PyPI + keywords=["Compiler", "Deep Learning"], + url="https://github.com/triton-lang/triton/", + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Topic :: Software Development :: Build Tools", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + ], + test_suite="tests", + extras_require={ + "build": [ + "cmake>=3.20", + "lit", + ], + "tests": [ + "autopep8", + "flake8", + "isort", + "numpy", + "pytest", + "scipy>=1.7.1", + "llnl-hatchet", + ], + "tutorials": [ + "matplotlib", + "pandas", + "tabulate", + ], + }, +) From 697bc4cdf53f62cc4bb436115d496ba030ecd035 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:02:32 +0200 Subject: [PATCH 24/51] Fix --- .github/workflows/test.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7214778..72391f8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,19 +15,20 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + path: vllm - name: Checkout uses: actions/checkout@v4 with: repository: triton-lang/triton ref: v3.2.0 - path: triton - uses: actions/setup-python@v5 with: python-version: "3.11" - name: Patch setup run: | - cp triton/patches/setup.cfg triton/python/setup.cfg - cp triton/patches/setup.py triton/python/setup.py + cp vllm/triton/patches/setup.cfg triton/python/setup.cfg + cp vllm/triton/patches/setup.py triton/python/setup.py - name: Build wheels env: CIBW_BEFORE_ALL: "dnf install clang lld -y" From 596f10cf4c21690501cf74ff7888eaf70f77183c Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:07:25 
+0200 Subject: [PATCH 25/51] What? --- .github/workflows/test.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 72391f8..bba4408 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,21 +14,22 @@ jobs: concurrency: arm64 steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: path: vllm - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: repository: triton-lang/triton ref: v3.2.0 + path: triton - uses: actions/setup-python@v5 with: python-version: "3.11" - name: Patch setup run: | - cp vllm/triton/patches/setup.cfg triton/python/setup.cfg - cp vllm/triton/patches/setup.py triton/python/setup.py + cp ./vllm/triton/patches/setup.cfg ./triton/python/setup.cfg + cp ./vllm/triton/patches/setup.py ./triton/python/setup.py - name: Build wheels env: CIBW_BEFORE_ALL: "dnf install clang lld -y" @@ -62,7 +63,7 @@ jobs: sudo rm -rf /opt/hostedtoolcache df -h - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Prepare image metadata id: metadata uses: docker/metadata-action@v5 From 9bcb79a59f65a64ba2a459b5eb0d0ae02348ae0a Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:10:36 +0200 Subject: [PATCH 26/51] Huh --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bba4408..4b84f89 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,7 @@ jobs: - name: Checkout uses: actions/checkout@v5 with: - path: vllm + path: main - name: Checkout uses: actions/checkout@v5 with: @@ -28,8 +28,8 @@ jobs: python-version: "3.11" - name: Patch setup run: | - cp ./vllm/triton/patches/setup.cfg ./triton/python/setup.cfg - cp ./vllm/triton/patches/setup.py ./triton/python/setup.py + cp main/triton/patches/setup.cfg triton/python/setup.cfg + 
cp main/triton/patches/setup.py triton/python/setup.py - name: Build wheels env: CIBW_BEFORE_ALL: "dnf install clang lld -y" From 3821e66cfc0d50c40e1a982c138f579683a28864 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:30:27 +0200 Subject: [PATCH 27/51] Update llvm build hash --- .github/workflows/test.yml | 12 ++++++------ triton/patches/cmake/llvm-hash.txt | 1 + triton/patches/{ => python}/setup.cfg | 0 triton/patches/{ => python}/setup.py | 0 4 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 triton/patches/cmake/llvm-hash.txt rename triton/patches/{ => python}/setup.cfg (100%) rename triton/patches/{ => python}/setup.py (100%) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4b84f89..d69260c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,17 +28,17 @@ jobs: python-version: "3.11" - name: Patch setup run: | - cp main/triton/patches/setup.cfg triton/python/setup.cfg - cp main/triton/patches/setup.py triton/python/setup.py + cp -r main/triton/patches/python/ triton/python/ + cp -r main/triton/patches/cmake/ triton/cmake/ - name: Build wheels + uses: pypa/cibuildwheel@v3.2.0 env: CIBW_BEFORE_ALL: "dnf install clang lld -y" CIBW_BUILD: "cp312-manylinux_aarch64" CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - run: | - python --version - python3 -m pip install cibuildwheel --user - python3 -m cibuildwheel triton/python --output-dir wheelhouse + with: + package-dir: triton/python + output-dir: wheelhouse - name: Upload wheels uses: actions/upload-artifact@v4 with: diff --git a/triton/patches/cmake/llvm-hash.txt b/triton/patches/cmake/llvm-hash.txt new file mode 100644 index 0000000..28e49cc --- /dev/null +++ b/triton/patches/cmake/llvm-hash.txt @@ -0,0 +1 @@ +a66376b0dc3b2ea8a84fda26faca287980986f78 diff --git a/triton/patches/setup.cfg b/triton/patches/python/setup.cfg similarity index 100% rename from triton/patches/setup.cfg rename to 
triton/patches/python/setup.cfg diff --git a/triton/patches/setup.py b/triton/patches/python/setup.py similarity index 100% rename from triton/patches/setup.py rename to triton/patches/python/setup.py From cd76cc399b60167de0f6ffa4bbbb1e4ebbdef9ba Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:33:39 +0200 Subject: [PATCH 28/51] Explicit --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d69260c..6299cb7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,8 +28,9 @@ jobs: python-version: "3.11" - name: Patch setup run: | - cp -r main/triton/patches/python/ triton/python/ - cp -r main/triton/patches/cmake/ triton/cmake/ + cp main/triton/patches/python/ triton/python/setup.cfg + cp main/triton/patches/setup.py triton/python/setup.py + cp main/triton/patches/llvm-hash.txt triton/cmake/llvm-hash.txt - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: From 69bcbefa44e02eecc84de59cbec17d22e126924a Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:34:58 +0200 Subject: [PATCH 29/51] Again --- .github/workflows/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6299cb7..70d1655 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,9 +28,8 @@ jobs: python-version: "3.11" - name: Patch setup run: | - cp main/triton/patches/python/ triton/python/setup.cfg - cp main/triton/patches/setup.py triton/python/setup.py - cp main/triton/patches/llvm-hash.txt triton/cmake/llvm-hash.txt + cp -r main/triton/patches/python/* triton/python/ + cp -r main/triton/patches/cmake/* triton/cmake/ - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: From 7d990ef5e020a0b5d1b1791e2671b906148ab578 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:40:27 +0200 Subject: [PATCH 30/51] Try 3.3 
--- .github/workflows/test.yml | 82 +++++++++++++++++++------------------- Dockerfile.arm64 | 9 +++-- 2 files changed, 47 insertions(+), 44 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 70d1655..997eb5b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,41 +9,41 @@ env: IMAGE: zappi/vllm-openai jobs: - build-triton-wheel-arm64: - runs-on: ubuntu-22.04-arm - concurrency: arm64 - steps: - - name: Checkout - uses: actions/checkout@v5 - with: - path: main - - name: Checkout - uses: actions/checkout@v5 - with: - repository: triton-lang/triton - ref: v3.2.0 - path: triton - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Patch setup - run: | - cp -r main/triton/patches/python/* triton/python/ - cp -r main/triton/patches/cmake/* triton/cmake/ - - name: Build wheels - uses: pypa/cibuildwheel@v3.2.0 - env: - CIBW_BEFORE_ALL: "dnf install clang lld -y" - CIBW_BUILD: "cp312-manylinux_aarch64" - CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - with: - package-dir: triton/python - output-dir: wheelhouse - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: triton-wheels-linux-arm64 - path: ./wheelhouse/*.whl + # build-triton-wheel-arm64: + # runs-on: ubuntu-22.04-arm + # concurrency: arm64 + # steps: + # - name: Checkout + # uses: actions/checkout@v5 + # with: + # path: main + # - name: Checkout + # uses: actions/checkout@v5 + # with: + # repository: triton-lang/triton + # ref: v3.2.0 + # path: triton + # - uses: actions/setup-python@v5 + # with: + # python-version: "3.11" + # - name: Patch setup + # run: | + # cp -r main/triton/patches/python/* triton/python/ + # cp -r main/triton/patches/cmake/* triton/cmake/ + # - name: Build wheels + # uses: pypa/cibuildwheel@v3.2.0 + # env: + # CIBW_BEFORE_ALL: "dnf install clang lld -y" + # CIBW_BUILD: "cp312-manylinux_aarch64" + # CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" + # with: + # 
package-dir: triton/python + # output-dir: wheelhouse + # - name: Upload wheels + # uses: actions/upload-artifact@v4 + # with: + # name: triton-wheels-linux-arm64 + # path: ./wheelhouse/*.whl build-amd64: runs-on: ubuntu-latest @@ -85,7 +85,7 @@ jobs: build-arm64: runs-on: ubuntu-latest - needs: build-triton-wheel-arm64 + # needs: build-triton-wheel-arm64 concurrency: arm64 steps: - name: Free additional disk space @@ -103,11 +103,11 @@ jobs: df -h - name: Checkout uses: actions/checkout@v4 - - name: Download a single artifact - uses: actions/download-artifact@v5 - with: - name: triton-wheels-linux-arm64 - path: ./wheelhouse/ + # - name: Download a single artifact + # uses: actions/download-artifact@v5 + # with: + # name: triton-wheels-linux-arm64 + # path: ./wheelhouse/ - name: Prepare image metadata id: metadata uses: docker/metadata-action@v5 diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 index 43a6040..54e0536 100644 --- a/Dockerfile.arm64 +++ b/Dockerfile.arm64 @@ -1,5 +1,8 @@ FROM vllm/vllm-openai:v0.10.2 -RUN --mount=type=bind,source=./wheelhouse,target=/wheelhouse \ - --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system /wheelhouse/*.whl +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system triton==3.3.0 + +# RUN --mount=type=bind,source=./wheelhouse,target=/wheelhouse \ +# --mount=type=cache,target=/root/.cache/uv \ +# uv pip install --system /wheelhouse/*.whl From 3f0c36b309cfb2c2e47131dcd4ad343eb208f8ac Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:57:09 +0200 Subject: [PATCH 31/51] 3.3.1 --- Dockerfile.amd64 | 2 +- Dockerfile.arm64 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.amd64 b/Dockerfile.amd64 index bd52886..879e532 100644 --- a/Dockerfile.amd64 +++ b/Dockerfile.amd64 @@ -1,4 +1,4 @@ FROM vllm/vllm-openai:v0.10.2 RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system triton==3.2.0 + uv pip install --system triton==3.3.1 diff --git 
a/Dockerfile.arm64 b/Dockerfile.arm64 index 54e0536..5fe199d 100644 --- a/Dockerfile.arm64 +++ b/Dockerfile.arm64 @@ -1,7 +1,7 @@ FROM vllm/vllm-openai:v0.10.2 RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system triton==3.3.0 + uv pip install --system triton==3.3.1 # RUN --mount=type=bind,source=./wheelhouse,target=/wheelhouse \ # --mount=type=cache,target=/root/.cache/uv \ From d67b932f21028a4bc9e0447cc4d959e559316598 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 13:58:34 +0200 Subject: [PATCH 32/51] Faster --- .github/workflows/test.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 997eb5b..24933c7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,13 +52,13 @@ jobs: - name: Free additional disk space run: | df -h - echo "Removing android..." - sudo rm -rf /usr/local/lib/android - echo "Removing dotnet..." - sudo rm -rf /usr/share/dotnet - echo "Removing haskell" - sudo rm -rf /opt/ghc - sudo rm -rf /usr/local/.ghcup + # echo "Removing android..." + # sudo rm -rf /usr/local/lib/android + # echo "Removing dotnet..." + # sudo rm -rf /usr/share/dotnet + # echo "Removing haskell" + # sudo rm -rf /opt/ghc + # sudo rm -rf /usr/local/.ghcup echo "Removing tool cache..." sudo rm -rf /opt/hostedtoolcache df -h @@ -91,13 +91,13 @@ jobs: - name: Free additional disk space run: | df -h - echo "Removing android..." - sudo rm -rf /usr/local/lib/android - echo "Removing dotnet..." - sudo rm -rf /usr/share/dotnet - echo "Removing haskell" - sudo rm -rf /opt/ghc - sudo rm -rf /usr/local/.ghcup + # echo "Removing android..." + # sudo rm -rf /usr/local/lib/android + # echo "Removing dotnet..." + # sudo rm -rf /usr/share/dotnet + # echo "Removing haskell" + # sudo rm -rf /opt/ghc + # sudo rm -rf /usr/local/.ghcup echo "Removing tool cache..." 
sudo rm -rf /opt/hostedtoolcache df -h From c90737e14221f0a67cd929cd49d30d88e0bcdf23 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 14:05:01 +0200 Subject: [PATCH 33/51] More space --- .github/workflows/test.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 24933c7..997eb5b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,13 +52,13 @@ jobs: - name: Free additional disk space run: | df -h - # echo "Removing android..." - # sudo rm -rf /usr/local/lib/android - # echo "Removing dotnet..." - # sudo rm -rf /usr/share/dotnet - # echo "Removing haskell" - # sudo rm -rf /opt/ghc - # sudo rm -rf /usr/local/.ghcup + echo "Removing android..." + sudo rm -rf /usr/local/lib/android + echo "Removing dotnet..." + sudo rm -rf /usr/share/dotnet + echo "Removing haskell" + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/.ghcup echo "Removing tool cache..." sudo rm -rf /opt/hostedtoolcache df -h @@ -91,13 +91,13 @@ jobs: - name: Free additional disk space run: | df -h - # echo "Removing android..." - # sudo rm -rf /usr/local/lib/android - # echo "Removing dotnet..." - # sudo rm -rf /usr/share/dotnet - # echo "Removing haskell" - # sudo rm -rf /opt/ghc - # sudo rm -rf /usr/local/.ghcup + echo "Removing android..." + sudo rm -rf /usr/local/lib/android + echo "Removing dotnet..." + sudo rm -rf /usr/share/dotnet + echo "Removing haskell" + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/.ghcup echo "Removing tool cache..." 
sudo rm -rf /opt/hostedtoolcache df -h From 869c196c9b4617065331098a5d5490bb40205ed0 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 14:28:02 +0200 Subject: [PATCH 34/51] I think we go this now --- .github/workflows/test.yml | 81 +++++++++++++++--------------- Dockerfile.arm64 | 9 ++-- triton/patches/cmake/llvm-hash.txt | 1 - 3 files changed, 43 insertions(+), 48 deletions(-) delete mode 100644 triton/patches/cmake/llvm-hash.txt diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 997eb5b..a918855 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,41 +9,40 @@ env: IMAGE: zappi/vllm-openai jobs: - # build-triton-wheel-arm64: - # runs-on: ubuntu-22.04-arm - # concurrency: arm64 - # steps: - # - name: Checkout - # uses: actions/checkout@v5 - # with: - # path: main - # - name: Checkout - # uses: actions/checkout@v5 - # with: - # repository: triton-lang/triton - # ref: v3.2.0 - # path: triton - # - uses: actions/setup-python@v5 - # with: - # python-version: "3.11" - # - name: Patch setup - # run: | - # cp -r main/triton/patches/python/* triton/python/ - # cp -r main/triton/patches/cmake/* triton/cmake/ - # - name: Build wheels - # uses: pypa/cibuildwheel@v3.2.0 - # env: - # CIBW_BEFORE_ALL: "dnf install clang lld -y" - # CIBW_BUILD: "cp312-manylinux_aarch64" - # CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - # with: - # package-dir: triton/python - # output-dir: wheelhouse - # - name: Upload wheels - # uses: actions/upload-artifact@v4 - # with: - # name: triton-wheels-linux-arm64 - # path: ./wheelhouse/*.whl + build-triton-wheel-arm64: + runs-on: ubuntu-22.04-arm + concurrency: arm64 + steps: + - name: Checkout + uses: actions/checkout@v5 + with: + path: main + - name: Checkout + uses: actions/checkout@v5 + with: + repository: triton-lang/triton + ref: v3.3.1 + path: triton + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Patch setup + run: | + cp -r 
main/triton/patches/python/* triton/python/ + - name: Build wheels + uses: pypa/cibuildwheel@v3.2.0 + env: + CIBW_BEFORE_ALL: "dnf install clang lld -y" + CIBW_BUILD: "cp312-manylinux_aarch64" + CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" + with: + package-dir: triton/python + output-dir: wheelhouse + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: triton-wheels-linux-arm64 + path: ./wheelhouse/*.whl build-amd64: runs-on: ubuntu-latest @@ -85,7 +84,7 @@ jobs: build-arm64: runs-on: ubuntu-latest - # needs: build-triton-wheel-arm64 + needs: build-triton-wheel-arm64 concurrency: arm64 steps: - name: Free additional disk space @@ -103,11 +102,11 @@ jobs: df -h - name: Checkout uses: actions/checkout@v4 - # - name: Download a single artifact - # uses: actions/download-artifact@v5 - # with: - # name: triton-wheels-linux-arm64 - # path: ./wheelhouse/ + - name: Download a single artifact + uses: actions/download-artifact@v5 + with: + name: triton-wheels-linux-arm64 + path: ./wheelhouse/ - name: Prepare image metadata id: metadata uses: docker/metadata-action@v5 diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 index 5fe199d..8fa7571 100644 --- a/Dockerfile.arm64 +++ b/Dockerfile.arm64 @@ -1,8 +1,5 @@ FROM vllm/vllm-openai:v0.10.2 -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system triton==3.3.1 - -# RUN --mount=type=bind,source=./wheelhouse,target=/wheelhouse \ -# --mount=type=cache,target=/root/.cache/uv \ -# uv pip install --system /wheelhouse/*.whl +RUN --mount=type=bind,source=./wheelhouse,target=/wheelhouse \ + --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system /wheelhouse/triton-*.whl diff --git a/triton/patches/cmake/llvm-hash.txt b/triton/patches/cmake/llvm-hash.txt deleted file mode 100644 index 28e49cc..0000000 --- a/triton/patches/cmake/llvm-hash.txt +++ /dev/null @@ -1 +0,0 @@ -a66376b0dc3b2ea8a84fda26faca287980986f78 From cbe672af9bd7f5a46873f491a93e3699c459710c Mon Sep 17 
00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 14:28:44 +0200 Subject: [PATCH 35/51] Remove python setup --- .github/workflows/test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a918855..9695b1a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,9 +23,6 @@ jobs: repository: triton-lang/triton ref: v3.3.1 path: triton - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - name: Patch setup run: | cp -r main/triton/patches/python/* triton/python/ From 3fa261994bba1a2963b0b872a86c0d96dbbfa023 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 14:33:55 +0200 Subject: [PATCH 36/51] Now --- triton/patches/python/setup.py | 761 --------------------------------- 1 file changed, 761 deletions(-) delete mode 100644 triton/patches/python/setup.py diff --git a/triton/patches/python/setup.py b/triton/patches/python/setup.py deleted file mode 100644 index f9d78cc..0000000 --- a/triton/patches/python/setup.py +++ /dev/null @@ -1,761 +0,0 @@ -import os -import platform -import re -import contextlib -import shlex -import shutil -import subprocess -import sys -import sysconfig -import tarfile -import zipfile -import urllib.request -import json -from io import BytesIO -from distutils.command.clean import clean -from pathlib import Path -from typing import List, NamedTuple, Optional - -from setuptools import Extension, setup -from setuptools.command.build_ext import build_ext -from setuptools.command.build_py import build_py -from dataclasses import dataclass - -from distutils.command.install import install -from setuptools.command.develop import develop -from setuptools.command.egg_info import egg_info -from wheel.bdist_wheel import bdist_wheel - -import pybind11 - - -@dataclass -class Backend: - name: str - package_data: List[str] - language_package_data: List[str] - src_dir: str - backend_dir: str - language_dir: Optional[str] - install_dir: str - 
is_external: bool - - -class BackendInstaller: - - @staticmethod - def prepare(backend_name: str, backend_src_dir: str = None, is_external: bool = False): - # Initialize submodule if there is one for in-tree backends. - if not is_external: - root_dir = os.path.join(os.pardir, "third_party") - assert backend_name in os.listdir( - root_dir), f"{backend_name} is requested for install but not present in {root_dir}" - - try: - subprocess.run(["git", "submodule", "update", "--init", f"{backend_name}"], check=True, - stdout=subprocess.DEVNULL, cwd=root_dir) - except subprocess.CalledProcessError: - pass - except FileNotFoundError: - pass - - backend_src_dir = os.path.join(root_dir, backend_name) - - backend_path = os.path.abspath(os.path.join(backend_src_dir, "backend")) - assert os.path.exists(backend_path), f"{backend_path} does not exist!" - - language_dir = os.path.abspath(os.path.join(backend_src_dir, "language")) - if not os.path.exists(language_dir): - language_dir = None - - for file in ["compiler.py", "driver.py"]: - assert os.path.exists(os.path.join(backend_path, file)), f"${file} does not exist in ${backend_path}" - - install_dir = os.path.join(os.path.dirname(__file__), "triton", "backends", backend_name) - package_data = [f"{os.path.relpath(p, backend_path)}/*" for p, _, _, in os.walk(backend_path)] - - language_package_data = [] - if language_dir is not None: - language_package_data = [f"{os.path.relpath(p, language_dir)}/*" for p, _, _, in os.walk(language_dir)] - - return Backend(name=backend_name, package_data=package_data, language_package_data=language_package_data, - src_dir=backend_src_dir, backend_dir=backend_path, language_dir=language_dir, - install_dir=install_dir, is_external=is_external) - - # Copy all in-tree backends under triton/third_party. - @staticmethod - def copy(active): - return [BackendInstaller.prepare(backend) for backend in active] - - # Copy all external plugins provided by the `TRITON_PLUGIN_DIRS` env var. 
- # TRITON_PLUGIN_DIRS is a semicolon-separated list of paths to the plugins. - # Expect to find the name of the backend under dir/backend/name.conf - @staticmethod - def copy_externals(): - backend_dirs = os.getenv("TRITON_PLUGIN_DIRS") - if backend_dirs is None: - return [] - backend_dirs = backend_dirs.strip().split(";") - backend_names = [Path(os.path.join(dir, "backend", "name.conf")).read_text().strip() for dir in backend_dirs] - return [ - BackendInstaller.prepare(backend_name, backend_src_dir=backend_src_dir, is_external=True) - for backend_name, backend_src_dir in zip(backend_names, backend_dirs) - ] - - -# Taken from https://github.com/pytorch/pytorch/blob/master/tools/setup_helpers/env.py -def check_env_flag(name: str, default: str = "") -> bool: - return os.getenv(name, default).upper() in ["ON", "1", "YES", "TRUE", "Y"] - - -def get_build_type(): - if check_env_flag("DEBUG"): - return "Debug" - elif check_env_flag("REL_WITH_DEB_INFO"): - return "RelWithDebInfo" - elif check_env_flag("TRITON_REL_BUILD_WITH_ASSERTS"): - return "TritonRelBuildWithAsserts" - elif check_env_flag("TRITON_BUILD_WITH_O1"): - return "TritonBuildWithO1" - else: - # TODO: change to release when stable enough - return "TritonRelBuildWithAsserts" - - -def get_env_with_keys(key: list): - for k in key: - if k in os.environ: - return os.environ[k] - return "" - - -def is_offline_build() -> bool: - """ - Downstream projects and distributions which bootstrap their own dependencies from scratch - and run builds in offline sandboxes - may set `TRITON_OFFLINE_BUILD` in the build environment to prevent any attempts at downloading - pinned dependencies from the internet or at using dependencies vendored in-tree. - - Dependencies must be defined using respective search paths (cf. `syspath_var_name` in `Package`). - Missing dependencies lead to an early abortion. - Dependencies' compatibility is not verified. - - Note that this flag isn't tested by the CI and does not provide any guarantees. 
- """ - return check_env_flag("TRITON_OFFLINE_BUILD", "") - - -# --- third party packages ----- - - -class Package(NamedTuple): - package: str - name: str - url: str - include_flag: str - lib_flag: str - syspath_var_name: str - - -# json -def get_json_package_info(): - url = "https://github.com/nlohmann/json/releases/download/v3.11.3/include.zip" - return Package("json", "", url, "JSON_INCLUDE_DIR", "", "JSON_SYSPATH") - -def is_linux_os(id): - if os.path.exists("/etc/os-release"): - with open("/etc/os-release", "r") as f: - os_release_content = f.read() - return f'ID="{id}"' in os_release_content - return False - -# llvm -def get_llvm_package_info(): - system = platform.system() - try: - arch = {"x86_64": "x64", "arm64": "arm64", "aarch64": "arm64"}[platform.machine()] - except KeyError: - arch = platform.machine() - if system == "Darwin": - system_suffix = f"macos-{arch}" - elif system == "Linux": - if arch == 'arm64' and is_linux_os('almalinux'): - system_suffix = 'almalinux-arm64' - elif arch == 'arm64': - system_suffix = 'ubuntu-arm64' - elif arch == 'x64': - vglibc = tuple(map(int, platform.libc_ver()[1].split('.'))) - vglibc = vglibc[0] * 100 + vglibc[1] - if vglibc > 228: - # Ubuntu 24 LTS (v2.39) - # Ubuntu 22 LTS (v2.35) - # Ubuntu 20 LTS (v2.31) - system_suffix = "ubuntu-x64" - elif vglibc > 217: - # Manylinux_2.28 (v2.28) - # AlmaLinux 8 (v2.28) - system_suffix = "almalinux-x64" - else: - # Manylinux_2014 (v2.17) - # CentOS 7 (v2.17) - system_suffix = "centos-x64" - else: - print( - f"LLVM pre-compiled image is not available for {system}-{arch}. Proceeding with user-configured LLVM from source build." - ) - return Package("llvm", "LLVM-C.lib", "", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR", "LLVM_SYSPATH") - else: - print( - f"LLVM pre-compiled image is not available for {system}-{arch}. Proceeding with user-configured LLVM from source build." 
- ) - return Package("llvm", "LLVM-C.lib", "", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR", "LLVM_SYSPATH") - # use_assert_enabled_llvm = check_env_flag("TRITON_USE_ASSERT_ENABLED_LLVM", "False") - # release_suffix = "assert" if use_assert_enabled_llvm else "release" - llvm_hash_path = os.path.join(get_base_dir(), "cmake", "llvm-hash.txt") - with open(llvm_hash_path, "r") as llvm_hash_file: - rev = llvm_hash_file.read(8) - name = f"llvm-{rev}-{system_suffix}" - url = f"https://oaitriton.blob.core.windows.net/public/llvm-builds/{name}.tar.gz" - return Package("llvm", name, url, "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR", "LLVM_SYSPATH") - - -def open_url(url): - user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0' - headers = { - 'User-Agent': user_agent, - } - request = urllib.request.Request(url, None, headers) - # Set timeout to 300 seconds to prevent the request from hanging forever. - return urllib.request.urlopen(request, timeout=300) - - -# ---- package data --- - - -def get_triton_cache_path(): - user_home = os.getenv("TRITON_HOME") - if not user_home: - user_home = os.getenv("HOME") or os.getenv("USERPROFILE") or os.getenv("HOMEPATH") or None - if not user_home: - raise RuntimeError("Could not find user home directory") - return os.path.join(user_home, ".triton") - - -def get_thirdparty_packages(packages: list): - triton_cache_path = get_triton_cache_path() - thirdparty_cmake_args = [] - for p in packages: - package_root_dir = os.path.join(triton_cache_path, p.package) - package_dir = os.path.join(package_root_dir, p.name) - if os.environ.get(p.syspath_var_name): - package_dir = os.environ[p.syspath_var_name] - version_file_path = os.path.join(package_dir, "version.txt") - - input_defined = p.syspath_var_name in os.environ - input_exists = os.path.exists(version_file_path) - input_compatible = input_exists and Path(version_file_path).read_text() == p.url - - if is_offline_build() and not input_defined: - raise 
RuntimeError(f"Requested an offline build but {p.syspath_var_name} is not set") - if not is_offline_build() and not input_defined and not input_compatible: - with contextlib.suppress(Exception): - shutil.rmtree(package_root_dir) - os.makedirs(package_root_dir, exist_ok=True) - print(f'downloading and extracting {p.url} ...') - with open_url(p.url) as response: - if p.url.endswith(".zip"): - file_bytes = BytesIO(response.read()) - with zipfile.ZipFile(file_bytes, "r") as file: - file.extractall(path=package_root_dir) - else: - with tarfile.open(fileobj=response, mode="r|*") as file: - file.extractall(path=package_root_dir) - # write version url to package_dir - with open(os.path.join(package_dir, "version.txt"), "w") as f: - f.write(p.url) - if p.include_flag: - thirdparty_cmake_args.append(f"-D{p.include_flag}={package_dir}/include") - if p.lib_flag: - thirdparty_cmake_args.append(f"-D{p.lib_flag}={package_dir}/lib") - return thirdparty_cmake_args - - -def download_and_copy(name, src_path, dst_path, variable, version, url_func): - if is_offline_build(): - return - triton_cache_path = get_triton_cache_path() - if variable in os.environ: - return - base_dir = os.path.dirname(__file__) - system = platform.system() - try: - arch = {"x86_64": "64", "arm64": "aarch64", "aarch64": "aarch64"}[platform.machine()] - except KeyError: - arch = platform.machine() - supported = {"Linux": "linux", "Darwin": "linux"} - url = url_func(supported[system], arch, version) - tmp_path = os.path.join(triton_cache_path, "nvidia", name) # path to cache the download - dst_path = os.path.join(base_dir, os.pardir, "third_party", "nvidia", "backend", dst_path) # final binary path - platform_name = "sbsa-linux" if arch == "aarch64" else "x86_64-linux" - src_path = src_path(platform_name, version) if callable(src_path) else src_path - src_path = os.path.join(tmp_path, src_path) - download = not os.path.exists(src_path) - if os.path.exists(dst_path) and system == "Linux" and shutil.which(dst_path) 
is not None: - curr_version = subprocess.check_output([dst_path, "--version"]).decode("utf-8").strip() - curr_version = re.search(r"V([.|\d]+)", curr_version).group(1) - download = download or curr_version != version - if download: - print(f'downloading and extracting {url} ...') - file = tarfile.open(fileobj=open_url(url), mode="r|*") - file.extractall(path=tmp_path) - os.makedirs(os.path.split(dst_path)[0], exist_ok=True) - print(f'copy {src_path} to {dst_path} ...') - if os.path.isdir(src_path): - shutil.copytree(src_path, dst_path, dirs_exist_ok=True) - else: - shutil.copy(src_path, dst_path) - - -# ---- cmake extension ---- - - -def get_base_dir(): - return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) - - -def get_cmake_dir(): - plat_name = sysconfig.get_platform() - python_version = sysconfig.get_python_version() - dir_name = f"cmake.{plat_name}-{sys.implementation.name}-{python_version}" - cmake_dir = Path(get_base_dir()) / "python" / "build" / dir_name - cmake_dir.mkdir(parents=True, exist_ok=True) - return cmake_dir - - -class CMakeClean(clean): - - def initialize_options(self): - clean.initialize_options(self) - self.build_temp = get_cmake_dir() - - -class CMakeBuildPy(build_py): - - def run(self) -> None: - self.run_command('build_ext') - return super().run() - - -class CMakeExtension(Extension): - - def __init__(self, name, path, sourcedir=""): - Extension.__init__(self, name, sources=[]) - self.sourcedir = os.path.abspath(sourcedir) - self.path = path - - -class CMakeBuild(build_ext): - - user_options = build_ext.user_options + \ - [('base-dir=', None, 'base directory of Triton')] - - def initialize_options(self): - build_ext.initialize_options(self) - self.base_dir = get_base_dir() - - def finalize_options(self): - build_ext.finalize_options(self) - - def run(self): - try: - out = subprocess.check_output(["cmake", "--version"]) - except OSError: - raise RuntimeError("CMake must be installed to build the following extensions: " + 
- ", ".join(e.name for e in self.extensions)) - - match = re.search(r"version\s*(?P\d+)\.(?P\d+)([\d.]+)?", out.decode()) - cmake_major, cmake_minor = int(match.group("major")), int(match.group("minor")) - if (cmake_major, cmake_minor) < (3, 18): - raise RuntimeError("CMake >= 3.18.0 is required") - - for ext in self.extensions: - self.build_extension(ext) - - def get_pybind11_cmake_args(self): - pybind11_sys_path = get_env_with_keys(["PYBIND11_SYSPATH"]) - if pybind11_sys_path: - pybind11_include_dir = os.path.join(pybind11_sys_path, "include") - else: - pybind11_include_dir = pybind11.get_include() - return [f"-DPYBIND11_INCLUDE_DIR={pybind11_include_dir}"] - - def get_proton_cmake_args(self): - cmake_args = get_thirdparty_packages([get_json_package_info()]) - cmake_args += self.get_pybind11_cmake_args() - cupti_include_dir = get_env_with_keys(["TRITON_CUPTI_INCLUDE_PATH"]) - if cupti_include_dir == "": - cupti_include_dir = os.path.join(get_base_dir(), "third_party", "nvidia", "backend", "include") - cmake_args += ["-DCUPTI_INCLUDE_DIR=" + cupti_include_dir] - cupti_lib_dir = get_env_with_keys(["TRITON_CUPTI_LIB_PATH"]) - if cupti_lib_dir == "": - cupti_lib_dir = os.path.join(get_base_dir(), "third_party", "nvidia", "backend", "lib", "cupti") - cmake_args += ["-DCUPTI_LIB_DIR=" + cupti_lib_dir] - roctracer_include_dir = get_env_with_keys(["ROCTRACER_INCLUDE_PATH"]) - if roctracer_include_dir == "": - roctracer_include_dir = os.path.join(get_base_dir(), "third_party", "amd", "backend", "include") - cmake_args += ["-DROCTRACER_INCLUDE_DIR=" + roctracer_include_dir] - return cmake_args - - def build_extension(self, ext): - lit_dir = shutil.which('lit') - ninja_dir = shutil.which('ninja') - # lit is used by the test suite - thirdparty_cmake_args = get_thirdparty_packages([get_llvm_package_info()]) - thirdparty_cmake_args += self.get_pybind11_cmake_args() - extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path))) - # create build directories - if 
not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) - # python directories - python_include_dir = sysconfig.get_path("platinclude") - cmake_args = [ - "-G", "Ninja", # Ninja is much faster than make - "-DCMAKE_MAKE_PROGRAM=" + - ninja_dir, # Pass explicit path to ninja otherwise cmake may cache a temporary path - "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", "-DLLVM_ENABLE_WERROR=ON", - "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, "-DTRITON_BUILD_TUTORIALS=OFF", - "-DTRITON_BUILD_PYTHON_MODULE=ON", "-DPython3_EXECUTABLE:FILEPATH=" + sys.executable, - "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", "-DPYTHON_INCLUDE_DIRS=" + python_include_dir, - "-DTRITON_CODEGEN_BACKENDS=" + ';'.join([b.name for b in backends if not b.is_external]), - "-DTRITON_PLUGIN_DIRS=" + ';'.join([b.src_dir for b in backends if b.is_external]) - ] - if lit_dir is not None: - cmake_args.append("-DLLVM_EXTERNAL_LIT=" + lit_dir) - cmake_args.extend(thirdparty_cmake_args) - - # configuration - cfg = get_build_type() - build_args = ["--config", cfg] - - if platform.system() == "Windows": - cmake_args += [f"-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"] - if sys.maxsize > 2**32: - cmake_args += ["-A", "x64"] - else: - cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg] - max_jobs = os.getenv("MAX_JOBS", str(2 * os.cpu_count())) - build_args += ['-j' + max_jobs] - - if check_env_flag("TRITON_BUILD_WITH_CLANG_LLD"): - cmake_args += [ - "-DCMAKE_C_COMPILER=clang", - "-DCMAKE_CXX_COMPILER=clang++", - "-DCMAKE_LINKER=lld", - "-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=lld", - "-DCMAKE_MODULE_LINKER_FLAGS=-fuse-ld=lld", - "-DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=lld", - ] - - # Note that asan doesn't work with binaries that use the GPU, so this is - # only useful for tools like triton-opt that don't run code on the GPU. - # - # I tried and gave up getting msan to work. It seems that libstdc++'s - # std::string does not play nicely with clang's msan (I didn't try - # gcc's). 
I was unable to configure clang to ignore the error, and I - # also wasn't able to get libc++ to work, but that doesn't mean it's - # impossible. :) - if check_env_flag("TRITON_BUILD_WITH_ASAN"): - cmake_args += [ - "-DCMAKE_C_FLAGS=-fsanitize=address", - "-DCMAKE_CXX_FLAGS=-fsanitize=address", - ] - - if check_env_flag("TRITON_BUILD_WITH_CCACHE"): - cmake_args += [ - "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", - ] - - if check_env_flag("TRITON_BUILD_PROTON", "ON"): # Default ON - cmake_args += self.get_proton_cmake_args() - else: - cmake_args += ["-DTRITON_BUILD_PROTON=OFF"] - - if is_offline_build(): - # unit test builds fetch googletests from GitHub - cmake_args += ["-DTRITON_BUILD_UT=OFF"] - - cmake_args_append = os.getenv("TRITON_APPEND_CMAKE_ARGS") - if cmake_args_append is not None: - cmake_args += shlex.split(cmake_args_append) - - env = os.environ.copy() - cmake_dir = get_cmake_dir() - subprocess.check_call(["cmake", self.base_dir] + cmake_args, cwd=cmake_dir, env=env) - subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=cmake_dir) - subprocess.check_call(["cmake", "--build", ".", "--target", "mlir-doc"], cwd=cmake_dir) - - -nvidia_version_path = os.path.join(get_base_dir(), "cmake", "nvidia-toolchain-version.json") -with open(nvidia_version_path, "r") as nvidia_version_file: - # parse this json file to get the version of the nvidia toolchain - NVIDIA_TOOLCHAIN_VERSION = json.load(nvidia_version_file) - - -def get_platform_dependent_src_path(subdir): - return lambda platform, version: ( - (lambda version_major, version_minor1, version_minor2, : f"targets/{platform}/{subdir}" - if int(version_major) >= 12 and int(version_minor1) >= 5 else subdir)(*version.split('.'))) - - -download_and_copy( - name="ptxas", src_path="bin/ptxas", dst_path="bin/ptxas", variable="TRITON_PTXAS_PATH", - version=NVIDIA_TOOLCHAIN_VERSION["ptxas"], url_func=lambda system, arch, version: - ((lambda version_major, version_minor1, version_minor2: - 
f"https://anaconda.org/nvidia/cuda-nvcc-tools/{version}/download/{system}-{arch}/cuda-nvcc-tools-{version}-0.tar.bz2" - if int(version_major) >= 12 and int(version_minor1) >= 5 else - f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/{system}-{arch}/cuda-nvcc-{version}-0.tar.bz2") - (*version.split('.')))) -download_and_copy( - name="cuobjdump", - src_path="bin/cuobjdump", - dst_path="bin/cuobjdump", - variable="TRITON_CUOBJDUMP_PATH", - version=NVIDIA_TOOLCHAIN_VERSION["cuobjdump"], - url_func=lambda system, arch, version: - f"https://anaconda.org/nvidia/cuda-cuobjdump/{version}/download/{system}-{arch}/cuda-cuobjdump-{version}-0.tar.bz2", -) -download_and_copy( - name="nvdisasm", - src_path="bin/nvdisasm", - dst_path="bin/nvdisasm", - variable="TRITON_NVDISASM_PATH", - version=NVIDIA_TOOLCHAIN_VERSION["nvdisasm"], - url_func=lambda system, arch, version: - f"https://anaconda.org/nvidia/cuda-nvdisasm/{version}/download/{system}-{arch}/cuda-nvdisasm-{version}-0.tar.bz2", -) -download_and_copy( - name="cudacrt", src_path=get_platform_dependent_src_path("include"), dst_path="include", - variable="TRITON_CUDACRT_PATH", version=NVIDIA_TOOLCHAIN_VERSION["cudacrt"], url_func=lambda system, arch, version: - ((lambda version_major, version_minor1, version_minor2: - f"https://anaconda.org/nvidia/cuda-crt-dev_{system}-{arch}/{version}/download/noarch/cuda-crt-dev_{system}-{arch}-{version}-0.tar.bz2" - if int(version_major) >= 12 and int(version_minor1) >= 5 else - f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/{system}-{arch}/cuda-nvcc-{version}-0.tar.bz2") - (*version.split('.')))) -download_and_copy( - name="cudart", src_path=get_platform_dependent_src_path("include"), dst_path="include", - variable="TRITON_CUDART_PATH", version=NVIDIA_TOOLCHAIN_VERSION["cudart"], url_func=lambda system, arch, version: - ((lambda version_major, version_minor1, version_minor2: - 
f"https://anaconda.org/nvidia/cuda-cudart-dev_{system}-{arch}/{version}/download/noarch/cuda-cudart-dev_{system}-{arch}-{version}-0.tar.bz2" - if int(version_major) >= 12 and int(version_minor1) >= 5 else - f"https://anaconda.org/nvidia/cuda-cudart-dev/{version}/download/{system}-{arch}/cuda-cudart-dev-{version}-0.tar.bz2" - )(*version.split('.')))) -download_and_copy( - name="cupti", src_path=get_platform_dependent_src_path("include"), dst_path="include", - variable="TRITON_CUPTI_INCLUDE_PATH", version=NVIDIA_TOOLCHAIN_VERSION["cupti"], - url_func=lambda system, arch, version: - ((lambda version_major, version_minor1, version_minor2: - f"https://anaconda.org/nvidia/cuda-cupti-dev/{version}/download/{system}-{arch}/cuda-cupti-dev-{version}-0.tar.bz2" - if int(version_major) >= 12 and int(version_minor1) >= 5 else - f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/{system}-{arch}/cuda-cupti-{version}-0.tar.bz2") - (*version.split('.')))) -download_and_copy( - name="cupti", src_path=get_platform_dependent_src_path("lib"), dst_path="lib/cupti", - variable="TRITON_CUPTI_LIB_PATH", version=NVIDIA_TOOLCHAIN_VERSION["cupti"], url_func=lambda system, arch, version: - ((lambda version_major, version_minor1, version_minor2: - f"https://anaconda.org/nvidia/cuda-cupti-dev/{version}/download/{system}-{arch}/cuda-cupti-dev-{version}-0.tar.bz2" - if int(version_major) >= 12 and int(version_minor1) >= 5 else - f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/{system}-{arch}/cuda-cupti-{version}-0.tar.bz2") - (*version.split('.')))) - -backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()] - - -def add_link_to_backends(): - for backend in backends: - if os.path.islink(backend.install_dir): - os.unlink(backend.install_dir) - if os.path.exists(backend.install_dir): - shutil.rmtree(backend.install_dir) - os.symlink(backend.backend_dir, backend.install_dir) - - if backend.language_dir: - # Link the contents of each backend's 
`language` directory into - # `triton.language.extra`. - extra_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "triton", "language", "extra")) - for x in os.listdir(backend.language_dir): - src_dir = os.path.join(backend.language_dir, x) - install_dir = os.path.join(extra_dir, x) - if os.path.islink(install_dir): - os.unlink(install_dir) - if os.path.exists(install_dir): - shutil.rmtree(install_dir) - os.symlink(src_dir, install_dir) - - -def add_link_to_proton(): - proton_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, "third_party", "proton", "proton")) - proton_install_dir = os.path.join(os.path.dirname(__file__), "triton", "profiler") - if os.path.islink(proton_install_dir): - os.unlink(proton_install_dir) - if os.path.exists(proton_install_dir): - shutil.rmtree(proton_install_dir) - os.symlink(proton_dir, proton_install_dir) - - -def add_links(): - add_link_to_backends() - if check_env_flag("TRITON_BUILD_PROTON", "ON"): # Default ON - add_link_to_proton() - - -class plugin_install(install): - - def run(self): - add_links() - install.run(self) - - -class plugin_develop(develop): - - def run(self): - add_links() - develop.run(self) - - -class plugin_bdist_wheel(bdist_wheel): - - def run(self): - add_links() - bdist_wheel.run(self) - - -class plugin_egginfo(egg_info): - - def run(self): - add_links() - egg_info.run(self) - - -package_data = { - "triton/tools": ["compile.h", "compile.c"], **{f"triton/backends/{b.name}": b.package_data - for b in backends}, "triton/language/extra": sum( - (b.language_package_data for b in backends), []) -} - - -def get_language_extra_packages(): - packages = [] - for backend in backends: - if backend.language_dir is None: - continue - - # Walk the `language` directory of each backend to enumerate - # any subpackages, which will be added to `triton.language.extra`. 
- for dir, dirs, files in os.walk(backend.language_dir, followlinks=True): - if not any(f for f in files if f.endswith(".py")) or dir == backend.language_dir: - # Ignore directories with no python files. - # Also ignore the root directory which corresponds to - # "triton/language/extra". - continue - subpackage = os.path.relpath(dir, backend.language_dir) - package = os.path.join("triton/language/extra", subpackage) - packages.append(package) - - return list(packages) - - -def get_packages(): - packages = [ - "triton", - "triton/_C", - "triton/compiler", - "triton/language", - "triton/language/extra", - "triton/runtime", - "triton/backends", - "triton/tools", - ] - packages += [f'triton/backends/{backend.name}' for backend in backends] - packages += get_language_extra_packages() - if check_env_flag("TRITON_BUILD_PROTON", "ON"): # Default ON - packages += ["triton/profiler"] - - return packages - - -def get_entry_points(): - entry_points = {} - if check_env_flag("TRITON_BUILD_PROTON", "ON"): # Default ON - entry_points["console_scripts"] = [ - "proton-viewer = triton.profiler.viewer:main", - "proton = triton.profiler.proton:main", - ] - return entry_points - - -def get_git_commit_hash(length=8): - try: - cmd = ['git', 'rev-parse', f'--short={length}', 'HEAD'] - return "+git{}".format(subprocess.check_output(cmd).strip().decode('utf-8')) - except Exception: - return "" - - -setup( - name=os.environ.get("TRITON_WHEEL_NAME", "triton"), - version="3.2.0" + os.environ.get("TRITON_WHEEL_VERSION_SUFFIX", ""), - author="Philippe Tillet", - author_email="phil@openai.com", - description="A language and compiler for custom Deep Learning operations", - long_description="", - packages=get_packages(), - entry_points=get_entry_points(), - package_data=package_data, - include_package_data=True, - ext_modules=[CMakeExtension("triton", "triton/_C/")], - cmdclass={ - "build_ext": CMakeBuild, - "build_py": CMakeBuildPy, - "clean": CMakeClean, - "install": plugin_install, - "develop": 
plugin_develop, - "bdist_wheel": plugin_bdist_wheel, - "egg_info": plugin_egginfo, - }, - zip_safe=False, - # for PyPI - keywords=["Compiler", "Deep Learning"], - url="https://github.com/triton-lang/triton/", - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Topic :: Software Development :: Build Tools", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - ], - test_suite="tests", - extras_require={ - "build": [ - "cmake>=3.20", - "lit", - ], - "tests": [ - "autopep8", - "flake8", - "isort", - "numpy", - "pytest", - "scipy>=1.7.1", - "llnl-hatchet", - ], - "tutorials": [ - "matplotlib", - "pandas", - "tabulate", - ], - }, -) From 63b5fc5bebcd09df90ed1f9ef4da6fbe09d0d6aa Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 14:39:12 +0200 Subject: [PATCH 37/51] For real im getting coffee --- .github/workflows/test.yml | 10 +++------- triton/patches/python/setup.cfg | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) delete mode 100644 triton/patches/python/setup.cfg diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9695b1a..29464c9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,19 +13,15 @@ jobs: runs-on: ubuntu-22.04-arm concurrency: arm64 steps: - - name: Checkout - uses: actions/checkout@v5 - with: - path: main - name: Checkout uses: actions/checkout@v5 with: repository: triton-lang/triton ref: v3.3.1 - path: triton - name: Patch setup run: | - cp -r main/triton/patches/python/* triton/python/ + echo "[build_ext]" >> python/setup.cfg + echo "base-dir=/project" >> python/setup.cfg - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: @@ -33,7 +29,7 @@ jobs: CIBW_BUILD: "cp312-manylinux_aarch64" CIBW_ENVIRONMENT: "MAX_JOBS=4 
TRITON_BUILD_WITH_CLANG_LLD=1" with: - package-dir: triton/python + package-dir: python output-dir: wheelhouse - name: Upload wheels uses: actions/upload-artifact@v4 diff --git a/triton/patches/python/setup.cfg b/triton/patches/python/setup.cfg deleted file mode 100644 index d923951..0000000 --- a/triton/patches/python/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[build_ext] -base-dir=/project From bcf2cd3dae051c81fc9be7f76e0da56b7946486a Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 15:09:03 +0200 Subject: [PATCH 38/51] Fix --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 29464c9..a7dcc8b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -94,7 +94,7 @@ jobs: sudo rm -rf /opt/hostedtoolcache df -h - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Download a single artifact uses: actions/download-artifact@v5 with: From 4a9b1efee705d1f5a939c3f4b9ae1d8cca0f0662 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 15:09:09 +0200 Subject: [PATCH 39/51] Add release --- .github/workflows/release.yml | 185 ++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..d3f5a1b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,185 @@ +name: release-docker-images + +on: + pull_request: + branches: + - main + +env: + IMAGE: zappi/vllm-openai + +jobs: + build-triton-wheel-arm64: + runs-on: ubuntu-22.04-arm + concurrency: arm64 + steps: + - name: Checkout + uses: actions/checkout@v5 + with: + repository: triton-lang/triton + ref: v3.3.1 + - name: Patch setup + run: | + echo "[build_ext]" >> python/setup.cfg + echo "base-dir=/project" >> python/setup.cfg + - name: Build wheels + uses: pypa/cibuildwheel@v3.2.0 + env: + 
CIBW_BEFORE_ALL: "dnf install clang lld -y" + CIBW_BUILD: "cp312-manylinux_aarch64" + CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" + with: + package-dir: python + output-dir: wheelhouse + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: triton-wheels-linux-arm64 + path: ./wheelhouse/*.whl + + docker-hub-release-amd64: + runs-on: ubuntu-latest + concurrency: amd64 + steps: + - name: Free additional disk space + run: | + df -h + echo "Removing android..." + sudo rm -rf /usr/local/lib/android + echo "Removing dotnet..." + sudo rm -rf /usr/share/dotnet + echo "Removing haskell" + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/.ghcup + echo "Removing tool cache..." + sudo rm -rf /opt/hostedtoolcache + df -h + - name: Checkout + uses: actions/checkout@v5 + - name: Prepare image metadata + id: metadata + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_HUB_USERNAME }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + - name: Build, tag, and push image + uses: docker/build-push-action@v6 + with: + context: . + file: Dockerfile.amd64 + labels: ${{ steps.metadata.outputs.labels }} + platforms: linux/amd64 + push: true + tags: ${{ steps.metadata.outputs.tags }} + + docker-hub-release-arm64: + runs-on: ubuntu-latest + needs: build-triton-wheel-arm64 + concurrency: arm64 + steps: + - name: Free additional disk space + run: | + df -h + echo "Removing android..." + sudo rm -rf /usr/local/lib/android + echo "Removing dotnet..." + sudo rm -rf /usr/share/dotnet + echo "Removing haskell" + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/.ghcup + echo "Removing tool cache..." 
+ sudo rm -rf /opt/hostedtoolcache + df -h + - name: Checkout + uses: actions/checkout@v5 + - name: Download a single artifact + uses: actions/download-artifact@v5 + with: + name: triton-wheels-linux-arm64 + path: ./wheelhouse/ + - name: Prepare image metadata + id: metadata + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_HUB_USERNAME }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + - name: Build, tag, and push image + uses: docker/build-push-action@v6 + with: + context: . + file: Dockerfile.arm64 + labels: ${{ steps.metadata.outputs.labels }} + platforms: linux/arm64 + push: true + tags: ${{ steps.metadata.outputs.tags }} + + docker-hub-release-manifest: + runs-on: ubuntu-latest + needs: + - build-amd64 + - build-arm64 + steps: + - name: Checkout + uses: actions/checkout@v5 + - name: Prepare image metadata + id: metadata + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_HUB_USERNAME }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + - name: Build, tag, and push image to Docker Hub + uses: docker/build-push-action@v6 + with: + cache-from: type=gha + cache-to: type=gha,mode=max + context: . 
+ labels: ${{ steps.metadata.outputs.labels }} + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.metadata.outputs.tags }} + - name: Update description on Docker Hub Description + uses: peter-evans/dockerhub-description@v5 + with: + username: ${{ secrets.DOCKER_HUB_USERNAME }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + repository: ${{ env.IMAGE }} + + github-release: + runs-on: ubuntu-latest + needs: docker-hub-release + steps: + - name: Checkout + uses: actions/checkout@v5 + - name: Create Release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }} + release_name: Version ${{ github.ref }} + draft: false + prerelease: false From ec7cc7678e3d1f93753ae0d0b7085ce42c74b69f Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Thu, 2 Oct 2025 19:22:44 +0200 Subject: [PATCH 40/51] Test --- .github/workflows/release.yml | 41 ++++++++++++++++++++--------------- .github/workflows/test.yml | 4 ++++ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d3f5a1b..814c880 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,12 +1,13 @@ name: release-docker-images on: - pull_request: - branches: - - main + push: + tags: + - '*' env: IMAGE: zappi/vllm-openai + VERSION: 0.10.2 jobs: build-triton-wheel-arm64: @@ -61,6 +62,8 @@ jobs: uses: docker/metadata-action@v5 with: images: ${{ env.IMAGE }} + tags: | + type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -110,6 +113,8 @@ jobs: uses: docker/metadata-action@v5 with: images: ${{ env.IMAGE }} + tags: | + type=raw,value=${{ env.VERSION }},prefix=v,suffix=-arm64 - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -168,18 +173,18 @@ jobs: password: ${{ secrets.DOCKER_HUB_TOKEN }} repository: ${{ env.IMAGE }} - 
github-release: - runs-on: ubuntu-latest - needs: docker-hub-release - steps: - - name: Checkout - uses: actions/checkout@v5 - - name: Create Release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ github.ref }} - release_name: Version ${{ github.ref }} - draft: false - prerelease: false + # github-release: + # runs-on: ubuntu-latest + # needs: docker-hub-release + # steps: + # - name: Checkout + # uses: actions/checkout@v5 + # - name: Create Release + # uses: actions/create-release@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # with: + # tag_name: ${{ github.ref }} + # release_name: Version ${{ github.ref }} + # draft: false + # prerelease: false diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a7dcc8b..92b1225 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -61,6 +61,8 @@ jobs: uses: docker/metadata-action@v5 with: images: ${{ env.IMAGE }} + tags: | + type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -105,6 +107,8 @@ jobs: uses: docker/metadata-action@v5 with: images: ${{ env.IMAGE }} + tags: | + type=raw,value=${{ env.VERSION }},prefix=v,suffix=-arm64 - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx From 847b0fdb07c959584da22b31b02f6cee8672d50a Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Fri, 3 Oct 2025 11:57:03 +0200 Subject: [PATCH 41/51] Test --- .github/workflows/release.yml | 53 ++++++++++++++++------------------- .github/workflows/test.yml | 1 + 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 814c880..7df3434 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,9 +1,14 @@ name: release-docker-images +# on: +# push: +# tags: +# - '*' + on: - push: - tags: - - '*' + pull_request: + 
branches: + - main env: IMAGE: zappi/vllm-openai @@ -137,18 +142,11 @@ jobs: docker-hub-release-manifest: runs-on: ubuntu-latest needs: - - build-amd64 - - build-arm64 + - docker-hub-release-amd64 + - docker-hub-release-arm64 steps: - name: Checkout uses: actions/checkout@v5 - - name: Prepare image metadata - id: metadata - uses: docker/metadata-action@v5 - with: - images: ${{ env.IMAGE }} - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Login to Docker Hub @@ -156,26 +154,23 @@ jobs: with: username: ${{ secrets.DOCKER_HUB_USERNAME }} password: ${{ secrets.DOCKER_HUB_TOKEN }} - - name: Build, tag, and push image to Docker Hub - uses: docker/build-push-action@v6 - with: - cache-from: type=gha - cache-to: type=gha,mode=max - context: . - labels: ${{ steps.metadata.outputs.labels }} - platforms: linux/amd64,linux/arm64 - push: true - tags: ${{ steps.metadata.outputs.tags }} - - name: Update description on Docker Hub Description - uses: peter-evans/dockerhub-description@v5 - with: - username: ${{ secrets.DOCKER_HUB_USERNAME }} - password: ${{ secrets.DOCKER_HUB_TOKEN }} - repository: ${{ env.IMAGE }} + - name: Create and push multi-arch manifest + run: | + # Create multi-arch manifest + docker buildx imagetools create \ + --tag ${{ env.IMAGE }}:v${{ env.VERSION }} \ + ${{ env.IMAGE }}:v${{ env.VERSION }}-amd64 \ + ${{ env.IMAGE }}:v${{ env.VERSION }}-arm64 + # - name: Update description on Docker Hub + # uses: peter-evans/dockerhub-description@v5 + # with: + # username: ${{ secrets.DOCKER_HUB_USERNAME }} + # password: ${{ secrets.DOCKER_HUB_TOKEN }} + # repository: ${{ env.IMAGE }} # github-release: # runs-on: ubuntu-latest - # needs: docker-hub-release + # needs: docker-hub-release-manifest # steps: # - name: Checkout # uses: actions/checkout@v5 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 92b1225..875fb8c 100644 --- a/.github/workflows/test.yml +++ 
b/.github/workflows/test.yml @@ -7,6 +7,7 @@ on: env: IMAGE: zappi/vllm-openai + VERSION: 0.10.2 jobs: build-triton-wheel-arm64: From b71bb73e9f4e1f33d1af9b449e9e9aa8e2df7154 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 12:49:58 +0200 Subject: [PATCH 42/51] Downgrade to v0.9.2 --- .github/workflows/release.yml | 222 +++++++++++++++++----------------- .github/workflows/test.yml | 156 ++++++++++++------------ Dockerfile.amd64 | 4 +- 3 files changed, 193 insertions(+), 189 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7df3434..d65df89 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,36 +12,36 @@ on: env: IMAGE: zappi/vllm-openai - VERSION: 0.10.2 + VERSION: 0.9.2 jobs: - build-triton-wheel-arm64: - runs-on: ubuntu-22.04-arm - concurrency: arm64 - steps: - - name: Checkout - uses: actions/checkout@v5 - with: - repository: triton-lang/triton - ref: v3.3.1 - - name: Patch setup - run: | - echo "[build_ext]" >> python/setup.cfg - echo "base-dir=/project" >> python/setup.cfg - - name: Build wheels - uses: pypa/cibuildwheel@v3.2.0 - env: - CIBW_BEFORE_ALL: "dnf install clang lld -y" - CIBW_BUILD: "cp312-manylinux_aarch64" - CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - with: - package-dir: python - output-dir: wheelhouse - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: triton-wheels-linux-arm64 - path: ./wheelhouse/*.whl + # build-triton-wheel-arm64: + # runs-on: ubuntu-22.04-arm + # concurrency: arm64 + # steps: + # - name: Checkout + # uses: actions/checkout@v5 + # with: + # repository: triton-lang/triton + # ref: v3.3.1 + # - name: Patch setup + # run: | + # echo "[build_ext]" >> python/setup.cfg + # echo "base-dir=/project" >> python/setup.cfg + # - name: Build wheels + # uses: pypa/cibuildwheel@v3.2.0 + # env: + # CIBW_BEFORE_ALL: "dnf install clang lld -y" + # CIBW_BUILD: "cp312-manylinux_aarch64" + # CIBW_ENVIRONMENT: 
"MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" + # with: + # package-dir: python + # output-dir: wheelhouse + # - name: Upload wheels + # uses: actions/upload-artifact@v4 + # with: + # name: triton-wheels-linux-arm64 + # path: ./wheelhouse/*.whl docker-hub-release-amd64: runs-on: ubuntu-latest @@ -65,10 +65,10 @@ jobs: - name: Prepare image metadata id: metadata uses: docker/metadata-action@v5 - with: - images: ${{ env.IMAGE }} - tags: | - type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 + # with: + # images: ${{ env.IMAGE }} + # tags: | + # type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -81,6 +81,8 @@ jobs: - name: Build, tag, and push image uses: docker/build-push-action@v6 with: + cache-from: type=gha + cache-to: type=gha,mode=max context: . file: Dockerfile.amd64 labels: ${{ steps.metadata.outputs.labels }} @@ -88,85 +90,85 @@ jobs: push: true tags: ${{ steps.metadata.outputs.tags }} - docker-hub-release-arm64: - runs-on: ubuntu-latest - needs: build-triton-wheel-arm64 - concurrency: arm64 - steps: - - name: Free additional disk space - run: | - df -h - echo "Removing android..." - sudo rm -rf /usr/local/lib/android - echo "Removing dotnet..." - sudo rm -rf /usr/share/dotnet - echo "Removing haskell" - sudo rm -rf /opt/ghc - sudo rm -rf /usr/local/.ghcup - echo "Removing tool cache..." 
- sudo rm -rf /opt/hostedtoolcache - df -h - - name: Checkout - uses: actions/checkout@v5 - - name: Download a single artifact - uses: actions/download-artifact@v5 - with: - name: triton-wheels-linux-arm64 - path: ./wheelhouse/ - - name: Prepare image metadata - id: metadata - uses: docker/metadata-action@v5 - with: - images: ${{ env.IMAGE }} - tags: | - type=raw,value=${{ env.VERSION }},prefix=v,suffix=-arm64 - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_HUB_USERNAME }} - password: ${{ secrets.DOCKER_HUB_TOKEN }} - - name: Build, tag, and push image - uses: docker/build-push-action@v6 - with: - context: . - file: Dockerfile.arm64 - labels: ${{ steps.metadata.outputs.labels }} - platforms: linux/arm64 - push: true - tags: ${{ steps.metadata.outputs.tags }} + # docker-hub-release-arm64: + # runs-on: ubuntu-latest + # needs: build-triton-wheel-arm64 + # concurrency: arm64 + # steps: + # - name: Free additional disk space + # run: | + # df -h + # echo "Removing android..." + # sudo rm -rf /usr/local/lib/android + # echo "Removing dotnet..." + # sudo rm -rf /usr/share/dotnet + # echo "Removing haskell" + # sudo rm -rf /opt/ghc + # sudo rm -rf /usr/local/.ghcup + # echo "Removing tool cache..." 
+ # sudo rm -rf /opt/hostedtoolcache + # df -h + # - name: Checkout + # uses: actions/checkout@v5 + # - name: Download a single artifact + # uses: actions/download-artifact@v5 + # with: + # name: triton-wheels-linux-arm64 + # path: ./wheelhouse/ + # - name: Prepare image metadata + # id: metadata + # uses: docker/metadata-action@v5 + # with: + # images: ${{ env.IMAGE }} + # tags: | + # type=raw,value=${{ env.VERSION }},prefix=v,suffix=-arm64 + # - name: Set up QEMU + # uses: docker/setup-qemu-action@v3 + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v3 + # - name: Login to Docker Hub + # uses: docker/login-action@v3 + # with: + # username: ${{ secrets.DOCKER_HUB_USERNAME }} + # password: ${{ secrets.DOCKER_HUB_TOKEN }} + # - name: Build, tag, and push image + # uses: docker/build-push-action@v6 + # with: + # context: . + # file: Dockerfile.arm64 + # labels: ${{ steps.metadata.outputs.labels }} + # platforms: linux/arm64 + # push: true + # tags: ${{ steps.metadata.outputs.tags }} - docker-hub-release-manifest: - runs-on: ubuntu-latest - needs: - - docker-hub-release-amd64 - - docker-hub-release-arm64 - steps: - - name: Checkout - uses: actions/checkout@v5 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_HUB_USERNAME }} - password: ${{ secrets.DOCKER_HUB_TOKEN }} - - name: Create and push multi-arch manifest - run: | - # Create multi-arch manifest - docker buildx imagetools create \ - --tag ${{ env.IMAGE }}:v${{ env.VERSION }} \ - ${{ env.IMAGE }}:v${{ env.VERSION }}-amd64 \ - ${{ env.IMAGE }}:v${{ env.VERSION }}-arm64 - # - name: Update description on Docker Hub - # uses: peter-evans/dockerhub-description@v5 - # with: - # username: ${{ secrets.DOCKER_HUB_USERNAME }} - # password: ${{ secrets.DOCKER_HUB_TOKEN }} - # repository: ${{ env.IMAGE }} + # docker-hub-release-manifest: + # runs-on: ubuntu-latest + # needs: + # - 
docker-hub-release-amd64 + # - docker-hub-release-arm64 + # steps: + # - name: Checkout + # uses: actions/checkout@v5 + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v3 + # - name: Login to Docker Hub + # uses: docker/login-action@v3 + # with: + # username: ${{ secrets.DOCKER_HUB_USERNAME }} + # password: ${{ secrets.DOCKER_HUB_TOKEN }} + # - name: Create and push multi-arch manifest + # run: | + # # Create multi-arch manifest + # docker buildx imagetools create \ + # --tag ${{ env.IMAGE }}:v${{ env.VERSION }} \ + # ${{ env.IMAGE }}:v${{ env.VERSION }}-amd64 \ + # ${{ env.IMAGE }}:v${{ env.VERSION }}-arm64 + # # - name: Update description on Docker Hub + # # uses: peter-evans/dockerhub-description@v5 + # # with: + # # username: ${{ secrets.DOCKER_HUB_USERNAME }} + # # password: ${{ secrets.DOCKER_HUB_TOKEN }} + # # repository: ${{ env.IMAGE }} # github-release: # runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 875fb8c..998d85e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,36 +7,36 @@ on: env: IMAGE: zappi/vllm-openai - VERSION: 0.10.2 + VERSION: 0.9.2 jobs: - build-triton-wheel-arm64: - runs-on: ubuntu-22.04-arm - concurrency: arm64 - steps: - - name: Checkout - uses: actions/checkout@v5 - with: - repository: triton-lang/triton - ref: v3.3.1 - - name: Patch setup - run: | - echo "[build_ext]" >> python/setup.cfg - echo "base-dir=/project" >> python/setup.cfg - - name: Build wheels - uses: pypa/cibuildwheel@v3.2.0 - env: - CIBW_BEFORE_ALL: "dnf install clang lld -y" - CIBW_BUILD: "cp312-manylinux_aarch64" - CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - with: - package-dir: python - output-dir: wheelhouse - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: triton-wheels-linux-arm64 - path: ./wheelhouse/*.whl + # build-triton-wheel-arm64: + # runs-on: ubuntu-22.04-arm + # concurrency: arm64 + # steps: + # - name: Checkout + # 
uses: actions/checkout@v5 + # with: + # repository: triton-lang/triton + # ref: v3.3.1 + # - name: Patch setup + # run: | + # echo "[build_ext]" >> python/setup.cfg + # echo "base-dir=/project" >> python/setup.cfg + # - name: Build wheels + # uses: pypa/cibuildwheel@v3.2.0 + # env: + # CIBW_BEFORE_ALL: "dnf install clang lld -y" + # CIBW_BUILD: "cp312-manylinux_aarch64" + # CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" + # with: + # package-dir: python + # output-dir: wheelhouse + # - name: Upload wheels + # uses: actions/upload-artifact@v4 + # with: + # name: triton-wheels-linux-arm64 + # path: ./wheelhouse/*.whl build-amd64: runs-on: ubuntu-latest @@ -60,10 +60,10 @@ jobs: - name: Prepare image metadata id: metadata uses: docker/metadata-action@v5 - with: - images: ${{ env.IMAGE }} - tags: | - type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 + # with: + # images: ${{ env.IMAGE }} + # tags: | + # type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -71,6 +71,8 @@ jobs: - name: Test building of image uses: docker/build-push-action@v6 with: + cache-from: type=gha + cache-to: type=gha,mode=max context: . file: Dockerfile.amd64 labels: ${{ steps.metadata.outputs.labels }} @@ -78,48 +80,48 @@ jobs: push: false tags: ${{ steps.metadata.outputs.tags }} - build-arm64: - runs-on: ubuntu-latest - needs: build-triton-wheel-arm64 - concurrency: arm64 - steps: - - name: Free additional disk space - run: | - df -h - echo "Removing android..." - sudo rm -rf /usr/local/lib/android - echo "Removing dotnet..." - sudo rm -rf /usr/share/dotnet - echo "Removing haskell" - sudo rm -rf /opt/ghc - sudo rm -rf /usr/local/.ghcup - echo "Removing tool cache..." 
- sudo rm -rf /opt/hostedtoolcache - df -h - - name: Checkout - uses: actions/checkout@v5 - - name: Download a single artifact - uses: actions/download-artifact@v5 - with: - name: triton-wheels-linux-arm64 - path: ./wheelhouse/ - - name: Prepare image metadata - id: metadata - uses: docker/metadata-action@v5 - with: - images: ${{ env.IMAGE }} - tags: | - type=raw,value=${{ env.VERSION }},prefix=v,suffix=-arm64 - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Test building of image - uses: docker/build-push-action@v6 - with: - context: . - file: Dockerfile.arm64 - labels: ${{ steps.metadata.outputs.labels }} - platforms: linux/arm64 - push: false - tags: ${{ steps.metadata.outputs.tags }} + # build-arm64: + # runs-on: ubuntu-latest + # needs: build-triton-wheel-arm64 + # concurrency: arm64 + # steps: + # - name: Free additional disk space + # run: | + # df -h + # echo "Removing android..." + # sudo rm -rf /usr/local/lib/android + # echo "Removing dotnet..." + # sudo rm -rf /usr/share/dotnet + # echo "Removing haskell" + # sudo rm -rf /opt/ghc + # sudo rm -rf /usr/local/.ghcup + # echo "Removing tool cache..." + # sudo rm -rf /opt/hostedtoolcache + # df -h + # - name: Checkout + # uses: actions/checkout@v5 + # - name: Download a single artifact + # uses: actions/download-artifact@v5 + # with: + # name: triton-wheels-linux-arm64 + # path: ./wheelhouse/ + # - name: Prepare image metadata + # id: metadata + # uses: docker/metadata-action@v5 + # with: + # images: ${{ env.IMAGE }} + # tags: | + # type=raw,value=${{ env.VERSION }},prefix=v,suffix=-arm64 + # - name: Set up QEMU + # uses: docker/setup-qemu-action@v3 + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v3 + # - name: Test building of image + # uses: docker/build-push-action@v6 + # with: + # context: . 
+ # file: Dockerfile.arm64 + # labels: ${{ steps.metadata.outputs.labels }} + # platforms: linux/arm64 + # push: false + # tags: ${{ steps.metadata.outputs.tags }} diff --git a/Dockerfile.amd64 b/Dockerfile.amd64 index 879e532..3ead493 100644 --- a/Dockerfile.amd64 +++ b/Dockerfile.amd64 @@ -1,4 +1,4 @@ -FROM vllm/vllm-openai:v0.10.2 +FROM vllm/vllm-openai:v0.9.2 RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system triton==3.3.1 + uv pip install --system triton==3.2.0 From ca9784d92874d789c503d6cbbd0eb7b67671be9e Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 13:26:36 +0200 Subject: [PATCH 43/51] Update tags --- .github/workflows/release.yml | 11 ++++++----- .github/workflows/test.yml | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d65df89..62360fb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -45,7 +45,7 @@ jobs: docker-hub-release-amd64: runs-on: ubuntu-latest - concurrency: amd64 + # concurrency: amd64 steps: - name: Free additional disk space run: | @@ -65,10 +65,11 @@ jobs: - name: Prepare image metadata id: metadata uses: docker/metadata-action@v5 - # with: - # images: ${{ env.IMAGE }} - # tags: | - # type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 + with: + images: ${{ env.IMAGE }} + tags: | + type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 + type=raw,value=${{ env.VERSION }},prefix=v - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 998d85e..daa43b9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,7 +40,7 @@ jobs: build-amd64: runs-on: ubuntu-latest - concurrency: amd64 + # concurrency: amd64 steps: - name: Free additional disk space run: | From 8062ef351626ac0b89628280535aed83f4b0836d Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 
2025 15:35:54 +0200 Subject: [PATCH 44/51] Back up to v0.10.2 --- Dockerfile.amd64 | 2 +- README.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile.amd64 b/Dockerfile.amd64 index 3ead493..bd52886 100644 --- a/Dockerfile.amd64 +++ b/Dockerfile.amd64 @@ -1,4 +1,4 @@ -FROM vllm/vllm-openai:v0.9.2 +FROM vllm/vllm-openai:v0.10.2 RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system triton==3.2.0 diff --git a/README.md b/README.md index 12e483d..65c5db6 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ -# docker-vllm +# vLLM + OpenAI compatible server for vLLM From d12cab186fde6c6c52235c3e341e77455e7776f5 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 21:35:15 +0200 Subject: [PATCH 45/51] Updates --- .github/workflows/release.yml | 122 +++------------------------------- .github/workflows/test.yml | 94 ++++---------------------- Dockerfile | 4 ++ Dockerfile.amd64 | 4 -- Dockerfile.arm64 | 5 -- 5 files changed, 25 insertions(+), 204 deletions(-) create mode 100644 Dockerfile delete mode 100644 Dockerfile.amd64 delete mode 100644 Dockerfile.arm64 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 62360fb..67aca74 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,40 +12,12 @@ on: env: IMAGE: zappi/vllm-openai - VERSION: 0.9.2 + VLLM_VERSION: 0.9.2 + TRITON_VERSION: 3.2.0 jobs: - # build-triton-wheel-arm64: - # runs-on: ubuntu-22.04-arm - # concurrency: arm64 - # steps: - # - name: Checkout - # uses: actions/checkout@v5 - # with: - # repository: triton-lang/triton - # ref: v3.3.1 - # - name: Patch setup - # run: | - # echo "[build_ext]" >> python/setup.cfg - # echo "base-dir=/project" >> python/setup.cfg - # - name: Build wheels - # uses: pypa/cibuildwheel@v3.2.0 - # env: - # CIBW_BEFORE_ALL: "dnf install clang lld -y" - # CIBW_BUILD: "cp312-manylinux_aarch64" - # CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - # with: - # 
package-dir: python - # output-dir: wheelhouse - # - name: Upload wheels - # uses: actions/upload-artifact@v4 - # with: - # name: triton-wheels-linux-arm64 - # path: ./wheelhouse/*.whl - - docker-hub-release-amd64: + docker-hub-release: runs-on: ubuntu-latest - # concurrency: amd64 steps: - name: Free additional disk space run: | @@ -68,8 +40,8 @@ jobs: with: images: ${{ env.IMAGE }} tags: | - type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 - type=raw,value=${{ env.VERSION }},prefix=v + type=raw,value=${{ env.VLLM_VERSION }}-triton-${{ env.TRITON_VERSION }} + type=raw,value=${{ env.VLLM_VERSION }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -82,94 +54,18 @@ jobs: - name: Build, tag, and push image uses: docker/build-push-action@v6 with: + build-args: | + VLLM_VERSION=${{ env.VLLM_VERSION }} + TRITON_VERSION=${{ env.TRITON_VERSION }} cache-from: type=gha cache-to: type=gha,mode=max context: . - file: Dockerfile.amd64 + file: Dockerfile labels: ${{ steps.metadata.outputs.labels }} platforms: linux/amd64 push: true tags: ${{ steps.metadata.outputs.tags }} - # docker-hub-release-arm64: - # runs-on: ubuntu-latest - # needs: build-triton-wheel-arm64 - # concurrency: arm64 - # steps: - # - name: Free additional disk space - # run: | - # df -h - # echo "Removing android..." - # sudo rm -rf /usr/local/lib/android - # echo "Removing dotnet..." - # sudo rm -rf /usr/share/dotnet - # echo "Removing haskell" - # sudo rm -rf /opt/ghc - # sudo rm -rf /usr/local/.ghcup - # echo "Removing tool cache..." 
- # sudo rm -rf /opt/hostedtoolcache - # df -h - # - name: Checkout - # uses: actions/checkout@v5 - # - name: Download a single artifact - # uses: actions/download-artifact@v5 - # with: - # name: triton-wheels-linux-arm64 - # path: ./wheelhouse/ - # - name: Prepare image metadata - # id: metadata - # uses: docker/metadata-action@v5 - # with: - # images: ${{ env.IMAGE }} - # tags: | - # type=raw,value=${{ env.VERSION }},prefix=v,suffix=-arm64 - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - # - name: Login to Docker Hub - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKER_HUB_USERNAME }} - # password: ${{ secrets.DOCKER_HUB_TOKEN }} - # - name: Build, tag, and push image - # uses: docker/build-push-action@v6 - # with: - # context: . - # file: Dockerfile.arm64 - # labels: ${{ steps.metadata.outputs.labels }} - # platforms: linux/arm64 - # push: true - # tags: ${{ steps.metadata.outputs.tags }} - - # docker-hub-release-manifest: - # runs-on: ubuntu-latest - # needs: - # - docker-hub-release-amd64 - # - docker-hub-release-arm64 - # steps: - # - name: Checkout - # uses: actions/checkout@v5 - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - # - name: Login to Docker Hub - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKER_HUB_USERNAME }} - # password: ${{ secrets.DOCKER_HUB_TOKEN }} - # - name: Create and push multi-arch manifest - # run: | - # # Create multi-arch manifest - # docker buildx imagetools create \ - # --tag ${{ env.IMAGE }}:v${{ env.VERSION }} \ - # ${{ env.IMAGE }}:v${{ env.VERSION }}-amd64 \ - # ${{ env.IMAGE }}:v${{ env.VERSION }}-arm64 - # # - name: Update description on Docker Hub - # # uses: peter-evans/dockerhub-description@v5 - # # with: - # # username: ${{ secrets.DOCKER_HUB_USERNAME }} - # # password: ${{ secrets.DOCKER_HUB_TOKEN }} - # # repository: ${{ env.IMAGE }} # github-release: # 
runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index daa43b9..44f8125 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,40 +7,12 @@ on: env: IMAGE: zappi/vllm-openai - VERSION: 0.9.2 + VLLM_VERSION: 0.9.2 + TRITON_VERSION: 3.2.0 jobs: - # build-triton-wheel-arm64: - # runs-on: ubuntu-22.04-arm - # concurrency: arm64 - # steps: - # - name: Checkout - # uses: actions/checkout@v5 - # with: - # repository: triton-lang/triton - # ref: v3.3.1 - # - name: Patch setup - # run: | - # echo "[build_ext]" >> python/setup.cfg - # echo "base-dir=/project" >> python/setup.cfg - # - name: Build wheels - # uses: pypa/cibuildwheel@v3.2.0 - # env: - # CIBW_BEFORE_ALL: "dnf install clang lld -y" - # CIBW_BUILD: "cp312-manylinux_aarch64" - # CIBW_ENVIRONMENT: "MAX_JOBS=4 TRITON_BUILD_WITH_CLANG_LLD=1" - # with: - # package-dir: python - # output-dir: wheelhouse - # - name: Upload wheels - # uses: actions/upload-artifact@v4 - # with: - # name: triton-wheels-linux-arm64 - # path: ./wheelhouse/*.whl - - build-amd64: + build: runs-on: ubuntu-latest - # concurrency: amd64 steps: - name: Free additional disk space run: | @@ -60,10 +32,11 @@ jobs: - name: Prepare image metadata id: metadata uses: docker/metadata-action@v5 - # with: - # images: ${{ env.IMAGE }} - # tags: | - # type=raw,value=${{ env.VERSION }},prefix=v,suffix=-amd64 + with: + images: ${{ env.IMAGE }} + tags: | + type=raw,value=${{ env.VLLM_VERSION }}-triton-${{ env.TRITON_VERSION }} + type=raw,value=${{ env.VLLM_VERSION }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -71,57 +44,14 @@ jobs: - name: Test building of image uses: docker/build-push-action@v6 with: + build-args: | + VLLM_VERSION=${{ env.VLLM_VERSION }} + TRITON_VERSION=${{ env.TRITON_VERSION }} cache-from: type=gha cache-to: type=gha,mode=max context: . 
- file: Dockerfile.amd64 + file: Dockerfile labels: ${{ steps.metadata.outputs.labels }} platforms: linux/amd64 push: false tags: ${{ steps.metadata.outputs.tags }} - - # build-arm64: - # runs-on: ubuntu-latest - # needs: build-triton-wheel-arm64 - # concurrency: arm64 - # steps: - # - name: Free additional disk space - # run: | - # df -h - # echo "Removing android..." - # sudo rm -rf /usr/local/lib/android - # echo "Removing dotnet..." - # sudo rm -rf /usr/share/dotnet - # echo "Removing haskell" - # sudo rm -rf /opt/ghc - # sudo rm -rf /usr/local/.ghcup - # echo "Removing tool cache..." - # sudo rm -rf /opt/hostedtoolcache - # df -h - # - name: Checkout - # uses: actions/checkout@v5 - # - name: Download a single artifact - # uses: actions/download-artifact@v5 - # with: - # name: triton-wheels-linux-arm64 - # path: ./wheelhouse/ - # - name: Prepare image metadata - # id: metadata - # uses: docker/metadata-action@v5 - # with: - # images: ${{ env.IMAGE }} - # tags: | - # type=raw,value=${{ env.VERSION }},prefix=v,suffix=-arm64 - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - # - name: Test building of image - # uses: docker/build-push-action@v6 - # with: - # context: . 
- # file: Dockerfile.arm64 - # labels: ${{ steps.metadata.outputs.labels }} - # platforms: linux/arm64 - # push: false - # tags: ${{ steps.metadata.outputs.tags }} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d76872d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,4 @@ +FROM vllm/vllm-openai:v${VLLM_VERSION} + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system triton==${TRITON_VERSION} diff --git a/Dockerfile.amd64 b/Dockerfile.amd64 deleted file mode 100644 index bd52886..0000000 --- a/Dockerfile.amd64 +++ /dev/null @@ -1,4 +0,0 @@ -FROM vllm/vllm-openai:v0.10.2 - -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system triton==3.2.0 diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 deleted file mode 100644 index 8fa7571..0000000 --- a/Dockerfile.arm64 +++ /dev/null @@ -1,5 +0,0 @@ -FROM vllm/vllm-openai:v0.10.2 - -RUN --mount=type=bind,source=./wheelhouse,target=/wheelhouse \ - --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system /wheelhouse/triton-*.whl From 128de25d4abcfbc432dd23611480134d83d701d3 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 21:47:08 +0200 Subject: [PATCH 46/51] Fixes --- .github/workflows/release.yml | 2 +- .github/workflows/test.yml | 6 +----- Dockerfile | 3 +++ 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 67aca74..e32f6df 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -26,7 +26,7 @@ jobs: sudo rm -rf /usr/local/lib/android echo "Removing dotnet..." sudo rm -rf /usr/share/dotnet - echo "Removing haskell" + echo "Removing haskell..." sudo rm -rf /opt/ghc sudo rm -rf /usr/local/.ghcup echo "Removing tool cache..." 
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 44f8125..cdaa1d9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,11 +17,7 @@ jobs: - name: Free additional disk space run: | df -h - echo "Removing android..." - sudo rm -rf /usr/local/lib/android - echo "Removing dotnet..." - sudo rm -rf /usr/share/dotnet - echo "Removing haskell" + echo "Removing haskell..." sudo rm -rf /opt/ghc sudo rm -rf /usr/local/.ghcup echo "Removing tool cache..." diff --git a/Dockerfile b/Dockerfile index d76872d..91ff845 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,6 @@ +ARG VLLM_VERSION +ARG TRITON_VERSION + FROM vllm/vllm-openai:v${VLLM_VERSION} RUN --mount=type=cache,target=/root/.cache/uv \ From b553dad8411d86e41e31a8051f5d7c278f5f6ff2 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 21:51:30 +0200 Subject: [PATCH 47/51] fix --- .github/workflows/release.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e32f6df..4959850 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,10 +22,6 @@ jobs: - name: Free additional disk space run: | df -h - echo "Removing android..." - sudo rm -rf /usr/local/lib/android - echo "Removing dotnet..." - sudo rm -rf /usr/share/dotnet echo "Removing haskell..." 
sudo rm -rf /opt/ghc sudo rm -rf /usr/local/.ghcup From 260990e638dc3593e5e2f0993bc3b9c742ef6f2b Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 22:13:58 +0200 Subject: [PATCH 48/51] fix --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 91ff845..505003b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,6 @@ ARG VLLM_VERSION -ARG TRITON_VERSION - FROM vllm/vllm-openai:v${VLLM_VERSION} +ARG TRITON_VERSION RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system triton==${TRITON_VERSION} From 920bd31904d9a20567a328ac114e297fa7a1caba Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 22:28:08 +0200 Subject: [PATCH 49/51] Update --- .github/workflows/release.yml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4959850..59efa6a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,7 +12,7 @@ on: env: IMAGE: zappi/vllm-openai - VLLM_VERSION: 0.9.2 + VLLM_VERSION: 0.10.2 TRITON_VERSION: 3.2.0 jobs: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cdaa1d9..fbc97fc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,7 +7,7 @@ on: env: IMAGE: zappi/vllm-openai - VLLM_VERSION: 0.9.2 + VLLM_VERSION: 0.10.2 TRITON_VERSION: 3.2.0 jobs: From 860e46fee59e6fe9b0dab14c12b409c25011ad80 Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 22:35:26 +0200 Subject: [PATCH 50/51] More space --- .github/workflows/release.yml | 2 ++ .github/workflows/test.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 59efa6a..446b14c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,6 +22,8 @@ jobs: - name: Free additional disk space run: | df -h + echo "Removing dotnet..." 
+ sudo rm -rf /usr/share/dotnet echo "Removing haskell..." sudo rm -rf /opt/ghc sudo rm -rf /usr/local/.ghcup diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fbc97fc..8931876 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,6 +17,8 @@ jobs: - name: Free additional disk space run: | df -h + echo "Removing dotnet..." + sudo rm -rf /usr/share/dotnet echo "Removing haskell..." sudo rm -rf /opt/ghc sudo rm -rf /usr/local/.ghcup From e8ededefd24627adfab4cf823dcb3e07aa0e353d Mon Sep 17 00:00:00 2001 From: Zac Blazic Date: Mon, 6 Oct 2025 23:28:32 +0200 Subject: [PATCH 51/51] Release --- .github/workflows/release.yml | 9 ++------- .github/workflows/test.yml | 9 ++------- Dockerfile | 6 ++---- 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 446b14c..86d9627 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,8 +12,7 @@ on: env: IMAGE: zappi/vllm-openai - VLLM_VERSION: 0.10.2 - TRITON_VERSION: 3.2.0 + VERSION: 0.10.2 jobs: docker-hub-release: @@ -38,8 +37,7 @@ jobs: with: images: ${{ env.IMAGE }} tags: | - type=raw,value=${{ env.VLLM_VERSION }}-triton-${{ env.TRITON_VERSION }} - type=raw,value=${{ env.VLLM_VERSION }} + type=raw,value=${{ env.VERSION }},prefix=v - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -52,9 +50,6 @@ jobs: - name: Build, tag, and push image uses: docker/build-push-action@v6 with: - build-args: | - VLLM_VERSION=${{ env.VLLM_VERSION }} - TRITON_VERSION=${{ env.TRITON_VERSION }} cache-from: type=gha cache-to: type=gha,mode=max context: . 
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8931876..bf0ace2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,8 +7,7 @@ on: env: IMAGE: zappi/vllm-openai - VLLM_VERSION: 0.10.2 - TRITON_VERSION: 3.2.0 + VERSION: 0.10.2 jobs: build: @@ -33,8 +32,7 @@ jobs: with: images: ${{ env.IMAGE }} tags: | - type=raw,value=${{ env.VLLM_VERSION }}-triton-${{ env.TRITON_VERSION }} - type=raw,value=${{ env.VLLM_VERSION }} + type=raw,value=${{ env.VERSION }},prefix=v - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -42,9 +40,6 @@ jobs: - name: Test building of image uses: docker/build-push-action@v6 with: - build-args: | - VLLM_VERSION=${{ env.VLLM_VERSION }} - TRITON_VERSION=${{ env.TRITON_VERSION }} cache-from: type=gha cache-to: type=gha,mode=max context: . diff --git a/Dockerfile b/Dockerfile index 505003b..bd52886 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,4 @@ -ARG VLLM_VERSION -FROM vllm/vllm-openai:v${VLLM_VERSION} +FROM vllm/vllm-openai:v0.10.2 -ARG TRITON_VERSION RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system triton==${TRITON_VERSION} + uv pip install --system triton==3.2.0