From 37377dbfedb6938ec0d291bd0552fc56a5bb9a5d Mon Sep 17 00:00:00 2001 From: Axel Garcia Date: Mon, 5 Jan 2026 11:17:06 +0100 Subject: [PATCH 1/2] ENH: Update CUDA versions for Python packages to 11.8, 12.8 and 13.0 Version 11.8 is the latest 11 version. Versions 12.8 and 13.0 are aligned with PyTorch. --- .../build-test-package-python-cuda.yml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-test-package-python-cuda.yml b/.github/workflows/build-test-package-python-cuda.yml index d31a410..791b814 100644 --- a/.github/workflows/build-test-package-python-cuda.yml +++ b/.github/workflows/build-test-package-python-cuda.yml @@ -19,7 +19,7 @@ jobs: matrix: python3-minor-version: ${{ github.event_name == 'pull_request' && fromJSON('["11"]') || fromJSON('["9","10","11"]') }} manylinux-platform: ["_2_28-x64","2014-x64"] - cuda-version: ["116","121","124"] + cuda-version: ["118","128","130"] steps: - uses: actions/checkout@v4 @@ -46,6 +46,9 @@ jobs: CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCUDAToolkit_ROOT=/usr/lib64/cuda${CUDA_VERSION}" CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCMAKE_CUDA_COMPILER=/usr/lib64/cuda${CUDA_VERSION}/bin/nvcc" CMAKE_OPTIONS="${CMAKE_OPTIONS} --config-setting=build.tool-args=-j16" + if test ${CUDA_VERSION_MAJOR} -ge 13; then + CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCMAKE_CUDA_ARCHITECTURES=75" + fi # The first two are not library paths but are included to be mounted in the # docker by dockcross-manylinux-build-module-wheels.sh @@ -102,7 +105,7 @@ jobs: max-parallel: 2 matrix: python3-minor-version: ${{ github.event_name == 'pull_request' && fromJSON('["11"]') || fromJSON('["9","10","11"]') }} - cuda-version: ["124"] + cuda-version: ["128","130"] steps: - uses: actions/checkout@v4 @@ -153,7 +156,14 @@ jobs: $LIBCUDART= (Get-Item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}\bin\cudart64*dll" ).Name & nvcc --version ((Get-Content -Path pyproject.toml) -replace "itk-cudacommon","itk-cudacommon-cuda${CUDA_VERSION}") | Set-Content -Path pyproject.toml - ./windows-download-cache-and-build-module-wheels.ps1 "${{ matrix.python3-minor-version }}" -setup_options "--lib-paths ""C:/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}/bin"" --exclude-libs ""nvcuda.dll;${LIBCUDART}""" -cmake_options """-DCUDACOMMON_CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}"" ""-DCMAKE_CUDA_COMPILER=$env:CUDACXX""" + if(${CUDA_VERSION_MAJOR} -ge 13) { + $CUDA_ARCHITECTURES = "75" + } + else { + $CUDA_ARCHITECTURES = "52" + } + + ./windows-download-cache-and-build-module-wheels.ps1 "${{ matrix.python3-minor-version }}" -setup_options "--lib-paths ""C:/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}/bin"" --exclude-libs ""nvcuda.dll;${LIBCUDART}""" -cmake_options """-DCUDACOMMON_CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}"" ""-DCMAKE_CUDA_COMPILER=$env:CUDACXX"" ""-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}""" mkdir -p '${{ github.workspace }}\dist' cp 'dist\*.whl' '${{ github.workspace }}\dist' @@ -223,8 +233,8 @@ jobs: - name: Test python wheel run: | - # Find the CUDA 124 wheel for Python 3.11 dynamically - wheel=$(find wheels -name "*cuda124*cp311*manylinux_2_28_x86_64.whl" -type f | head -1) + # Find the CUDA 128 wheel for Python 3.11 dynamically + wheel=$(find wheels -name "*cuda128*cp311*manylinux_2_28_x86_64.whl" -type f | head -1) pip uninstall -y $(pip freeze | sed -E 's/(==.*|[[:space:]]+@.*)$//' | grep -E '^itk-') || true echo "Installing wheel: $wheel" pip install $wheel From ec161393c1f2c7eb98b85f1ec8cc19d1051d3424 Mon Sep 17 00:00:00 2001 From: Simon Rit Date: Mon, 5 Jan 2026 22:51:48 +0000 Subject: [PATCH 2/2] COMP: Remove use of deprecated cudaDeviceProp.clockRate --- src/itkCudaUtil.cxx | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/itkCudaUtil.cxx b/src/itkCudaUtil.cxx index ebc7ea7..bad205b 100644 --- a/src/itkCudaUtil.cxx +++ b/src/itkCudaUtil.cxx @@ -85,7 +85,9 @@ CudaGetMaxFlopsDev() int max_flops_device = 0; for (int i = 0; i < numAvailableDevices; ++i) { - int flops = devices[i].multiProcessorCount * devices[i].clockRate; + int clockRate; + cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, i); + int flops = devices[i].multiProcessorCount * clockRate; if (flops > max_flops) { max_flops = flops; @@ -121,9 +123,12 @@ CudaPrintDeviceInfo(int device, bool verbose) return; } + int clockRate; + cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device); + std::cout << prop.name << std::endl; std::cout << "Compute capability: " << prop.major << "." << prop.minor << std::endl; - std::cout << "Clockrate: " << prop.clockRate << std::endl; + std::cout << "Clockrate: " << clockRate << std::endl; std::cout << "Global memory: " << prop.totalGlobalMem << std::endl; std::cout << "Constant memory: " << prop.totalConstMem << std::endl; std::cout << "Number of Multi Processors: " << prop.multiProcessorCount << std::endl;