From 37377dbfedb6938ec0d291bd0552fc56a5bb9a5d Mon Sep 17 00:00:00 2001
From: Axel Garcia <axel.garcia1997@gmail.com>
Date: Mon, 5 Jan 2026 11:17:06 +0100
Subject: [PATCH 1/2] ENH: Update CUDA versions for Python packages to 11.8,
 12.8 and 13.0

Version 11.8 is the latest release of the CUDA 11 series.
Versions 12.8 and 13.0 are aligned with the CUDA versions used by PyTorch.
---
 .../build-test-package-python-cuda.yml        | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build-test-package-python-cuda.yml b/.github/workflows/build-test-package-python-cuda.yml
index d31a410..791b814 100644
--- a/.github/workflows/build-test-package-python-cuda.yml
+++ b/.github/workflows/build-test-package-python-cuda.yml
@@ -19,7 +19,7 @@ jobs:
       matrix:
         python3-minor-version: ${{ github.event_name == 'pull_request' && fromJSON('["11"]') || fromJSON('["9","10","11"]') }}
         manylinux-platform: ["_2_28-x64","2014-x64"]
-        cuda-version: ["116","121","124"]
+        cuda-version: ["118","128","130"]
 
     steps:
     - uses: actions/checkout@v4
@@ -46,6 +46,9 @@ jobs:
         CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCUDAToolkit_ROOT=/usr/lib64/cuda${CUDA_VERSION}"
         CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCMAKE_CUDA_COMPILER=/usr/lib64/cuda${CUDA_VERSION}/bin/nvcc"
         CMAKE_OPTIONS="${CMAKE_OPTIONS} --config-setting=build.tool-args=-j16"
+        if test ${CUDA_VERSION_MAJOR} -ge 13; then
+           CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCMAKE_CUDA_ARCHITECTURES=75"
+        fi
 
         # The first two are not library paths but are included to be mounted in the
         # docker by dockcross-manylinux-build-module-wheels.sh
@@ -102,7 +105,7 @@ jobs:
       max-parallel: 2
       matrix:
         python3-minor-version: ${{ github.event_name == 'pull_request' && fromJSON('["11"]') || fromJSON('["9","10","11"]') }}
-        cuda-version: ["124"]
+        cuda-version: ["128","130"]
 
     steps:
     - uses: actions/checkout@v4
@@ -153,7 +156,14 @@ jobs:
         $LIBCUDART= (Get-Item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}\bin\cudart64*dll" ).Name
         & nvcc --version
         ((Get-Content -Path pyproject.toml) -replace "itk-cudacommon","itk-cudacommon-cuda${CUDA_VERSION}") | Set-Content -Path pyproject.toml
-        ./windows-download-cache-and-build-module-wheels.ps1 "${{ matrix.python3-minor-version }}" -setup_options "--lib-paths ""C:/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}/bin"" --exclude-libs ""nvcuda.dll;${LIBCUDART}""" -cmake_options """-DCUDACOMMON_CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}"" ""-DCMAKE_CUDA_COMPILER=$env:CUDACXX"""
+        if(${CUDA_VERSION_MAJOR} -ge 13) {
+          $CUDA_ARCHITECTURES = "75"
+        }
+        else {
+          $CUDA_ARCHITECTURES = "52"
+        }
+
+        ./windows-download-cache-and-build-module-wheels.ps1 "${{ matrix.python3-minor-version }}" -setup_options "--lib-paths ""C:/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}/bin"" --exclude-libs ""nvcuda.dll;${LIBCUDART}""" -cmake_options """-DCUDACOMMON_CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}"" ""-DCMAKE_CUDA_COMPILER=$env:CUDACXX"" ""-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}"""
 
         mkdir -p '${{ github.workspace }}\dist'
         cp 'dist\*.whl' '${{ github.workspace }}\dist'
@@ -223,8 +233,8 @@ jobs:
 
     - name: Test python wheel
       run: |
-        # Find the CUDA 124 wheel for Python 3.11 dynamically
-        wheel=$(find wheels -name "*cuda124*cp311*manylinux_2_28_x86_64.whl" -type f | head -1)
+        # Find the CUDA 128 wheel for Python 3.11 dynamically
+        wheel=$(find wheels -name "*cuda128*cp311*manylinux_2_28_x86_64.whl" -type f | head -1)
         pip uninstall -y $(pip freeze | sed -E 's/(==.*|[[:space:]]+@.*)$//' | grep -E '^itk-') || true
         echo "Installing wheel: $wheel"
         pip install $wheel

From ec161393c1f2c7eb98b85f1ec8cc19d1051d3424 Mon Sep 17 00:00:00 2001
From: Simon Rit <simon.rit@creatis.insa-lyon.fr>
Date: Mon, 5 Jan 2026 22:51:48 +0000
Subject: [PATCH 2/2] COMP: Remove use of deprecated cudaDeviceProp.clockRate

---
 src/itkCudaUtil.cxx | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/itkCudaUtil.cxx b/src/itkCudaUtil.cxx
index ebc7ea7..bad205b 100644
--- a/src/itkCudaUtil.cxx
+++ b/src/itkCudaUtil.cxx
@@ -85,7 +85,9 @@ CudaGetMaxFlopsDev()
   int max_flops_device = 0;
   for (int i = 0; i < numAvailableDevices; ++i)
   {
-    int flops = devices[i].multiProcessorCount * devices[i].clockRate;
+    int clockRate = 0; // stays 0 if the attribute query below fails
+    cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, i);
+    int flops = devices[i].multiProcessorCount * clockRate;
     if (flops > max_flops)
     {
       max_flops = flops;
@@ -121,9 +123,12 @@ CudaPrintDeviceInfo(int device, bool verbose)
     return;
   }
 
+  int clockRate = 0; // printed as 0 if the attribute query below fails
+  cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device);
+
   std::cout << prop.name << std::endl;
   std::cout << "Compute capability: " << prop.major << "." << prop.minor << std::endl;
-  std::cout << "Clockrate: " << prop.clockRate << std::endl;
+  std::cout << "Clockrate: " << clockRate << std::endl;
   std::cout << "Global memory: " << prop.totalGlobalMem << std::endl;
   std::cout << "Constant memory: " << prop.totalConstMem << std::endl;
   std::cout << "Number of Multi Processors: " << prop.multiProcessorCount << std::endl;