tmcdonell · tomsmeding · Aug 13, 2025 · Sep 14, 2025 · Sep 14, 2025 · Sep 16, 2025
diff --git a/.appveyor.yml b/.appveyor.yml
@@ -9,31 +9,27 @@ environment:
   global:
     STACK_ROOT: "c:\\sr"
   matrix:
-    - GHC: "8.10"
-    - GHC: "8.8"
-    - GHC: "8.6"
+    - GHC: "9.10"
+    - GHC: "9.6"
+    - GHC: "9.0"
     - GHC: "8.4"
-    - GHC: "8.2"
-    - GHC: "8.0"
-    # - GHC: "7.10"
-    # - GHC: "7.8"  # failed to install ghc: https://ci.appveyor.com/project/tmcdonell/cuda/build/1.0.4/job/ufhtj0klyq73psas#L149
 
 before_build:
   # http://help.appveyor.com/discussions/problems/6312-curl-command-not-found
   - set PATH=C:\Program Files\Git\mingw64\bin;%PATH%
   - set PATH=C:\Users\appveyor\AppData\Roaming\local\bin;%PATH%
 
-  # install CUDA-9.0
-  - appveyor DownloadFile "https://developer.nvidia.com/compute/cuda/9.0/Prod/network_installers/cuda_9.0.176_windows_network-exe" -FileName install_cuda.exe
-  - install_cuda.exe -s compiler_9.0 cudart_9.0 cublas_9.0 cublas_dev_9.0 cufft_9.0 cufft_dev_9.0 cusolver_9.0 cusolver_dev_9.0 cusparse_9.0 cusparse_dev_9.0
-  - set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v9.0\nvvm\bin;%PATH%
-  - set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v9.0\bin;%PATH%
+  # install CUDA-13.0
+  - appveyor DownloadFile "https://developer.download.nvidia.com/compute/cuda/13.0.2/network_installers/cuda_13.0.2_windows_network.exe" -FileName install_cuda.exe
+  - install_cuda.exe -s crt_13.0 cudart_13.0 nvcc_13.0
+  - set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v13.0\nvvm\bin;%PATH%
+  - set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin;%PATH%
   - nvcc --version
 
   # CUDA refuses to install the driver if no compatible GPU can be found, so
   # copy these .dll files manually
-  - appveyor DownloadFile "https://drive.google.com/uc?export=download&id=14x0RX8QlHQ6vKhimbR4FDRgfP7EoHfgc" -FileName nvdriver-9.0.176.7z
-  - 7z x nvdriver-9.0.176.7z -oC:\Windows\System32
+  # - appveyor DownloadFile "https://drive.google.com/uc?export=download&id=14x0RX8QlHQ6vKhimbR4FDRgfP7EoHfgc" -FileName nvdriver-9.0.176.7z
+  # - 7z x nvdriver-9.0.176.7z -oC:\Windows\System32
 
   # install stack
   - appveyor DownloadFile "https://www.stackage.org/stack/windows-x86_64" -FileName stack.zip

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
@@ -7,6 +7,7 @@ on:
   push:
     paths:
       - '.github/workflows/ci-linux.yml'
+      - 'Setup.hs'
       - 'stack*.yaml'
       - '*.cabal'
       - '*/src/**'
@@ -18,20 +19,20 @@ jobs:
     strategy:
       matrix:
         ghc:
-          - "8.10"
-          - "8.8"
-          - "8.6"
-          - "8.4"
-          - "8.2"
-          - "8.0"
-          - "7.8"
-        cuda:
-          - "10.2"
-          - "10.1"
-          - "10.0"
+          - "9.10"
+          - "9.8"
+          - "9.6"
+          - "9.4"
           - "9.2"
-          - "9.1"
           - "9.0"
+          # - "8.10"  # save some resources
+          # - "8.8"
+          # - "8.6"
+          - "8.4"
+        cuda:
+          - "13.0"
+          - "12.9"
+          # - "12.5"  # save some resources
 
         # include:
         #   - os: windows-latest
@@ -43,16 +44,16 @@ jobs:
       HADDOCK_FLAGS: "--haddock --no-haddock-deps --no-haddock-hyperlink-source --haddock-arguments=\"--no-print-missing-docs\""
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v5
 
       - run: ln -s stack-${{ matrix.ghc }}.yaml stack.yaml
 
-      - uses: actions/cache@v2
+      - uses: actions/cache@v4
         with:
           path: snapshot.pkgdb
           key: ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-snapshot.pkgdb
 
-      - uses: actions/cache@v2
+      - uses: actions/cache@v4
         with:
           path: |
             ~/.local/bin
@@ -61,7 +62,6 @@ jobs:
             .stack-work
           key: ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-${{ hashFiles('stack.yaml') }}-${{ hashFiles('snapshot.pkgdb') }}
           restore-keys: |
-            ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-${{ hashFiles('stack.yaml') }}-${{ hashFiles('snapshot.pkgdb') }}
             ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-${{ hashFiles('stack.yaml') }}-
             ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-
 
@@ -80,12 +80,11 @@ jobs:
       - name: Install CUDA
         run: |
           MATRIX_CUDA=${{ matrix.cuda }}
-          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
-          sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
-          sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
-          sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
+          UBUNTUVER=$(sed -n '/^DISTRIB_RELEASE=/ { s/.*=//; s/\.//; p; q; }' /etc/lsb-release)
+          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu$UBUNTUVER/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
           sudo apt-get update
-          sudo apt-get -y install cuda-${MATRIX_CUDA/./-}
+          sudo apt-get -y install cuda-{runtime,compiler,libraries,libraries-dev}-${MATRIX_CUDA/./-}
           echo "CUDA_HOME=/usr/local/cuda-${MATRIX_CUDA}" >> $GITHUB_ENV
           echo "LD_LIBRARY_PATH=/usr/local/cuda-${MATRIX_CUDA}/lib64:$(stack exec ghc -- --print-libdir)/rts:/usr/local/cuda-${MATRIX_CUDA}/nvvm/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
           echo "/usr/local/cuda-${MATRIX_CUDA}/bin" >> $GITHUB_PATH

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,11 +5,19 @@ Notable changes to the project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/).
 
 **NOTE:** The version numbers of this package roughly align to the latest
-version of the CUDA API this package is built against This means that this
+version of the CUDA API this package is built against. This means that this
 package _DOES NOT_ follow the PVP, or indeed any sensible version scheme,
 because NVIDIA are A-OK introducing breaking changes in minor updates.
 
 
+## [0.13.0.0] - ???
+### Added
+  * Support for CUDA-13
+
+### Removed
+  * A number of fields from `DeviceProperties`, as they have been removed from
+    `cudaDeviceProp`. Use `Foreign.CUDA.Driver.Device.attribute` to query them.
+
 ## [0.12.8.0] - 2025-08-21
 ### Added
   * Support for CUDA-12

diff --git a/README.md b/README.md
@@ -150,10 +150,13 @@ Here is an incomplete historical list of missing bindings. Pull requests welcome
 - cuGraphMemAllocNodeGetParams
 - cuGraphMemFreeNodeGetParams
 
-### CUDA-12
+### CUDA >= 12
 
 A lot. PRs welcome.
 
+- CUDA-12.3
+  - Edge data in the driver Graph API (`cuGraphAddDependencies_v2` etc.)
+
 
 # Old compatibility notes
 

diff --git a/Setup.hs b/Setup.hs
@@ -1,14 +1,22 @@
+-- Decouple from GHC's default language setting, so that it's easier
+-- to maintain compatibility with old GHCs.
+{-# LANGUAGE Haskell2010     #-}
+{-# OPTIONS_GHC -Wall        #-}
+
+{-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE CPP             #-}
 {-# LANGUAGE DataKinds       #-}
+{-# LANGUAGE KindSignatures  #-}
 {-# LANGUAGE QuasiQuotes     #-}
 {-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE TupleSections   #-}
 
 -- The MIN_VERSION_Cabal macro was introduced with Cabal-1.24 (??)
 #ifndef MIN_VERSION_Cabal
 #define MIN_VERSION_Cabal(major1,major2,minor) 0
 #endif
 
-import Distribution.PackageDescription
+import Distribution.PackageDescription                              hiding ( Flag )
 import Distribution.Simple
 import Distribution.Simple.BuildPaths
 import Distribution.Simple.Command
@@ -45,7 +53,6 @@ import Distribution.Simple.PackageDescription
 import Distribution.Utils.Path (SymbolicPath, FileOrDir(File, Dir), Lib, Include, Pkg, CWD, makeSymbolicPath, interpretSymbolicPath, makeRelativePathEx)
 import qualified Distribution.Types.LocalBuildConfig as LBC
 #else
-import Data.Kind (Constraint)
 #endif
 
 import Control.Exception
@@ -249,7 +256,9 @@ cudaLibraryPaths (Platform arch os) installPath = [ installPath </> path | path
         (Windows, X86_64)  -> ["lib/x64"]
         (OSX,     _)       -> ["lib"]    -- MacOS does not distinguish 32- vs. 64-bit paths
         (_,       X86_64)  -> ["lib64", "lib"]  -- prefer lib64 for 64-bit systems
+#if MIN_VERSION_Cabal(2,4,0)
         (_,       AArch64) -> ["lib64", "lib"]
+#endif
         _                  -> ["lib"]           -- otherwise
 
 
@@ -734,7 +743,6 @@ die' _ = die
 -- Compatibility across Cabal 3.14 symbolic paths.
 -- If we want to drop pre-Cabal-3.14 compatibility at some point, this should all be merged in above.
 
-workingDirFlag :: HasCommonFlags flags => flags -> Flag CWDPath
 lbiCWD :: LocalBuildInfo -> Maybe CWDPath
 
 #if MIN_VERSION_Cabal(3,14,0)
@@ -745,6 +753,7 @@ type CWDPath = SymbolicPath CWD ('Dir Pkg)
 regVerbosity :: RegisterFlags -> Flag Verbosity
 regVerbosity = setupVerbosity . registerCommonFlags
 
+workingDirFlag :: HasCommonFlags flags => flags -> Flag CWDPath
 workingDirFlag = setupWorkingDir . getCommonFlags
 
 lbiCWD = flagToMaybe . setupWorkingDir . configCommonFlags . LBC.configFlags . LBC.packageBuildDescr . localBuildDescr
@@ -772,6 +781,7 @@ type CWDPath = ()
 
 -- regVerbosity is still present as an actual field in Cabal 3.12
 
+workingDirFlag :: flags -> Flag CWDPath
 workingDirFlag _ = NoFlag
 
 lbiCWD _ = Nothing
@@ -785,10 +795,6 @@ makeRelativePathEx = id
 interpretSymbolicPath :: Maybe CWDPath -> FilePath -> FilePath
 interpretSymbolicPath _ = id
 
-type HasCommonFlags flags = () :: Constraint
-getCommonFlags :: flags -> ()
-getCommonFlags _ = ()
-
 readHookedBuildInfoWithCWD :: Verbosity -> Maybe CWDPath -> FilePath -> IO HookedBuildInfo
 readHookedBuildInfoWithCWD verb _ path = readHookedBuildInfo verb path
 #endif

diff --git a/cbits/stubs.c b/cbits/stubs.c
@@ -3,6 +3,7 @@
  */
 
 #include "cbits/stubs.h"
+#include <string.h>  // memset
 
 #if CUDART_VERSION >= 7000
 cudaError_t cudaLaunchKernel_simple(const void *func, unsigned int gridX, unsigned int gridY, unsigned int gridZ, unsigned int blockX, unsigned int blockY, unsigned int blockZ, void **args, size_t sharedMem, cudaStream_t stream)
@@ -196,7 +197,13 @@ CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev)
 
 CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev)
 {
+#if CUDA_VERSION >= 13000  // CUDA 13 and later: go through cuCtxCreate_v4, which takes an extra params struct
+    CUctxCreateParams params;
+    memset(&params, 0, sizeof params);  // hand over an all-zero struct; NOTE(review): presumably this requests a plain context like the _v2 call below -- confirm
+    return cuCtxCreate_v4(pctx, &params, flags, dev);
+#else  // older toolkits: forward the arguments unchanged to cuCtxCreate_v2
     return cuCtxCreate_v2(pctx, flags, dev);
+#endif
 }
 
 CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name)
@@ -424,3 +431,18 @@ CUresult CUDAAPI cuGraphExecKernelNodeSetParams_simple(CUgraphExec hGraphExec, C
 }
 #endif
 
+#if CUDA_VERSION >= 13000
+// Shim keeping the CUDA <=12 signature (plain CUdevice): wraps the device in a CUmemLocation for cuMemAdvise; much easier to do here than in Haskell.
+CUresult cuMemAdvise_device(CUdeviceptr dptr, size_t count, CUmem_advise advice, CUdevice device)
+{
+  return cuMemAdvise(dptr, count, advice, (CUmemLocation){.id = device, .type = CU_MEM_LOCATION_TYPE_DEVICE});
+}
+
+// Shim keeping the CUDA <=12 signature (plain CUdevice): wraps the device in a CUmemLocation for cuMemPrefetchAsync; much easier to do here than in Haskell.
+CUresult cuMemPrefetchAsync_device(CUdeviceptr dptr, size_t count, CUdevice device, CUstream hStream)
+{
+  // The literal 0 below is the 'flags' argument, which is reserved and must be 0 in CUDA 13
+  return cuMemPrefetchAsync(dptr, count, (CUmemLocation){.id = device, .type = CU_MEM_LOCATION_TYPE_DEVICE}, 0, hStream);
+}
+#endif
+
diff --git a/cbits/stubs.h b/cbits/stubs.h
@@ -184,6 +184,11 @@ CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags);
 CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int Flags);
 #endif
 
+#if CUDA_VERSION >= 13000
+CUresult cuMemAdvise_device(CUdeviceptr dptr, size_t count, CUmem_advise advice, CUdevice device);
+CUresult cuMemPrefetchAsync_device(CUdeviceptr dptr, size_t count, CUdevice device, CUstream hStream);
+#endif
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/cuda.cabal b/cuda.cabal
@@ -1,7 +1,7 @@
 cabal-version:          1.24
 
 Name:                   cuda
-Version:                0.12.8.0
+Version:                0.13.0.0
 Synopsis:               FFI binding to the CUDA interface for programming NVIDIA GPUs
 Description:
     The CUDA library provides a direct, general purpose C-like SPMD programming
@@ -30,7 +30,7 @@ Description:
     .
     * "Foreign.CUDA.Runtime"
     .
-    Tested with library versions up to CUDA-12.8. See also the
+    Tested with library versions up to CUDA-13.0. See also the
     <https://travis-ci.org/tmcdonell/cuda travis-ci.org> build matrix for
     version compatibility.
     .
@@ -177,6 +177,6 @@ source-repository head
 source-repository this
     type:               git
     location:           https://github.com/tmcdonell/cuda
-    tag:                v0.12.8.0
+    tag:                v0.13.0.0
 
 -- vim: nospell