
Commit 4688a1a

Merge pull request #32 from GunnarFarneback/cuda_package_extension
Support CUDA on Julia 1.9+ via a package extension.
2 parents dfb67d6 + bd6a35b commit 4688a1a

File tree: 14 files changed (+299, −39 lines)

.github/workflows/CI.yml

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6'
+          - '1.9'
           - '1'
           - 'nightly'
         os:

Project.toml

Lines changed: 14 additions & 5 deletions
@@ -1,7 +1,7 @@
 name = "ONNXRunTime"
 uuid = "e034b28e-924e-41b2-b98f-d2bbeb830c6a"
 authors = ["Jan Weidner <jw3126@gmail.com> and contributors"]
-version = "0.3.3"
+version = "0.4.0"
 
 [deps]
 ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
@@ -11,18 +11,27 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 
 [compat]
 ArgCheck = "2"
 CEnum = "0.4"
+CUDA = "4, 5"
 DataStructures = "0.18"
 DocStringExtensions = "0.8, 0.9"
-Requires = "1"
-julia = "1.6"
+cuDNN = "1.1"
+julia = "1.9"
+
+[extensions]
+CUDAExt = ["CUDA", "cuDNN"]
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
 
 [targets]
-test = ["Test"]
+test = ["Test", "CUDA", "cuDNN"]
+
+[weakdeps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"

README.md

Lines changed: 65 additions & 2 deletions
@@ -28,12 +28,24 @@ julia> model(input)
 Dict{String, Matrix{Float32}} with 1 entry:
   "output" => [2.68127 2.18192 0.525979; -0.135185 2.02199 3.75168]
 ```
-For GPU usage simply do:
+
+For GPU usage the CUDA and cuDNN packages are required and the CUDA
+runtime needs to be set to 11.8 or a later 11.x version. To set this
+up, do
+
 ```julia
-pkg> add CUDA
+pkg> add CUDA cuDNN
 
 julia> import CUDA
 
+julia> CUDA.set_runtime_version!(v"11.8")
+```
+
+Then GPU inference is simply
+
+```julia
+julia> import CUDA, cuDNN
+
 julia> ORT.load_inference(path, execution_provider=:cuda)
 ```

@@ -63,3 +75,54 @@ output_array = GetTensorMutableData(api, output_tensor);
 * Use the onnxruntime python bindings via [PyCall.jl](https://github.com/JuliaPy/PyCall.jl).
 * [ONNX.jl](https://github.com/FluxML/ONNX.jl)
 * [ONNXNaiveNASflux.jl](https://github.com/DrChainsaw/ONNXNaiveNASflux.jl)
+
+# Breaking Changes in version 0.4
+
+* Support for CUDA.jl is changed from version 3 to versions 4 and 5.
+
+* Support for Julia versions less than 1.9 is dropped. The reason for
+  this is to switch the conditional support of GPUs from being based
+  on the Requires package to being a package extension. As a
+  consequence the ONNXRunTime GPU support can now be precompiled and
+  the CUDA.jl versions can be properly controlled via Compat.
+
+# Setting the CUDA Runtime Version in Tests
+
+For GPU tests using ONNXRunTime, the tests naturally must depend on
+and import CUDA and cuDNN. Additionally a supported CUDA runtime
+version needs to be used, which can be somewhat tricky to set up for
+the tests.
+
+First some background. What `CUDA.set_runtime_version!(v"11.8")`
+effectively does is to
+
+1. Add a `LocalPreferences.toml` file containing
+
+   ```
+   [CUDA_Runtime_jll]
+   version = "11.8"
+   ```
+
+2. In `Project.toml`, add
+
+   ```
+   [extras]
+   CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
+   ```
+
+If your test environment is defined by a `test` target in the top
+`Project.toml` you need to
+
+1. Add a `LocalPreferences.toml` in your top directory with the same
+   contents as above.
+
+2. Add `CUDA_Runtime_jll` to the `extras` section of `Project.toml`.
+
+3. Add `CUDA_Runtime_jll` to the `test` target of `Project.toml`.
+
+If your test environment is defined by a `Project.toml` in the `test`
+directory, you instead need to
+
+1. Add a `test/LocalPreferences.toml` file with the same contents as
+   above.
+
+2. Add `CUDA_Runtime_jll` to the `extras` section of `test/Project.toml`.
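
As a concrete illustration of the first variant (a `test` target in the top `Project.toml`), the three steps amount to something like the following sketch. The `CUDA_Runtime_jll` UUID is the one from this diff; all other sections of `Project.toml` are abbreviated away.

```toml
# LocalPreferences.toml (repository top directory)
[CUDA_Runtime_jll]
version = "11.8"
```

```toml
# Project.toml (only the sections relevant to the test setup)
[extras]
CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "CUDA_Runtime_jll"]
```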

ext/CUDAExt.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
module CUDAExt
2+
3+
# These functions are only defined for diagnostic purposes. Otherwise
4+
# the CUDA extension only relies on the CUDA and cuDNN dependencies to
5+
# have loaded the libraries needed by ONNXRunTime's CUDA execution
6+
# provider.
7+
import CUDA
8+
cuda_functional() = CUDA.functional()
9+
cuda_runtime_version() = CUDA.runtime_version()
10+
11+
end
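
The parent package reaches these diagnostic functions through `Base.get_extension`, as the `src/highlevel.jl` hunk in this commit shows. A minimal standalone sketch of that pattern, assuming ONNXRunTime and its GPU dependencies are installed:

```julia
# Sketch: querying a package extension at runtime (Julia >= 1.9).
# Base.get_extension returns the extension module once its triggers
# (here CUDA and cuDNN) have been loaded, and nothing otherwise.
import ONNXRunTime

ext = Base.get_extension(ONNXRunTime, :CUDAExt)
if ext === nothing
    @info "CUDAExt not loaded; run `import CUDA, cuDNN` first."
else
    @info "CUDA functional: $(ext.cuda_functional())"
    @info "CUDA runtime version: $(ext.cuda_runtime_version())"
end
```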

src/ONNXRunTime.jl

Lines changed: 1 addition & 7 deletions
@@ -1,5 +1,4 @@
 module ONNXRunTime
-using Requires:@require
 
 function _perm(arr::AbstractArray{T,N}) where {T,N}
     ntuple(i->N+1-i, N)
@@ -11,13 +10,8 @@ function reversedims_lazy(arr)
     PermutedDimsArray(arr, _perm(arr))
 end
 
+include("versions.jl")
 include("capi.jl")
 include("highlevel.jl")
 
-function __init__()
-    @require CUDA="052768ef-5323-5732-b1bb-66c8b64840ba" begin
-        CUDA.functional() && include("cuda.jl")
-    end
-end
-
 end #module

src/cuda.jl

Lines changed: 0 additions & 19 deletions
This file was deleted.

src/highlevel.jl

Lines changed: 16 additions & 4 deletions
@@ -65,10 +65,22 @@ function load_inference(path::AbstractString; execution_provider::Symbol=:cpu,
     if execution_provider === :cpu
         session_options = CreateSessionOptions(api)
     elseif execution_provider === :cuda
-        if !(isdefined(@__MODULE__, :CUDA))
-            @warn """
-            The $(repr(execution_provider)) requires the CUDA.jl package to be available. Try adding `import CUDA` to your code.
-            """
+        CUDAExt = Base.get_extension(@__MODULE__, :CUDAExt)
+        if isnothing(CUDAExt)
+            error("""
+                  The $(repr(execution_provider)) execution provider requires the CUDA.jl and cuDNN.jl packages to be available. Try adding `import CUDA, cuDNN` to your code.
+                  """)
+        elseif !getfield(CUDAExt, :cuda_functional)()
+            error("""
+                  The $(repr(execution_provider)) execution provider requires CUDA to be functional. See `CUDA.functional`.
+                  """)
+        else
+            cuda_runtime_version = getfield(CUDAExt, :cuda_runtime_version)()
+            if !(cuda_runtime_supported_version <= cuda_runtime_version < cuda_runtime_upper_bound)
+                error("""
+                      Found CUDA runtime version $(cuda_runtime_version). The $(repr(execution_provider)) execution provider requires a CUDA runtime version of at least $(cuda_runtime_supported_version) but less than $(cuda_runtime_upper_bound). See `CUDA.set_runtime_version!` and the package README.
+                      """)
+            end
         end
         session_options = CreateSessionOptions(api)
         cuda_options = OrtCUDAProviderOptions()

src/versions.jl

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+# Version number of the ONNXRunTime library and supported versions of
+# the CUDA runtime for GPU processing with the CUDA execution
+# provider.
+#
+# * `onnxruntime_version`: This number must match the version number
+#   reported by the ONNXRunTime library, which is verified in the
+#   tests. The only real purpose of this variable is to help keep the
+#   next one up to date when the library is updated.
+#
+# * `cuda_runtime_supported_version`: This is the lowest supported
+#   version of the CUDA runtime, which should match the
+#   information from
+#   https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements
+#
+# * `cuda_runtime_upper_bound`: The lowest CUDA runtime version which
+#   is *not* accepted. Presumably CUDA runtime follows semantic
+#   versioning so this can automatically be set to the next major
+#   version.
+const onnxruntime_version = v"1.15.1"
+const cuda_runtime_supported_version = v"11.8"
+const cuda_runtime_upper_bound = VersionNumber(cuda_runtime_supported_version.major + 1)

test/LocalPreferences.toml

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+[CUDA_Runtime_jll]
+version = "11.8"

test/Project.toml

Lines changed: 6 additions & 1 deletion
@@ -1,6 +1,11 @@
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
 
 [compat]
-CUDA = "3"
+CUDA = "5"
+cuDNN = "1.2"
+
+[extras]
+CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"

0 commit comments
