From ee1390e29205fd17110399ac8c4d1b52f7b05bfa Mon Sep 17 00:00:00 2001 From: ember arlynx Date: Thu, 20 Mar 2025 15:19:08 -0400 Subject: [PATCH 01/10] upgrade deps --- src/device.rs | 7 +++++++ src/opencl/mod.rs | 10 ++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/device.rs b/src/device.rs index b5c5abe..5412cc2 100644 --- a/src/device.rs +++ b/src/device.rs @@ -28,6 +28,8 @@ const AMD_DEVICE_ON_APPLE_VENDOR_STRING: &str = "AMD"; const AMD_DEVICE_ON_APPLE_VENDOR_ID: u32 = 0x1021d00; const NVIDIA_DEVICE_VENDOR_STRING: &str = "NVIDIA Corporation"; const NVIDIA_DEVICE_VENDOR_ID: u32 = 0x10de; +const APPLE_DEVICE_VENDOR_ID: u32 = 0x1027F00; +const APPLE_DEVICE_VENDOR_STRING: &str = "Apple"; // The owned CUDA contexts are stored globally. Each devives contains an unowned reference, so // that devices can be cloned. @@ -180,6 +182,8 @@ pub enum Vendor { Intel, /// GPU by NVIDIA. Nvidia, + /// GPU by Apple. + Apple, } impl TryFrom<&str> for Vendor { @@ -191,6 +195,7 @@ impl TryFrom<&str> for Vendor { AMD_DEVICE_ON_APPLE_VENDOR_STRING => Ok(Self::Amd), INTEL_DEVICE_VENDOR_STRING => Ok(Self::Intel), NVIDIA_DEVICE_VENDOR_STRING => Ok(Self::Nvidia), + APPLE_DEVICE_VENDOR_STRING => Ok(Self::Apple), _ => Err(GPUError::UnsupportedVendor(vendor.to_string())), } } @@ -205,6 +210,7 @@ impl TryFrom for Vendor { AMD_DEVICE_ON_APPLE_VENDOR_ID => Ok(Self::Amd), INTEL_DEVICE_VENDOR_ID => Ok(Self::Intel), NVIDIA_DEVICE_VENDOR_ID => Ok(Self::Nvidia), + APPLE_DEVICE_VENDOR_ID => Ok(Self::Apple), _ => Err(GPUError::UnsupportedVendor(format!("0x{:x}", vendor))), } } @@ -216,6 +222,7 @@ impl fmt::Display for Vendor { Self::Amd => AMD_DEVICE_VENDOR_STRING, Self::Intel => INTEL_DEVICE_VENDOR_STRING, Self::Nvidia => NVIDIA_DEVICE_VENDOR_STRING, + Self::Apple => APPLE_DEVICE_VENDOR_STRING, }; write!(f, "{}", vendor) } diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs index c8e32b4..1d0707c 100644 --- a/src/opencl/mod.rs +++ b/src/opencl/mod.rs @@ -152,10 +152,7 @@ impl Program { let mut program = opencl3::program::Program::create_from_source(&context, src)?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3 { - error: build_error, - message: Some(log), - }); + return Err(GPUError::Opencl3 { error: build_error, message: Some(log) }); } debug!( "Building kernel ({}) from source: done.", @@ -194,10 +191,7 @@ impl Program { }?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3 { - error: build_error, - message: Some(log), - }); + return Err(GPUError::Opencl3 { error: build_error, message: Some(log) }); } let queue = CommandQueue::create_default(&context, 0)?; let kernels = opencl3::kernel::create_program_kernels(&program)?; From 23b73cbe20b2e2b6441270c5902d0df6c8813904 Mon Sep 17 00:00:00 2001 From: ember arlynx Date: Fri, 21 Mar 2025 03:09:36 -0400 Subject: [PATCH 02/10] fix clippy --- src/cuda/mod.rs | 14 +++++++++++--- src/cuda/utils.rs | 3 ++- src/opencl/mod.rs | 24 +++++++++++++++++++----- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/cuda/mod.rs b/src/cuda/mod.rs index bdcf83c..b65fd8b 100644 --- a/src/cuda/mod.rs +++ b/src/cuda/mod.rs @@ -199,7 +199,9 @@ impl Program { let bytes_len = mem::size_of_val(slice); // Transmuting types is safe as long a sizes match. - let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; + let bytes = unsafe { + std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) + }; // It is only unsafe as long as the buffer isn't initialized, but that's what we do next. let mut buffer = unsafe { DeviceBuffer::::uninitialized(bytes_len)? }; @@ -239,7 +241,10 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data)) + std::slice::from_raw_parts( + data.as_ptr() as *const u8, + mem::size_of_val(data), + ) }; // It is safe as we synchronize the stream after the call. @@ -255,7 +260,10 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data)) + std::slice::from_raw_parts_mut( + data.as_mut_ptr() as *mut u8, + mem::size_of_val(data), + ) }; // It is safe as we synchronize the stream after the call. diff --git a/src/cuda/utils.rs b/src/cuda/utils.rs index 207d658..56f2da8 100644 --- a/src/cuda/utils.rs +++ b/src/cuda/utils.rs @@ -11,7 +11,8 @@ use crate::error::{GPUError, GPUResult}; // are never used directly, they are only accessed through [`cuda::Device`] which contains an // `UnownedContext`. A device cannot have an own context itself, as then it couldn't be cloned, // but that is needed for creating the kernels. -pub(crate) struct CudaContexts(#[allow(unused)] Vec); +#[allow(dead_code)] +pub(crate) struct CudaContexts(Vec); unsafe impl Sync for CudaContexts {} unsafe impl Send for CudaContexts {} diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs index 1d0707c..75173d3 100644 --- a/src/opencl/mod.rs +++ b/src/opencl/mod.rs @@ -152,7 +152,10 @@ impl Program { let mut program = opencl3::program::Program::create_from_source(&context, src)?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3 { error: build_error, message: Some(log) }); + return Err(GPUError::Opencl3 { + error: build_error, + message: Some(log), + }); } debug!( "Building kernel ({}) from source: done.", @@ -191,7 +194,10 @@ impl Program { }?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3 { error: build_error, message: Some(log) }); + return Err(GPUError::Opencl3 { + error: build_error, + message: Some(log), + }); } let queue = CommandQueue::create_default(&context, 0)?; let kernels = opencl3::kernel::create_program_kernels(&program)?; @@ -258,7 +264,9 @@ impl Program { )? }; // Transmuting types is safe as long a sizes match. - let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; + let bytes = unsafe { + std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) + }; // Write some data right-away. This makes a significant performance different. unsafe { self.queue @@ -312,7 +320,10 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { - std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data)) + std::slice::from_raw_parts( + data.as_ptr() as *const u8, + mem::size_of_val(data), + ) }; unsafe { self.queue @@ -327,7 +338,10 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { - std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data)) + std::slice::from_raw_parts_mut( + data.as_mut_ptr() as *mut u8, + mem::size_of_val(data), + ) }; unsafe { self.queue From 353ae3c92879d3c730487bc783f0dafc98aeaa87 Mon Sep 17 00:00:00 2001 From: ember arlynx Date: Thu, 27 Mar 2025 23:43:39 -0400 Subject: [PATCH 03/10] allow(unused) + cargo fmt --- src/cuda/mod.rs | 14 +++----------- src/cuda/utils.rs | 3 +-- src/opencl/mod.rs | 14 +++----------- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/src/cuda/mod.rs b/src/cuda/mod.rs index b65fd8b..bdcf83c 100644 --- a/src/cuda/mod.rs +++ b/src/cuda/mod.rs @@ -199,9 +199,7 @@ impl Program { let bytes_len = mem::size_of_val(slice); // Transmuting types is safe as long a sizes match. - let bytes = unsafe { - std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) - }; + let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; // It is only unsafe as long as the buffer isn't initialized, but that's what we do next. let mut buffer = unsafe { DeviceBuffer::::uninitialized(bytes_len)? }; @@ -241,10 +239,7 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts( - data.as_ptr() as *const u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data)) }; // It is safe as we synchronize the stream after the call. @@ -260,10 +255,7 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts_mut( - data.as_mut_ptr() as *mut u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data)) }; // It is safe as we synchronize the stream after the call. diff --git a/src/cuda/utils.rs b/src/cuda/utils.rs index 56f2da8..207d658 100644 --- a/src/cuda/utils.rs +++ b/src/cuda/utils.rs @@ -11,8 +11,7 @@ use crate::error::{GPUError, GPUResult}; // are never used directly, they are only accessed through [`cuda::Device`] which contains an // `UnownedContext`. A device cannot have an own context itself, as then it couldn't be cloned, // but that is needed for creating the kernels. -#[allow(dead_code)] -pub(crate) struct CudaContexts(Vec); +pub(crate) struct CudaContexts(#[allow(unused)] Vec); unsafe impl Sync for CudaContexts {} unsafe impl Send for CudaContexts {} diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs index 75173d3..c8e32b4 100644 --- a/src/opencl/mod.rs +++ b/src/opencl/mod.rs @@ -264,9 +264,7 @@ impl Program { )? }; // Transmuting types is safe as long a sizes match. - let bytes = unsafe { - std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) - }; + let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; // Write some data right-away. This makes a significant performance different. unsafe { self.queue @@ -320,10 +318,7 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { - std::slice::from_raw_parts( - data.as_ptr() as *const u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data)) }; unsafe { self.queue @@ -338,10 +333,7 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { - std::slice::from_raw_parts_mut( - data.as_mut_ptr() as *mut u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data)) }; unsafe { self.queue From 66da3fdb63e24aa1d2db1d1e37db4ffd7f999ff3 Mon Sep 17 00:00:00 2001 From: Steve Loeppky Date: Tue, 1 Jul 2025 11:43:11 -0700 Subject: [PATCH 04/10] Handle Apple GPU PCI bus information properly Apple Silicon GPUs are integrated into the SoC and don't use PCI bus. This change ensures synthetic PCI-ID assignment for Apple devices. --- src/opencl/utils.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/opencl/utils.rs b/src/opencl/utils.rs index eab3b3a..270cef3 100644 --- a/src/opencl/utils.rs +++ b/src/opencl/utils.rs @@ -37,6 +37,13 @@ fn get_pci_id(device: &opencl3::device::Device) -> GPUResult { let device_id = device.pci_slot_id_nv()? as u16; (bus_id << 8) | device_id } + Vendor::Apple => { + // Apple Silicon GPUs are integrated into the SoC and don't use PCI bus + // Return an error to trigger synthetic PCI-ID assignment + return Err(GPUError::Generic( + "Apple GPUs don't have PCI bus information".to_string() + )); + } }; Ok(id.into()) } From 101912369049581ea73b9befc7c376a967a08e64 Mon Sep 17 00:00:00 2001 From: Steve Loeppky Date: Tue, 1 Jul 2025 11:47:05 -0700 Subject: [PATCH 05/10] Fix formatting - add missing comma --- src/opencl/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/opencl/utils.rs b/src/opencl/utils.rs index 270cef3..1a38524 100644 --- a/src/opencl/utils.rs +++ b/src/opencl/utils.rs @@ -41,7 +41,7 @@ fn get_pci_id(device: &opencl3::device::Device) -> GPUResult { // Apple Silicon GPUs are integrated into the SoC and don't use PCI bus // Return an error to trigger synthetic PCI-ID assignment return Err(GPUError::Generic( - "Apple GPUs don't have PCI bus information".to_string() + "Apple GPUs don't have PCI bus information".to_string(), )); } }; From df2836680b48c637b3a45ec725ef063e08f1eb8c Mon Sep 17 00:00:00 2001 From: Steve Loeppky Date: Tue, 1 Jul 2025 11:49:38 -0700 Subject: [PATCH 06/10] trigger CI rebuild From f6aaf4cabb0d5d9485614307b5f9a0b257212a57 Mon Sep 17 00:00:00 2001 From: Steve Loeppky Date: Tue, 1 Jul 2025 11:51:29 -0700 Subject: [PATCH 07/10] Fix CI: install clippy and rustfmt components --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d5fd8ea..ae6c9bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,6 +21,8 @@ jobs: name: Clippy steps: - uses: actions/checkout@v4 + - name: Install clippy + run: rustup component add clippy - name: Run cargo clippy run: cargo clippy --all-targets --workspace --all-features -- -D warnings @@ -29,6 +31,8 @@ jobs: name: Checking fmt steps: - uses: actions/checkout@v4 + - name: Install rustfmt + run: rustup component add rustfmt - name: Run cargo fmt run: cargo fmt --all -- --check From 669eaa5a351a7cb0f57cfba309c3a25fc0ff0867 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Wed, 2 Jul 2025 10:24:08 +1000 Subject: [PATCH 08/10] chore: add clippy and rustfmt components --- rust-toolchain | 1 + 1 file changed, 1 insertion(+) diff --git a/rust-toolchain b/rust-toolchain index dbd4126..a45db19 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1,2 @@ 1.81.0 +components = ["clippy", "rustfmt"] From 9aeaf3c8000c00a792d3edbd95e3eaca60359fd6 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Wed, 2 Jul 2025 10:24:38 +1000 Subject: [PATCH 09/10] chore: remove rustfmt and clippy installs from GHA --- .github/workflows/ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ae6c9bf..d5fd8ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,8 +21,6 @@ jobs: name: Clippy steps: - uses: actions/checkout@v4 - - name: Install clippy - run: rustup component add clippy - name: Run cargo clippy run: cargo clippy --all-targets --workspace --all-features -- -D warnings @@ -31,8 +29,6 @@ jobs: name: Checking fmt steps: - uses: actions/checkout@v4 - - name: Install rustfmt - run: rustup component add rustfmt - name: Run cargo fmt run: cargo fmt --all -- --check From e741939ebef58a8d717ca68bcb0519563b1df22c Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Wed, 2 Jul 2025 10:25:59 +1000 Subject: [PATCH 10/10] fix(src): toolchain formatting --- rust-toolchain | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust-toolchain b/rust-toolchain index a45db19..251f956 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,2 +1,3 @@ -1.81.0 +[toolchain] +channel = "1.81.0" components = ["clippy", "rustfmt"]