497 changes: 482 additions & 15 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions sdac-lib/Cargo.toml
@@ -6,10 +6,14 @@ license = "MIT"
description = "Software Defined Accelerated Compute"
homepage = "https://github.com/xertai/sdac"
edition = "2021"
build = "build-cuda-types.rs"

Hi, hope you don't mind some random comments since I've landed here with curiosity from Steve's latest post...

This to me is a sign that you probably want a sub-crate that can form a hermetic boundary around your code-generation needs. Build scripts can be a bit tricky: one common failure mode I see is people rebuilding everything on every build because they haven't annotated their inputs correctly, and it helps a lot to have that isolated in a small crate.
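A sketch of what that split could look like; the crate and path names here are hypothetical, not taken from this repo:

```toml
# Hypothetical workspace layout: codegen isolated in its own crate.
#
# sdac/
#   Cargo.toml        (workspace)
#   cuda-types/       (owns the build script + bindgen)
#   sdac-lib/         (no build script at all)

# cuda-types/Cargo.toml
[package]
name = "cuda-types"
version = "0.0.0"
edition = "2021"
build = "build.rs"

[build-dependencies]
bindgen = "0.63.0"

# sdac-lib/Cargo.toml then just declares:
# [dependencies]
# cuda-types = { path = "../cuda-types" }
```

With this split, only `cuda-types` is rebuilt when the headers or the build script change; `sdac-lib` consumes the generated types as an ordinary dependency.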


[lib]
crate-type = ["cdylib"]

[build-dependencies]
bindgen = "0.63.0"

[dependencies]
tarpc = { version = "0.31.0", features = ["full"] }
tokio = { version = "1.24.2", features = ["macros", "net", "rt-multi-thread"] }
@@ -20,3 +24,4 @@ libc = "0.2.139"
futures = "0.3.25"
anyhow = "1.0.68"
service = { version = "0.0.0", path = "../service" }
clib = "0.2.1"
31 changes: 31 additions & 0 deletions sdac-lib/build-cuda-types.rs
@@ -0,0 +1,31 @@
extern crate bindgen;

use std::env;
use std::path::{Path, PathBuf};

// Install NVIDIA CUDA prior to building the bindings with `cargo build`.
// https://docs.rs/bindgen/latest/bindgen/struct.Builder.html
fn main() {
let cdir = std::env::var("CUDA_DIR").unwrap_or("/usr/local/cuda-11.8".to_string());

/usr/local/cuda-11.8 is going to change over time and over OS; see for instance https://askubuntu.com/questions/1375718/no-usr-local-cuda-directory-after-cuda-installation

I think you're going to want a helper function that probes some N places to find the actual path. Some folks working on build systems are also trying to make sure all state contributing to a build can be materialized to disk, so perhaps having it in a config file or some such would help too.
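A minimal sketch of such a probing helper; the candidate list and the `CUDA_DIR` override are illustrative, not a definitive set:

```rust
use std::env;
use std::path::PathBuf;

/// Probe a few conventional locations for a CUDA install, preferring an
/// explicit override. The candidate list here is illustrative only.
fn find_cuda_dir(override_dir: Option<&str>) -> Option<PathBuf> {
    if let Some(dir) = override_dir {
        return Some(PathBuf::from(dir));
    }
    ["/usr/local/cuda", "/usr/local/cuda-11.8", "/opt/cuda"]
        .into_iter()
        .map(PathBuf::from)
        .find(|p| p.join("include/cuda.h").exists())
}

fn main() {
    // In a build script the override would come from an env var that the
    // script declares with cargo:rerun-if-env-changed=CUDA_DIR.
    let cuda = find_cuda_dir(env::var("CUDA_DIR").ok().as_deref());
    println!("{:?}", cuda);
}
```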

let cuda_dir = Path::new(&cdir);

let bindings = bindgen::Builder::default()
.header(cuda_dir.join("include/cuda.h").display().to_string())
.header(cuda_dir.join("include/cuda_runtime_api.h").display().to_string())
.allowlist_type("CU.*")
.allowlist_type("cuda.*")
.derive_eq(true)
.array_pointers_in_arguments(true)
.generate()
.unwrap();

You're going to want to add CargoCallbacks here, otherwise cargo won't know when to rebuild your bindings. https://docs.rs/bindgen/latest/bindgen/struct.CargoCallbacks.html
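Concretely, for the bindgen 0.63 pinned above, that is one extra line on the builder. This is a fragment of the build script, not standalone code:

```rust
let bindings = bindgen::Builder::default()
    // Emit cargo:rerun-if-changed for each header bindgen actually reads,
    // so cargo rebuilds the bindings only when an input changes.
    .parse_callbacks(Box::new(bindgen::CargoCallbacks))
    .header(cuda_dir.join("include/cuda.h").display().to_string())
    // ...rest of the builder chain unchanged...
    .generate()
    .unwrap();
```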


let target_path = PathBuf::from(env::var("OUT_DIR").unwrap());
bindings
.write_to_file(target_path.join("cuda_types.rs"))
.expect("Couldn't write bindings!");

println!(
"Wrote bindings to {}",
target_path.join("cuda_types.rs").display()
);
}
213 changes: 213 additions & 0 deletions sdac-lib/src/device.rs
@@ -0,0 +1,213 @@
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]

use service::*;
use futures::executor::block_on;

This is a red flag outside of test code. It is super easy to deadlock when using block_on; see tokio-rs/tokio#3717 for instance. I don't know what motivates the use of it yet, but I would really encourage you to either write blocking code that is completely async-ignorant, or await-using code. This permits you to push a lot more concerns onto the compiler.


Ok, so I think I see what you're doing: you're writing a library that exposes the CUDA API to some clients, but remotes it.

Thus you have this stack:

1 CUDA client
|
2 CUDA thunk (sync as CUDA is)
|
3 client - async - that talks to a remote CUDA instance

What I would do here is use crossbeam - just in the same process - so that rather than blocking in 2 to get 3 to do something you make 1 sync call to crossbeam to send and then 1 sync call to read.

3. then becomes

3 client - sync - that talks to a local async client over crossbeam

This will add a layer, but remove all the risk of deadlocking etc. 2) becomes a real thread, and is now strictly separate from the async core.
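A runnable sketch of that shape, using `std::sync::mpsc` in place of crossbeam so it needs only the standard library; the worker thread stands in for the async core, and all names and the `Request` type are illustrative:

```rust
use std::sync::mpsc;
use std::thread;

// One message per CUDA call; the reply channel carries the result back.
enum Request {
    Init { flags: u32, reply: mpsc::Sender<u32> },
}

// Spawn the thread that owns the remote client. In the real design it
// would hold the tarpc client and drive a tokio runtime; here it just
// answers synchronously.
fn spawn_cuda_worker() -> mpsc::Sender<Request> {
    let (tx, rx) = mpsc::channel::<Request>();
    thread::spawn(move || {
        for req in rx {
            match req {
                Request::Init { flags: _, reply } => {
                    let _ = reply.send(0); // CUDA_SUCCESS
                }
            }
        }
    });
    tx
}

// The sync CUDA thunk: one send plus one blocking receive - no block_on.
fn cu_init(worker: &mpsc::Sender<Request>, flags: u32) -> u32 {
    let (reply_tx, reply_rx) = mpsc::channel();
    worker.send(Request::Init { flags, reply: reply_tx }).unwrap();
    reply_rx.recv().unwrap()
}

fn main() {
    let worker = spawn_cuda_worker();
    println!("cuInit -> {}", cu_init(&worker, 0));
}
```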

use std::mem::size_of;
use std::ffi::CString;
use std::sync::{Mutex};

I highly encourage running cargo clippy before committing - it's quite fast, and will identify little things like this (the {} can be removed) that make reading code just that little bit nicer.

use tarpc::{context};

There's a very nice import ordering that isn't quite popular enough to have made it into rustfmt, but which rust-analyzer supports: grouping by

  1. standard
  2. external crates
  3. in-repo crates
  4. this crate

Here this would look like

use std::ffi::CString;
use std::sync::Mutex;
use std::mem::size_of;

use futures::executor::block_on;
use tarpc::context;

use service::*;


pub fn cuGetErrorString(

Are these non-[idiomatic Rust names](https://rust-lang.github.io/api-guidelines/naming.html) required by some external standard? If not, I really encourage not using them.

Can I read mixedCase? Yes; but everywhere you interoperate with idiomatic code it will add friction and visual dissonance.

If they are from an external standard, I suggest a trivial trait + impl that thunks all the mixedCase names to snake_case, isolating it in one place in your codebase; then use snake_case everywhere else.
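A sketch of that thunking trait; `CUresult` and the driver type are stubbed for illustration, not taken from the real bindings:

```rust
// Stub for the generated bindgen type.
type CUresult = u32;

// The C-style surface, isolated in one place.
#[allow(non_snake_case)]
trait RawCuda {
    fn cuInit(&self, flags: u32) -> CUresult;
}

// The idiomatic facade the rest of the codebase uses.
trait Cuda {
    fn init(&self, flags: u32) -> CUresult;
}

// Blanket impl: every raw implementation gets snake_case names for free.
impl<T: RawCuda> Cuda for T {
    fn init(&self, flags: u32) -> CUresult {
        self.cuInit(flags)
    }
}

struct FakeDriver;

#[allow(non_snake_case)]
impl RawCuda for FakeDriver {
    fn cuInit(&self, _flags: u32) -> CUresult {
        0 // CUDA_SUCCESS
    }
}

fn main() {
    println!("{}", FakeDriver.init(0));
}
```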

client: &Mutex<service::CudaClient>,
error: CUresult,
pStr: *mut ::std::os::raw::c_char,
) -> CUresult {
let (strName, res) = block_on(

for this case, there's no need to block_on:

remove the block_on, use .await at the end of .cuGetErrorString. tarpc should be giving you threadsafe behaviour - not pointing to a transient memory space. Though the use of a pointer across an RPC is really hard to reason about.

Concrete suggestion for improvement: don't do this across the RPC barrier: Construct the error object within the RPC server, not on the calling side.


Can I commend to you https://github.com/Rust-GPU/Rust-CUDA/blob/8a6cb734d21d5582052fa5b38089d1aa0f4d582f/crates/cust/src/error.rs#L98 as an alternative? That is, use the existing crate. I'm sure there's some reason you're not, but it's not obvious to me.

client
.lock()
.unwrap()
.cuGetErrorString(context::current(), error),
)
.unwrap();

if res != cudaError_enum_CUDA_SUCCESS {
return res;
}

let cs = CString::new(strName).unwrap();
unsafe {
libc::strcpy(pStr, cs.as_ptr());
}

res
}

pub fn cuGetErrorName(
client: &Mutex<service::CudaClient>,
error: CUresult,
pStr: *mut ::std::os::raw::c_char,
) -> CUresult {
let (strName, res) = block_on(
client
.lock()
.unwrap()
.cuGetErrorName(context::current(), error),

same comments here; but also - combining both these RPCs into one slightly fatter call is probably an optimisation: in the success case you can have a Result<...>::Ok(), and in the error case grab the string at the same time.

)
.unwrap();

if res != cudaError_enum_CUDA_SUCCESS {
return res;
}

let cs = CString::new(strName).unwrap();
unsafe {
libc::strcpy(pStr, cs.as_ptr());
}

res
}
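The "slightly fatter call" suggested in the comment above could return either the value or a fully populated error in one round trip. A sketch - the types and the stubbed server are illustrative, though 100 does match the real CUDA_ERROR_NO_DEVICE code:

```rust
// Stub for the generated bindgen type.
type CUresult = u32;

// Error constructed on the RPC server, strings included, so the client
// never has to make follow-up cuGetErrorName/cuGetErrorString calls.
#[derive(Debug, PartialEq)]
struct CudaError {
    code: CUresult,
    name: String,
    message: String,
}

// Stand-in for the RPC: success carries the value, failure carries
// everything needed to report the error.
fn cu_device_get_count(simulate_failure: bool) -> Result<i32, CudaError> {
    if simulate_failure {
        Err(CudaError {
            code: 100,
            name: "CUDA_ERROR_NO_DEVICE".to_string(),
            message: "no CUDA-capable device is detected".to_string(),
        })
    } else {
        Ok(1)
    }
}

fn main() {
    println!("{:?}", cu_device_get_count(false));
    println!("{:?}", cu_device_get_count(true));
}
```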

pub fn cuInit(client: &Mutex<service::CudaClient>, flags: ::std::os::raw::c_uint) -> CUresult {
block_on(
client
.lock()
.unwrap()
.cuInit(context::current(), flags),
)
.unwrap()
}

pub fn cuDeviceGetName(client: &Mutex<service::CudaClient>,
name: *mut ::std::os::raw::c_char,
len: ::std::os::raw::c_int,
dev: CUdevice,
) -> CUresult {
let (strName, res) = block_on(client.lock().unwrap().cuDeviceGetName(
context::current(),
len,
dev,
))
.unwrap();

let cs = CString::new(strName).unwrap();
unsafe {
libc::strcpy(name, cs.as_ptr());
}

res
}

pub fn cuDeviceGetCount(client: &Mutex<service::CudaClient>, count: *mut ::std::os::raw::c_int) -> CUresult {
let (cnt, res) = block_on(
client
.lock()
.unwrap()
.cuDeviceGetCount(context::current()),
)
.unwrap();

unsafe {
*count = cnt;
}

res
}

pub fn cuDeviceGet(client: &Mutex<service::CudaClient>,
device: *mut CUdevice,
ordinal: ::std::os::raw::c_int,
) -> CUresult {
let (dev, res) = block_on(
client
.lock()
.unwrap()
.cuDeviceGet(context::current(), ordinal),
)
.unwrap();

unsafe {
*device = dev;
}

res
}

pub fn cuMemAlloc_v2(client: &Mutex<service::CudaClient>,
dptr: *mut CUdeviceptr,
bytesize: ::std::os::raw::c_ulonglong,
) -> CUresult {
let (ptr, res) = block_on(
client
.lock()
.unwrap()
.cuMemAlloc_v2(context::current(), bytesize as usize),
)
.unwrap();

unsafe {
*dptr = ptr;
}

res
}

pub fn cuMemcpyDtoH_v2(client: &Mutex<service::CudaClient>,
dstHost: *mut ::std::os::raw::c_void,
srcDevice: CUdeviceptr,
ByteCount: ::std::os::raw::c_ulonglong,
) -> CUresult {
let (data, res) = block_on(
client
.lock()
.unwrap()
.cuMemcpyDtoH_v2(context::current(), srcDevice, ByteCount as usize),
)
.unwrap();

if res != cudaError_enum_CUDA_SUCCESS {
return res;
}

unsafe {
libc::memcpy(dstHost, data.as_ptr() as *const libc::c_void, ByteCount as usize);
}

I worry about the amount of unsafe - another plug for both doing the reification to safe constructs close to the actual CUDA calls, and for reusing the existing Rust CUDA bindings; I think they'll be very helpful.


0
}
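One way to shrink the unsafe surface on the calling side is to confine it to a single bounds-checked copy helper that the thunks share. A sketch; the helper name is hypothetical:

```rust
// Copy RPC-returned bytes into a caller-supplied buffer. The single
// unsafe conversion sits at the FFI edge; everything after it is a
// bounds-checked safe slice copy.
fn fill_caller_buffer(dst: *mut u8, dst_len: usize, data: &[u8]) -> Result<(), &'static str> {
    if data.len() > dst_len {
        return Err("destination buffer too small");
    }
    let out = unsafe { std::slice::from_raw_parts_mut(dst, data.len()) };
    out.copy_from_slice(data);
    Ok(())
}

fn main() {
    let mut buf = vec![0u8; 4];
    fill_caller_buffer(buf.as_mut_ptr(), buf.len(), &[1, 2, 3]).unwrap();
    println!("{:?}", buf); // [1, 2, 3, 0]
}
```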

pub fn cuMemcpyHtoD_v2(client: &Mutex<service::CudaClient>,
dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void,
ByteCount: ::std::os::raw::c_ulonglong,
) -> CUresult {
let data = unsafe { std::slice::from_raw_parts(srcHost as *const u8, ByteCount as usize) }.to_vec();
block_on(
client
.lock()
.unwrap()
.cuMemcpyHtoD_v2(context::current(), dstDevice, data, ByteCount as usize),
)
.unwrap()
}

pub fn cuMemFree_v2(client: &Mutex<service::CudaClient>, dptr: CUdeviceptr) -> CUresult {
block_on(
client
.lock()
.unwrap()
.cuMemFree_v2(context::current(), dptr),
)
.unwrap()
}

pub fn cuDeviceTotalMem_v2(
client: &Mutex<service::CudaClient>,
bytes: *mut usize,
dev: CUdevice,
) -> CUresult {
let (cnt, res) = block_on(
client
.lock()
.unwrap()
.cuDeviceTotalMem_v2(context::current(), dev),
)
.unwrap();

unsafe {
*bytes = cnt;
}

res
}
33 changes: 33 additions & 0 deletions sdac-lib/src/global.rs
@@ -0,0 +1,33 @@
use futures::executor::block_on;

use std::borrow::BorrowMut;

use std::sync::{Mutex, Once};
use tarpc::{client, tokio_serde::formats::Json};

static mut CUDA_CLIENT: Option<Mutex<service::CudaClient>> = None;
static INIT: Once = Once::new();

pub fn client<'a>() -> &'a Mutex<service::CudaClient> {
INIT.call_once(|| {
// Since this access is inside a call_once, before any other accesses, it is safe
unsafe {
let transport = block_on(tarpc::serde_transport::tcp::connect(
"[::1]:50055",
Json::default,
))
.unwrap();

// WorldClient is generated by the service attribute. It has a constructor `new` that takes a
// config and any Transport as input.
let client = service::CudaClient::new(client::Config::default(), transport).spawn();

*CUDA_CLIENT.borrow_mut() = Some(Mutex::new(client));
}
});

// As long as this function is the only place with access to the static variable,
// giving out a read-only borrow here is safe because it is guaranteed no more mutable
// references will exist at this point or in the future.
unsafe { CUDA_CLIENT.as_ref().unwrap() }
}

I commend to you the once_cell crate. It will make this safer - literally, allow removal of unsafe{} - and a bit smaller too.
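For what it's worth, `std::sync::OnceLock` (stable since Rust 1.70) gives the same shape with no extra dependency at all. A sketch with `CudaClient` stubbed; real code would connect the tarpc transport inside `get_or_init`:

```rust
use std::sync::{Mutex, OnceLock};

// Stub for the generated tarpc client type.
struct CudaClient;

// No static mut, no Once, no unsafe: OnceLock guarantees one-time init.
static CUDA_CLIENT: OnceLock<Mutex<CudaClient>> = OnceLock::new();

fn client() -> &'static Mutex<CudaClient> {
    CUDA_CLIENT.get_or_init(|| {
        // Real code would block_on the transport connect exactly once here.
        Mutex::new(CudaClient)
    })
}

fn main() {
    // Both calls hand back the same initialized instance.
    println!("{}", std::ptr::eq(client(), client()));
}
```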
