Skip to content

Commit 31d53ff

Browse files
committed
build: 更新 edition、cuda driver 和依赖项版本 (build: update edition, CUDA driver, and dependency versions)
Signed-off-by: YdrMaster <ydrml@hotmail.com>
1 parent 25a1da6 commit 31d53ff

File tree

14 files changed

+86
-81
lines changed

14 files changed

+86
-81
lines changed

Cargo.toml

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
[workspace]
22
members = ["operators"]
3-
resolver = "2"
3+
resolver = "3"
4+
package.edition = "2024"
45

56
[workspace.dependencies]
67
clrt = { git = "https://github.com/InfiniTensor/clrt", rev = "984ac7a" }
@@ -11,8 +12,8 @@ infini-op = { git = "https://github.com/InfiniTensor/infini-toolkit", rev = "e83
1112
infini-ccl = { git = "https://github.com/InfiniTensor/infini-toolkit", rev = "e8362c3" }
1213
search-infini-tools = { git = "https://github.com/InfiniTensor/infini-toolkit", rev = "e8362c3" }
1314

14-
cuda = { git = "https://github.com/YdrMaster/cuda-driver", rev = "f3ffbcc" }
15-
cublas = { git = "https://github.com/YdrMaster/cuda-driver", rev = "f3ffbcc" }
16-
nccl = { git = "https://github.com/YdrMaster/cuda-driver", rev = "f3ffbcc" }
17-
search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "f3ffbcc" }
18-
search-corex-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "f3ffbcc" }
15+
cuda = { git = "https://github.com/YdrMaster/cuda-driver", rev = "1751f0a" }
16+
cublas = { git = "https://github.com/YdrMaster/cuda-driver", rev = "1751f0a" }
17+
nccl = { git = "https://github.com/YdrMaster/cuda-driver", rev = "1751f0a" }
18+
search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "1751f0a" }
19+
search-corex-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "1751f0a" }

operators/Cargo.toml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "operators"
33
version = "0.0.0"
4-
edition = "2021"
4+
edition.workspace = true
55
authors = ["YdrMaster <ydrml@hotmail.com>"]
66

77
[features]
@@ -13,13 +13,13 @@ nvidia-gpu = ["cuda", "cublas", "nccl", "fslock", "libloading"]
1313
iluvatar-gpu = ["cuda", "cublas", "fslock", "libloading"]
1414

1515
[dependencies]
16-
digit-layout = "0.2"
17-
ndarray-layout = "0.1"
16+
digit-layout = "0.3"
17+
ndarray-layout = "0.2"
1818
rayon = "1.10"
19-
lru = "0.12"
19+
lru = "0.14"
2020
num-traits = "0.2"
2121
itertools = "0.14"
22-
half = "2.4"
22+
half = "2.6"
2323
log = "0.4"
2424

2525
gemm = { version = "0.18", optional = true }

operators/src/add/cuda/mod.rs

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,11 @@ use crate::{
55
utils::gcd,
66
ByteOf, LaunchError, QueueAlloc, SchemeDiversity,
77
};
8+
use cuda::params;
89
use digit_layout::DigitLayout;
910
use lru::LruCache;
1011
use std::{
11-
ffi::{c_uint, CString},
12+
ffi::c_uint,
1213
sync::{Arc, Mutex},
1314
};
1415

@@ -70,18 +71,15 @@ impl crate::Operator for Operator {
7071
b_base,
7172
..
7273
} = args;
73-
let params = cuda::params![c_base, a_base, b_base];
7474

7575
self.schemes
7676
.lock()
7777
.unwrap()
7878
.get_or_insert(dt, || compile(&self.handle, dt))
7979
.launch(
80-
CString::new("add").unwrap(),
81-
grid_dims as c_uint,
82-
block_dims as c_uint,
83-
params.as_ptr(),
84-
0,
80+
c"add",
81+
(grid_dims as c_uint, block_dims as c_uint, 0),
82+
&params![*c_base, *a_base, *b_base].to_ptrs(),
8583
queue_alloc.queue(),
8684
);
8785
Ok(())

operators/src/add_rows/cuda/mod.rs

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ use crate::{
66
utils::gcd,
77
ByteOf, LaunchError, QueueAlloc, SchemeDiversity,
88
};
9+
use cuda::params;
910
use digit_layout::DigitLayout;
1011
use lru::LruCache;
1112
use std::{
@@ -77,7 +78,7 @@ impl crate::Operator for Operator {
7778
let &[bsi] = cast(&[bsi], unit_idx as usize).as_slice() else {
7879
todo!()
7980
};
80-
let params = cuda::params![dst_base, src_base, idx_base, bsd, msd, kss, bsi];
81+
let params = params![*dst_base, *src_base, *idx_base, bsd, msd, kss, bsi];
8182
let block = gcd(self.max_threads_block, n);
8283
let dimx = n.div_ceil(block);
8384
let key = SchemeKey { dt: dst_layout.dt };
@@ -89,10 +90,8 @@ impl crate::Operator for Operator {
8990
.clone();
9091
scheme.module.launch(
9192
&scheme.name,
92-
(b as _, m as _, dimx as _),
93-
block as u32,
94-
params.as_ptr(),
95-
0,
93+
((b as _, m as _, dimx as _), block as u32, 0),
94+
&params.to_ptrs(),
9695
queue_alloc.queue(),
9796
);
9897
Ok(())

operators/src/common/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ pub use tensor::TensorLayout;
1515
pub use unsigned::Unsigned;
1616
pub use workspace::Workspace;
1717

18-
pub(crate) use diversity::{SchemeCacheSize, SchemeDiversity};
18+
pub(crate) use diversity::SchemeDiversity;
1919

2020
pub mod utils {
2121
use super::{rank_not_support, type_mismatch, LaunchError};

operators/src/fuesd_softmax/cuda/mod.rs

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ use crate::{
66
cuda::{Gpu, Handle, ModuleBox},
77
strides_not_support, type_not_support, ByteOf, LaunchError, QueueAlloc,
88
};
9+
use cuda::params;
910
use digit_layout::types::F16;
1011
use std::{
1112
collections::HashMap,
@@ -73,26 +74,22 @@ impl crate::Operator for Operator {
7374
let sh = (sh / unit) as i32;
7475
let ss = (ss / unit) as i32;
7576
let att_len = att_len as u32;
76-
let params = cuda::params![att_base, 0i32, sh, ss, att_len];
77+
let params = params![*att_base, 0i32, sh, ss, att_len];
7778

7879
if att_len <= block_size {
7980
scheme.module.launch(
8081
&scheme.padding,
81-
grid_dims,
82-
att_len,
83-
params.as_ptr(),
84-
0,
82+
(grid_dims, att_len, 0),
83+
&params.to_ptrs(),
8584
queue.queue(),
8685
);
8786
} else {
8887
let num_items_thread = att_len.div_ceil(block_size);
8988
let smem = (num_items_thread * block_size) as usize;
9089
scheme.module.launch(
9190
&scheme.folding,
92-
grid_dims,
93-
block_size,
94-
params.as_ptr(),
95-
smem * size_of::<c_float>(),
91+
(grid_dims, block_size, smem * size_of::<c_float>()),
92+
&params.to_ptrs(),
9693
queue.queue(),
9794
);
9895
}

operators/src/gelu/cuda/mod.rs

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ use crate::{
55
utils::gcd,
66
ByteOf, LaunchError, QueueAlloc,
77
};
8+
use cuda::params;
89
use digit_layout::types::F16;
910
use std::{
1011
ffi::{c_uint, CString},
@@ -60,15 +61,12 @@ impl crate::Operator for Operator {
6061
return Err(strides_not_support(""));
6162
};
6263

63-
let params = cuda::params![base];
6464
let block = gcd(self.max_threads_block, d);
6565

6666
self.module.launch(
6767
CString::new(NAME).unwrap(),
68-
(n * d).div_ceil(block) as c_uint,
69-
block as u32,
70-
params.as_ptr(),
71-
0,
68+
((n * d).div_ceil(block) as c_uint, block as c_uint, 0),
69+
&params![*base].to_ptrs(),
7270
queue_alloc.queue(),
7371
);
7472
Ok(())

operators/src/handle/cuda/alloc.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ impl<'ctx> Alloc<DevMem<'ctx>> for Stream<'ctx> {
108108

109109
#[inline]
110110
fn free(&self, mem: DevMem<'ctx>) {
111-
mem.drop_on(self)
111+
Stream::free(self, mem);
112112
}
113113
}
114114

operators/src/handle/cuda/module.rs

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -39,19 +39,11 @@ impl ModuleBox {
3939
pub fn launch(
4040
&self,
4141
name: impl AsRef<CStr>,
42-
grid_dims: impl Into<Dim3>,
43-
block_dims: impl Into<Dim3>,
44-
params: *const *const c_void,
45-
shared_mem: usize,
42+
attrs: (impl Into<Dim3>, impl Into<Dim3>, usize),
43+
params: &[*const c_void],
4644
stream: &Stream,
4745
) {
48-
self.load(name, stream.ctx()).launch(
49-
grid_dims,
50-
block_dims,
51-
params,
52-
shared_mem,
53-
Some(stream),
54-
)
46+
stream.launch(&self.load(name, stream.ctx()), attrs, params);
5547
}
5648
}
5749

operators/src/layer_norm/cuda/mod.rs

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,11 @@ use crate::{
44
layer_norm::args::Meta,
55
shape_not_support, strides_not_support, ByteOf, LaunchError, QueueAlloc, SchemeDiversity,
66
};
7+
use cuda::params;
78
use digit_layout::DigitLayout;
89
use lru::LruCache;
910
use std::{
10-
ffi::CString,
11+
ffi::{c_uint, CString},
1112
sync::{Arc, Mutex},
1213
};
1314

@@ -82,17 +83,27 @@ impl crate::Operator for Operator {
8283

8384
let nsy = (nsy / unit) as i32;
8485
let nsx = (nsx / unit) as i32;
85-
let params = cuda::params![y_base, nsy, x_base, nsx, scale_base, bias_base, epsilon];
86+
let params = params![
87+
*y_base,
88+
nsy,
89+
*x_base,
90+
nsx,
91+
*scale_base,
92+
*bias_base,
93+
*epsilon
94+
];
8695

8796
scheme.module.launch(
8897
&scheme.name,
89-
n as u32,
90-
match scheme.ty {
91-
SchemeType::Padding => d,
92-
SchemeType::Folding { block_size } => block_size,
93-
} as u32,
94-
params.as_ptr(),
95-
0,
98+
(
99+
n as c_uint,
100+
match scheme.ty {
101+
SchemeType::Padding => d,
102+
SchemeType::Folding { block_size } => block_size,
103+
} as c_uint,
104+
0,
105+
),
106+
&params.to_ptrs(),
96107
queue_alloc.queue(),
97108
);
98109

0 commit comments

Comments (0)