Skip to content

Commit 73ab8e9

Browse files
committed
Progress upgrading to the latest rust-cuda, FitsIntoDeviceRegister still broken
1 parent db0f5d5 commit 73ab8e9

File tree

10 files changed

+100
-63
lines changed

10 files changed

+100
-63
lines changed

Cargo.lock

Lines changed: 14 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

necsim/core/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ contracts = "0.6.3"
2020
serde = { version = "1.0", default-features = false, features = ["derive"] }
2121

2222
[target.'cfg(target_os = "cuda")'.dependencies]
23-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["derive"], optional = true }
23+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["derive"], optional = true }
2424

2525
[target.'cfg(not(target_os = "cuda"))'.dependencies]
26-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["derive", "host"], optional = true }
26+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["derive", "host"], optional = true }

necsim/impls/cuda/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ contracts = "0.6.3"
1515
serde = { version = "1.0", default-features = false, features = ["derive"] }
1616

1717
[target.'cfg(target_os = "cuda")'.dependencies]
18-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["derive"] }
18+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["derive"] }
1919

2020
[target.'cfg(not(target_os = "cuda"))'.dependencies]
21-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["derive", "host"] }
21+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["derive", "host"] }

necsim/impls/cuda/src/event_buffer.rs

Lines changed: 32 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -271,27 +271,32 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
271271

272272
let shared_buffer_len = <Self as EventType>::SharedBuffer::<()>::len();
273273

274-
let block_dim = rust_cuda::device::utils::block_dim();
274+
let thread = rust_cuda::device::thread::Thread::this();
275+
let thread_idx = thread.idx();
276+
let thread_block = thread.block();
277+
let block_dim = thread_block.dim();
278+
let block_idx = thread_block.idx();
279+
let block_grid = thread_block.grid();
280+
let grid_dim = block_grid.dim();
275281

276282
if shared_buffer_len != (block_dim.size() * 2) {
277283
core::arch::nvptx::trap();
278284
}
279285

280-
let block_idx =
281-
rust_cuda::device::utils::block_idx().as_id(&rust_cuda::device::utils::grid_dim());
282-
let thread_idx = rust_cuda::device::utils::thread_idx().as_id(&block_dim);
286+
let block_idx = block_idx.as_id(&grid_dim);
287+
let thread_idx = thread_idx.as_id(&block_dim);
283288

284289
let idx = block_idx * shared_buffer_len + thread_idx;
285290

286-
let shared_mask: rust_cuda::device::ThreadBlockShared<
291+
let shared_mask: rust_cuda::utils::shared::r#static::ThreadBlockShared<
287292
<Self as EventType>::SharedBuffer<bool>,
288-
> = rust_cuda::device::ThreadBlockShared::new_uninit();
289-
let shared_mask_array: *mut bool = shared_mask.get().as_mut_ptr();
290-
let shared_buffer: rust_cuda::device::ThreadBlockShared<
293+
> = rust_cuda::utils::shared::r#static::ThreadBlockShared::new_uninit();
294+
let shared_mask_array: *mut bool = shared_mask.as_mut_ptr().cast();
295+
let shared_buffer: rust_cuda::utils::shared::r#static::ThreadBlockShared<
291296
<Self as EventType>::SharedBuffer<MaybeSome<<Self as EventType>::Event>>,
292-
> = rust_cuda::device::ThreadBlockShared::new_uninit();
297+
> = rust_cuda::utils::shared::r#static::ThreadBlockShared::new_uninit();
293298
let shared_buffer_array: *mut MaybeSome<<Self as EventType>::Event> =
294-
shared_buffer.get().as_mut_ptr();
299+
shared_buffer.as_mut_ptr().cast();
295300

296301
*shared_mask_array.add(thread_idx) = match self.event_mask.alias_unchecked().get(idx) {
297302
None => false,
@@ -476,27 +481,32 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
476481

477482
let shared_buffer_len = <Self as EventType>::SharedBuffer::<()>::len();
478483

479-
let block_dim = rust_cuda::device::utils::block_dim();
484+
let thread = rust_cuda::device::thread::Thread::this();
485+
let thread_idx = thread.idx();
486+
let thread_block = thread.block();
487+
let block_dim = thread_block.dim();
488+
let block_idx = thread_block.idx();
489+
let block_grid = thread_block.grid();
490+
let grid_dim = block_grid.dim();
480491

481492
if shared_buffer_len != (block_dim.size() * 2) {
482493
core::arch::nvptx::trap();
483494
}
484495

485-
let block_idx =
486-
rust_cuda::device::utils::block_idx().as_id(&rust_cuda::device::utils::grid_dim());
487-
let thread_idx = rust_cuda::device::utils::thread_idx().as_id(&block_dim);
496+
let block_idx = block_idx.as_id(&grid_dim);
497+
let thread_idx = thread_idx.as_id(&block_dim);
488498

489499
let idx = block_idx * shared_buffer_len + thread_idx;
490500

491-
let shared_mask: rust_cuda::device::ThreadBlockShared<
501+
let shared_mask: rust_cuda::utils::shared::r#static::ThreadBlockShared<
492502
<Self as EventType>::SharedBuffer<bool>,
493-
> = rust_cuda::device::ThreadBlockShared::new_uninit();
494-
let shared_mask_array: *mut bool = shared_mask.get().cast();
495-
let shared_buffer: rust_cuda::device::ThreadBlockShared<
503+
> = rust_cuda::utils::shared::r#static::ThreadBlockShared::new_uninit();
504+
let shared_mask_array: *mut bool = shared_mask.as_mut_ptr().cast();
505+
let shared_buffer: rust_cuda::utils::shared::r#static::ThreadBlockShared<
496506
<Self as EventType>::SharedBuffer<MaybeSome<<Self as EventType>::Event>>,
497-
> = rust_cuda::device::ThreadBlockShared::new_uninit();
507+
> = rust_cuda::utils::shared::r#static::ThreadBlockShared::new_uninit();
498508
let shared_buffer_array: *mut MaybeSome<<Self as EventType>::Event> =
499-
shared_buffer.get().cast();
509+
shared_buffer.as_mut_ptr().cast();
500510

501511
*shared_mask_array.add(thread_idx) = match self.event_mask.alias_unchecked().get(idx) {
502512
None => false,
@@ -618,7 +628,7 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
618628
pub unsafe fn bitonic_sort_events_step(&mut self, size: usize, stride: usize) {
619629
use core::cmp::Ordering;
620630

621-
let idx = rust_cuda::device::utils::index();
631+
let idx = rust_cuda::device::thread::Thread::this().index();
622632

623633
let pos = idx & ((self.event_mask.alias_unchecked().len().next_power_of_two() / 2) - 1);
624634

@@ -719,7 +729,7 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
719729
pub unsafe fn odd_even_sort_events_step(&mut self, size: usize, stride: usize) {
720730
use core::cmp::Ordering;
721731

722-
let idx = rust_cuda::device::utils::index();
732+
let idx = rust_cuda::device::thread::Thread::this().index();
723733

724734
let pos = 2 * idx - (idx & (stride - 1));
725735

necsim/impls/no-std/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ rand_core = "0.6"
3131
rand_distr = { version = "0.4", default-features = false, features = [] }
3232

3333
[target.'cfg(target_os = "cuda")'.dependencies]
34-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["derive"], optional = true }
34+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["derive"], optional = true }
3535

3636
[target.'cfg(not(target_os = "cuda"))'.dependencies]
37-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["derive", "host"], optional = true }
37+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["derive", "host"], optional = true }

rustcoalescence/algorithms/cuda/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,4 +23,4 @@ thiserror = "1.0"
2323
serde = { version = "1.0", features = ["derive"] }
2424
serde_state = "0.4"
2525
serde_derive_state = "0.4"
26-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["host"] }
26+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["host"] }

rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c
1414
necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" }
1515
rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" }
1616

17-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["host"] }
17+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["host"] }

rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
use rustcoalescence_algorithms_cuda_gpu_kernel::{
2-
BitonicGlobalSortStepKernelArgs, BitonicGlobalSortSteppableKernel,
3-
BitonicSharedSortPrepKernelArgs, BitonicSharedSortPreparableKernel,
4-
BitonicSharedSortStepKernelArgs, BitonicSharedSortSteppableKernel, EvenOddSortKernelArgs,
5-
EvenOddSortableKernel, SimulatableKernel, SimulationKernelArgs,
2+
BitonicGlobalSortStepKernelArgs, BitonicGlobalSortStepKernelPtx,
3+
BitonicGlobalSortSteppableKernel, BitonicSharedSortPrepKernelArgs,
4+
BitonicSharedSortPrepKernelPtx, BitonicSharedSortPreparableKernel,
5+
BitonicSharedSortStepKernelArgs, BitonicSharedSortStepKernelPtx,
6+
BitonicSharedSortSteppableKernel, EvenOddSortKernelArgs, EvenOddSortKernelPtx,
7+
EvenOddSortableKernel, SimulatableKernel, SimulationKernelArgs, SimulationKernelPtx,
68
};
79

810
use crate::{

rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,4 +16,4 @@ necsim-core-bond = { path = "../../../../necsim/core/bond" }
1616
necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["cuda"] }
1717
necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" }
1818

19-
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "7a41652", features = ["derive"] }
19+
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "626cd48", features = ["derive"] }

rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs

Lines changed: 39 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -31,26 +31,28 @@ use necsim_impls_no_std::cogs::{
3131
event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample},
3232
};
3333

34-
use rust_cuda::common::RustToCuda;
34+
use rust_cuda::{common::RustToCuda, safety::NoAliasing};
3535

3636
#[rust_cuda::common::kernel(
37-
pub use link_kernel! as impl SimulatableKernel<SimulationKernelArgs> for SimulationKernel
37+
pub use link_kernel! as impl SimulatableKernel<
38+
SimulationKernelArgs, SimulationKernelPtx,
39+
> for SimulationKernel
3840
)]
3941
#[allow(clippy::too_many_arguments)]
4042
#[allow(clippy::type_complexity)]
4143
pub fn simulate<
4244
M: MathsCore,
43-
H: Habitat<M> + RustToCuda,
44-
G: Rng<M, Generator: PrimeableRng> + RustToCuda,
45-
S: LineageStore<M, H> + RustToCuda,
46-
X: EmigrationExit<M, H, G, S> + RustToCuda,
47-
D: DispersalSampler<M, H, G> + RustToCuda,
48-
C: CoalescenceSampler<M, H, S> + RustToCuda,
49-
T: TurnoverRate<M, H> + RustToCuda,
50-
N: SpeciationProbability<M, H> + RustToCuda,
51-
E: MinSpeciationTrackingEventSampler<M, H, G, S, X, D, C, T, N> + RustToCuda,
52-
I: ImmigrationEntry<M> + RustToCuda,
53-
A: SingularActiveLineageSampler<M, H, G, S, X, D, C, T, N, E, I> + RustToCuda,
45+
H: Habitat<M> + RustToCuda + NoAliasing,
46+
G: Rng<M, Generator: PrimeableRng> + RustToCuda + NoAliasing,
47+
S: LineageStore<M, H> + RustToCuda + NoAliasing,
48+
X: EmigrationExit<M, H, G, S> + RustToCuda + NoAliasing,
49+
D: DispersalSampler<M, H, G> + RustToCuda + NoAliasing,
50+
C: CoalescenceSampler<M, H, S> + RustToCuda + NoAliasing,
51+
T: TurnoverRate<M, H> + RustToCuda + NoAliasing,
52+
N: SpeciationProbability<M, H> + RustToCuda + NoAliasing,
53+
E: MinSpeciationTrackingEventSampler<M, H, G, S, X, D, C, T, N> + RustToCuda + NoAliasing,
54+
I: ImmigrationEntry<M> + RustToCuda + NoAliasing,
55+
A: SingularActiveLineageSampler<M, H, G, S, X, D, C, T, N, E, I> + RustToCuda + NoAliasing,
5456
ReportSpeciation: Boolean,
5557
ReportDispersal: Boolean,
5658
>(
@@ -135,13 +137,17 @@ pub fn simulate<
135137
}
136138

137139
// #[rust_cuda::common::kernel(
138-
// pub use link_sort_kernel! as impl SortableKernel<SortKernelArgs> for
139-
// SortKernel )]
140+
// pub use link_sort_kernel! as impl SortableKernel<
141+
// SortKernelArgs, SortKernelPtx,
142+
// > for SortKernel
143+
// )]
140144
// pub fn sort_events_step<ReportSpeciation: Boolean, ReportDispersal: Boolean>(
141-
// #[kernel(pass = LendRustToCuda, jit)] event_buffer_reporter: &mut
142-
// ShallowCopy<
143-
// necsim_impls_cuda::event_buffer::EventBuffer<ReportSpeciation,
144-
// ReportDispersal>, >,
145+
// #[kernel(pass = LendRustToCuda, jit)]
146+
// event_buffer_reporter: &mut ShallowCopy<
147+
// necsim_impls_cuda::event_buffer::EventBuffer<
148+
// ReportSpeciation, ReportDispersal,
149+
// >,
150+
// >,
145151
// #[kernel(pass = SafeDeviceCopy)] size: usize,
146152
// #[kernel(pass = SafeDeviceCopy)] stride: usize,
147153
// ) {
@@ -152,7 +158,9 @@ pub fn simulate<
152158
// }
153159

154160
#[rust_cuda::common::kernel(
155-
pub use link_even_odd_sort_kernel! as impl EvenOddSortableKernel<EvenOddSortKernelArgs> for EvenOddSortKernel
161+
pub use link_even_odd_sort_kernel! as impl EvenOddSortableKernel<
162+
EvenOddSortKernelArgs, EvenOddSortKernelPtx,
163+
> for EvenOddSortKernel
156164
)]
157165
pub fn even_odd_sort_events_step<ReportSpeciation: Boolean, ReportDispersal: Boolean>(
158166
#[kernel(pass = LendRustToCuda, jit)] event_buffer_reporter: &mut ShallowCopy<
@@ -168,7 +176,9 @@ pub fn even_odd_sort_events_step<ReportSpeciation: Boolean, ReportDispersal: Boo
168176
}
169177

170178
#[rust_cuda::common::kernel(
171-
pub use link_bitonic_global_sort_step_kernel! as impl BitonicGlobalSortSteppableKernel<BitonicGlobalSortStepKernelArgs> for BitonicGlobalSortStepKernel
179+
pub use link_bitonic_global_sort_step_kernel! as impl BitonicGlobalSortSteppableKernel<
180+
BitonicGlobalSortStepKernelArgs, BitonicGlobalSortStepKernelPtx,
181+
> for BitonicGlobalSortStepKernel
172182
)]
173183
pub fn bitonic_global_sort_events_step<ReportSpeciation: Boolean, ReportDispersal: Boolean>(
174184
#[kernel(pass = LendRustToCuda, jit)] event_buffer_reporter: &mut ShallowCopy<
@@ -184,7 +194,9 @@ pub fn bitonic_global_sort_events_step<ReportSpeciation: Boolean, ReportDispersa
184194
}
185195

186196
#[rust_cuda::common::kernel(
187-
pub use link_bitonic_shared_sort_step_kernel! as impl BitonicSharedSortSteppableKernel<BitonicSharedSortStepKernelArgs> for BitonicSharedSortStepKernel
197+
pub use link_bitonic_shared_sort_step_kernel! as impl BitonicSharedSortSteppableKernel<
198+
BitonicSharedSortStepKernelArgs, BitonicSharedSortStepKernelPtx,
199+
> for BitonicSharedSortStepKernel
188200
)]
189201
pub fn bitonic_shared_sort_events_step<ReportSpeciation: Boolean, ReportDispersal: Boolean>(
190202
#[kernel(pass = LendRustToCuda, jit)] event_buffer_reporter: &mut ShallowCopy<
@@ -199,7 +211,9 @@ pub fn bitonic_shared_sort_events_step<ReportSpeciation: Boolean, ReportDispersa
199211
}
200212

201213
#[rust_cuda::common::kernel(
202-
pub use link_bitonic_shared_sort_prep_kernel! as impl BitonicSharedSortPreparableKernel<BitonicSharedSortPrepKernelArgs> for BitonicSharedSortPrepKernel
214+
pub use link_bitonic_shared_sort_prep_kernel! as impl BitonicSharedSortPreparableKernel<
215+
BitonicSharedSortPrepKernelArgs, BitonicSharedSortPrepKernelPtx,
216+
> for BitonicSharedSortPrepKernel
203217
)]
204218
pub fn bitonic_shared_sort_events_prep<ReportSpeciation: Boolean, ReportDispersal: Boolean>(
205219
#[kernel(pass = LendRustToCuda, jit)] event_buffer_reporter: &mut ShallowCopy<
@@ -215,10 +229,10 @@ pub fn bitonic_shared_sort_events_prep<ReportSpeciation: Boolean, ReportDispersa
215229
mod cuda_prelude {
216230
use core::arch::nvptx;
217231

218-
use rust_cuda::device::utils;
232+
use rust_cuda::device::alloc::PTXAllocator;
219233

220234
#[global_allocator]
221-
static _GLOBAL_ALLOCATOR: utils::PTXAllocator = utils::PTXAllocator;
235+
static _GLOBAL_ALLOCATOR: PTXAllocator = PTXAllocator;
222236

223237
#[cfg(not(debug_assertions))]
224238
#[panic_handler]

0 commit comments

Comments
 (0)