Skip to content

Commit 0ea7c21

Browse files
committed
(ml5717) Implemented CUDA specific Event Buffer
1 parent 1f535b1 commit 0ea7c21

File tree

25 files changed

+410
-66
lines changed

25 files changed

+410
-66
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ members = [
1111
"necsim-core",
1212
"necsim-impls-std",
1313
"necsim-impls-no-std",
14+
"necsim-impls-cuda",
1415

1516
"necsim-classical",
1617
"necsim-gillespie",

TODO

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
- implement event transform from GPU to CPU
1+
- implement event transfer from GPU to CPU
2+
- implement simulation domain slicing so that event buffer fits into GPU RAM
23
- implement event deduplication (global vs local? - needs to be global for speciation)
34
- implement simple stop condition for time sliced kernel launching
45
- implement declarative event filtering which can be put onto the GPU as well

necsim-core/src/event.rs

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,17 @@ use core::marker::PhantomData;
33
use crate::cogs::{Habitat, LineageReference};
44
use crate::landscape::IndexedLocation;
55

6+
#[cfg(feature = "cuda")]
7+
use rustacuda_core::DeviceCopy;
8+
9+
#[cfg(feature = "cuda")]
10+
use rust_cuda::common::RustToCuda;
11+
612
pub struct Event<H: Habitat, R: LineageReference<H>> {
713
time: f64,
814
lineage_reference: R,
915
r#type: EventType<H, R>,
10-
_marker: PhantomData<H>,
16+
marker: PhantomData<H>,
1117
}
1218

1319
impl<H: Habitat, R: LineageReference<H>> Event<H, R> {
@@ -21,7 +27,7 @@ impl<H: Habitat, R: LineageReference<H>> Event<H, R> {
2127
time,
2228
lineage_reference,
2329
r#type,
24-
_marker: PhantomData::<H>,
30+
marker: PhantomData::<H>,
2531
}
2632
}
2733

@@ -49,10 +55,40 @@ pub enum EventType<H: Habitat, R: LineageReference<H>> {
4955
origin: IndexedLocation,
5056
target: IndexedLocation,
5157
coalescence: Option<R>,
52-
_marker: PhantomData<H>,
58+
marker: PhantomData<H>,
5359
},
5460
}
5561

62+
impl<H: Habitat, R: LineageReference<H>> Clone for Event<H, R> {
63+
fn clone(&self) -> Self {
64+
Self {
65+
time: self.time,
66+
lineage_reference: self.lineage_reference.clone(),
67+
r#type: self.r#type.clone(),
68+
marker: self.marker.clone(),
69+
}
70+
}
71+
}
72+
73+
impl<H: Habitat, R: LineageReference<H>> Clone for EventType<H, R> {
74+
fn clone(&self) -> Self {
75+
match self {
76+
EventType::Speciation => EventType::Speciation,
77+
EventType::Dispersal {
78+
origin,
79+
target,
80+
coalescence,
81+
marker,
82+
} => EventType::Dispersal {
83+
origin: origin.clone(),
84+
target: target.clone(),
85+
coalescence: coalescence.clone(),
86+
marker: marker.clone(),
87+
},
88+
}
89+
}
90+
}
91+
5692
impl<H: Habitat, R: LineageReference<H>> core::fmt::Debug for Event<H, R> {
5793
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
5894
f.debug_struct("Event")
@@ -62,7 +98,19 @@ impl<H: Habitat, R: LineageReference<H>> core::fmt::Debug for Event<H, R> {
6298
)
6399
.field("lineage_reference", &self.lineage_reference)
64100
.field("type", &self.r#type)
65-
.field("_marker", &format_args!("PhantomData"))
101+
.field("marker", &format_args!("PhantomData"))
66102
.finish()
67103
}
68104
}
105+
106+
#[cfg(feature = "cuda")]
107+
unsafe impl<H: Habitat + RustToCuda, R: LineageReference<H> + DeviceCopy> DeviceCopy
108+
for Event<H, R>
109+
{
110+
}
111+
112+
#[cfg(feature = "cuda")]
113+
unsafe impl<H: Habitat + RustToCuda, R: LineageReference<H> + DeviceCopy> DeviceCopy
114+
for EventType<H, R>
115+
{
116+
}

necsim-cuda/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ edition = "2018"
1010
necsim-core = { path = "../necsim-core", features = ["cuda"] }
1111
necsim-impls-no-std = { path = "../necsim-impls-no-std", features = ["cuda"] }
1212
necsim-impls-std = { path = "../necsim-impls-std" }
13+
necsim-impls-cuda = { path = "../necsim-impls-cuda" }
1314
array2d = { path = "../array2d-no-std" }
1415
anyhow = "1.0"
1516
contracts = "0.6.0"

necsim-cuda/kernel/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ ryu = "1.0.5"
1111
necsim-core = { path = "../../necsim-core", features = ["cuda"] }
1212
rust-cuda = { path = "../../rust-cuda", features = [] }
1313
necsim-impls-no-std = { path = "../../necsim-impls-no-std", features = ["cuda"] }
14+
necsim-impls-cuda = { path = "../../necsim-impls-cuda" }
1415
rustacuda_core = "0.1.2"

necsim-cuda/kernel/src/lib.rs

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,21 @@ use necsim_core::cogs::{
4343
ActiveLineageSampler, CoalescenceSampler, DispersalSampler, EventSampler,
4444
HabitatToU64Injection, IncoherentLineageStore, LineageReference, PrimeableRng,
4545
};
46-
use necsim_core::reporter::NullReporter;
4746
use necsim_core::simulation::Simulation;
4847
use rust_cuda::common::RustToCuda;
4948
use rust_cuda::device::BorrowFromRust;
5049
use rustacuda_core::DeviceCopy;
5150

52-
use necsim_impls_no_std::cogs::rng::cuda::CudaRng;
51+
use necsim_impls_cuda::cogs::rng::CudaRng;
52+
use necsim_impls_cuda::event_buffer::common::EventBufferCudaRepresentation;
53+
use necsim_impls_cuda::event_buffer::device::EventBufferDevice;
5354

5455
#[no_mangle]
5556
/// # Safety
5657
/// This CUDA kernel is unsafe as it is called with raw pointers
5758
pub unsafe extern "ptx-kernel" fn simulate(
5859
simulation_c_ptr: *mut core::ffi::c_void,
60+
event_buffer_c_ptr: *mut core::ffi::c_void,
5961
max_steps: usize,
6062
) {
6163
use necsim_impls_no_std::cogs::active_lineage_sampler::independent::IndependentActiveLineageSampler as ActiveLineageSampler;
@@ -79,6 +81,7 @@ pub unsafe extern "ptx-kernel" fn simulate(
7981
EventSampler<_, _, _, _, _>,
8082
ActiveLineageSampler<_, _, _, _, _>,
8183
> as RustToCuda>::CudaRepresentation,
84+
event_buffer_c_ptr as *mut EventBufferCudaRepresentation<Habitat, LineageReference>,
8285
max_steps,
8386
)
8487
}
@@ -94,20 +97,21 @@ unsafe fn simulate_generic<
9497
A: ActiveLineageSampler<H, G, D, R, S, C, E> + RustToCuda,
9598
>(
9699
simulation_ptr: *mut <Simulation<H, G, D, R, S, C, E, A> as RustToCuda>::CudaRepresentation,
100+
event_buffer_ptr: *mut EventBufferCudaRepresentation<H, R>,
97101
max_steps: usize,
98102
) {
99103
Simulation::with_borrow_from_rust_mut(simulation_ptr, |simulation| {
100-
let mut reporter = NullReporter;
104+
EventBufferDevice::with_borrow_from_rust_mut(event_buffer_ptr, |event_buffer_reporter| {
105+
let (time, steps) = simulation.simulate_incremental(max_steps, event_buffer_reporter);
101106

102-
let (time, steps) = simulation.simulate_incremental(max_steps, &mut reporter);
103-
104-
if utils::thread_idx().as_id(&utils::block_dim()) == 0 {
105-
println!(
106-
"index = {}, time = {:?}, steps = {}",
107-
utils::index(),
108-
F64(time),
109-
steps
110-
);
111-
}
107+
if utils::thread_idx().as_id(&utils::block_dim()) == 0 {
108+
println!(
109+
"index = {}, time = {:?}, steps = {}",
110+
utils::index(),
111+
F64(time),
112+
steps
113+
);
114+
}
115+
})
112116
})
113117
}

necsim-cuda/src/lib.rs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@ use necsim_core::cogs::{LineageStore, PrimeableRng};
2121
use necsim_core::reporter::Reporter;
2222
use necsim_core::simulation::Simulation;
2323

24+
use necsim_impls_cuda::cogs::rng::CudaRng;
25+
use necsim_impls_cuda::event_buffer::host::EventBufferHost;
2426
use necsim_impls_no_std::cogs::active_lineage_sampler::independent::IndependentActiveLineageSampler;
2527
use necsim_impls_no_std::cogs::coalescence_sampler::independent::IndependentCoalescenceSampler;
2628
use necsim_impls_no_std::cogs::dispersal_sampler::in_memory::packed_alias::InMemoryPackedAliasDispersalSampler;
2729
use necsim_impls_no_std::cogs::event_sampler::independent::IndependentEventSampler;
2830
use necsim_impls_no_std::cogs::habitat::in_memory::InMemoryHabitat;
2931
use necsim_impls_no_std::cogs::lineage_reference::in_memory::InMemoryLineageReference;
3032
use necsim_impls_no_std::cogs::lineage_store::incoherent::in_memory::IncoherentInMemoryLineageStore;
31-
use necsim_impls_no_std::cogs::rng::cuda::CudaRng;
3233
use necsim_impls_std::cogs::dispersal_sampler::in_memory::InMemoryDispersalSampler;
3334

3435
macro_rules! with_cuda {
@@ -155,8 +156,10 @@ impl CudaSimulation {
155156
speciation_probability_per_generation: f64,
156157
sample_percentage: f64,
157158
rng: G,
158-
_reporter: &mut impl Reporter<InMemoryHabitat, InMemoryLineageReference>,
159+
reporter: &mut impl Reporter<InMemoryHabitat, InMemoryLineageReference>,
159160
) -> Result<(f64, usize)> {
161+
const SIMULATION_STEP_SLICE: usize = 1_000_usize;
162+
160163
let habitat = InMemoryHabitat::new(habitat.clone());
161164
let dispersal_sampler = InMemoryPackedAliasDispersalSampler::new(dispersal, &habitat)?;
162165
let lineage_store = IncoherentInMemoryLineageStore::new(sample_percentage, &habitat);
@@ -187,6 +190,9 @@ impl CudaSimulation {
187190
+ (total_individuals % cuda_block_length > 0) as u32
188191
});
189192

193+
let mut event_buffer: EventBufferHost<InMemoryHabitat, InMemoryLineageReference> =
194+
EventBufferHost::new(&cuda_block_size, &cuda_grid_size, SIMULATION_STEP_SLICE)?;
195+
190196
//let (time, steps) = simulation.simulate(rng, reporter);
191197

192198
let module_data = CString::new(include_str!(env!("KERNEL_PTX_PATH"))).unwrap();
@@ -216,16 +222,27 @@ impl CudaSimulation {
216222
print_kernel_function_attributes(&simulate_kernel);
217223

218224
if let Err(err) = simulation.lend_to_cuda_mut(|simulation_mut_ptr| {
225+
let block_index_range = 0..(cuda_grid_size.x * cuda_grid_size.y * cuda_grid_size.z);
226+
219227
// Launching kernels is unsafe since Rust can't enforce safety - think of kernel launches
220228
// as a foreign-function call. In this case, it is - this kernel is written in CUDA C.
221229
unsafe {
222230
launch!(simulate_kernel<<<cuda_grid_size, cuda_block_size, 0, stream>>>(
223231
simulation_mut_ptr,
224-
1_000_usize // max steps on GPU
232+
event_buffer.get_mut_cuda_ptr(),
233+
SIMULATION_STEP_SLICE
225234
))?;
226235
}
227236

228-
stream.synchronize()
237+
stream.synchronize()?;
238+
239+
for block_index in block_index_range {
240+
event_buffer.with_fetched_events_for_block(block_index as usize, |events| {
241+
events.iter().for_each(|event| reporter.report_event(event))
242+
})?
243+
}
244+
245+
Ok(())
229246
}) {
230247
eprintln!("Running kernel failed with {:#?}!", err);
231248
}

necsim-impls-cuda/Cargo.toml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[package]
2+
name = "necsim-impls-cuda"
3+
version = "0.1.0"
4+
authors = ["Moritz Langenstein <ml5717@ic.ac.uk>"]
5+
edition = "2018"
6+
7+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8+
9+
[dependencies]
10+
necsim-core = { path = "../necsim-core", features = ["cuda"] }
11+
contracts = "0.6.0"
12+
13+
rust-cuda-derive = { path = "../rust-cuda/rust-cuda-derive" }
14+
rustacuda_core = { version = "0.1.2" }
15+
16+
[target.'cfg(target_os = "cuda")'.dependencies]
17+
rust-cuda = { path = "../rust-cuda", features = [] }
18+
19+
[target.'cfg(not(target_os = "cuda"))'.dependencies]
20+
rust-cuda = { path = "../rust-cuda", features = ["host"] }
21+
rustacuda = { version = "0.1.2" }

necsim-impls-cuda/src/cogs/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod rng;
File renamed without changes.

0 commit comments

Comments
 (0)