Skip to content

Commit c67ca04

Browse files
committed
(ml5717) Started IncoherentRngCore impl + forward RNG to GPU
1 parent e84a7ea commit c67ca04

File tree

17 files changed

+286
-90
lines changed

17 files changed

+286
-90
lines changed

necsim-core/src/rng.rs

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,72 @@
11
use crate::intrinsics::{floor, ln};
22

3+
use core::convert::AsMut;
4+
use core::default::Default;
5+
use core::ptr::copy_nonoverlapping;
6+
7+
#[allow(clippy::module_name_repetitions)]
8+
pub trait RngCore: Sized + Clone {
9+
type Seed: AsMut<[u8]> + Default + Sized;
10+
11+
#[must_use]
12+
fn from_seed(seed: Self::Seed) -> Self;
13+
14+
#[must_use]
15+
fn seed_from_u64(mut state: u64) -> Self {
16+
// Implementation from:
17+
// https://docs.rs/rand/0.7.3/rand/trait.SeedableRng.html#method.seed_from_u64
18+
19+
// We use PCG32 to generate a u32 sequence, and copy to the seed
20+
const MUL: u64 = 6_364_136_223_846_793_005_u64;
21+
const INC: u64 = 11_634_580_027_462_260_723_u64;
22+
23+
let mut seed = Self::Seed::default();
24+
for chunk in seed.as_mut().chunks_mut(4) {
25+
// We advance the state first (to get away from the input value,
26+
// in case it has low Hamming Weight).
27+
state = state.wrapping_mul(MUL).wrapping_add(INC);
28+
29+
// Use PCG output function with to_le to generate x:
30+
#[allow(clippy::cast_possible_truncation)]
31+
let xorshifted = (((state >> 18) ^ state) >> 27) as u32;
32+
#[allow(clippy::cast_possible_truncation)]
33+
let rot = (state >> 59) as u32;
34+
let x = xorshifted.rotate_right(rot).to_le();
35+
36+
unsafe {
37+
let p = &x as *const u32 as *const u8;
38+
copy_nonoverlapping(p, chunk.as_mut_ptr(), chunk.len());
39+
}
40+
}
41+
42+
Self::from_seed(seed)
43+
}
44+
45+
#[must_use]
46+
fn sample_u64(&mut self) -> u64;
47+
}
48+
49+
pub trait IncoherentRngCore: RngCore {
50+
type Prime: AsMut<[u8]> + Sized;
51+
52+
fn prime_with(&mut self, prime: Self::Prime);
53+
}
54+
355
#[allow(clippy::inline_always, clippy::inline_fn_without_body)]
456
#[contract_trait]
5-
pub trait Core {
57+
pub trait Rng: RngCore {
58+
#[must_use]
59+
#[inline]
660
#[debug_ensures(ret >= 0.0_f64 && ret <= 1.0_f64, "samples U(0.0, 1.0)")]
7-
fn sample_uniform(&mut self) -> f64;
8-
}
61+
fn sample_uniform(&mut self) -> f64 {
62+
// http://prng.di.unimi.it -> Generating uniform doubles in the unit interval
63+
#[allow(clippy::cast_precision_loss)]
64+
((self.sample_u64() >> 11) as f64)
65+
* f64::from_bits(0x3CA0_0000_0000_0000_u64) //0x1.0p-53
66+
}
967

10-
pub trait Rng: Core {
68+
#[must_use]
69+
#[inline]
1170
#[debug_ensures(ret < length, "samples U(0, length - 1)")]
1271
fn sample_index(&mut self, length: usize) -> usize {
1372
// attributes on expressions are experimental
@@ -21,12 +80,16 @@ pub trait Rng: Core {
2180
index
2281
}
2382

83+
#[must_use]
84+
#[inline]
2485
#[debug_requires(lambda > 0.0, "lambda > 0.0")]
2586
#[debug_ensures(ret >= 0.0, "samples Exp(lambda)")]
2687
fn sample_exponential(&mut self, lambda: f64) -> f64 {
2788
-ln(self.sample_uniform()) / lambda
2889
}
2990

91+
#[must_use]
92+
#[inline]
3093
#[debug_requires(
3194
probability >= 0.0_f64 && probability <= 1.0_f64,
3295
"0.0 <= probability <= 1.0"
@@ -36,4 +99,4 @@ pub trait Rng: Core {
3699
}
37100
}
38101

39-
impl<T: Core> Rng for T {}
102+
impl<R: RngCore> Rng for R {}

necsim-cuda/kernel/src/lib.rs

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -44,35 +44,43 @@ use necsim_core::cogs::{
4444
IncoherentLineageStore, LineageReference,
4545
};
4646
use necsim_core::reporter::NullReporter;
47+
use necsim_core::rng::Rng;
4748
use necsim_core::simulation::Simulation;
48-
use necsim_impls_no_std::rng::wyrng::WyRng;
49+
use necsim_impls_no_std::rng::cuda::CudaRng;
4950
use rust_cuda::common::RustToCuda;
5051
use rust_cuda::device::BorrowFromRust;
5152
use rustacuda_core::DeviceCopy;
5253

5354
#[no_mangle]
5455
/// # Safety
5556
/// This CUDA kernel is unsafe as it is called with raw pointers
56-
pub unsafe extern "ptx-kernel" fn simulate(c_void_ptr: *mut core::ffi::c_void) {
57-
use necsim_impls_no_std::cogs::active_lineage_sampler::independent::IndependentActiveLineageSampler;
58-
use necsim_impls_no_std::cogs::coalescence_sampler::independent::IndependentCoalescenceSampler;
59-
use necsim_impls_no_std::cogs::dispersal_sampler::in_memory::packed_alias::InMemoryPackedAliasDispersalSampler;
60-
use necsim_impls_no_std::cogs::event_sampler::independent::IndependentEventSampler;
61-
use necsim_impls_no_std::cogs::habitat::in_memory::InMemoryHabitat;
62-
use necsim_impls_no_std::cogs::lineage_reference::in_memory::InMemoryLineageReference;
63-
use necsim_impls_no_std::cogs::lineage_store::incoherent::in_memory::IncoherentInMemoryLineageStore;
57+
pub unsafe extern "ptx-kernel" fn simulate(
58+
simulation_c_ptr: *mut core::ffi::c_void,
59+
cuda_rng_c_ptr: *mut core::ffi::c_void,
60+
max_steps: usize,
61+
) {
62+
use necsim_impls_no_std::cogs::active_lineage_sampler::independent::IndependentActiveLineageSampler as ActiveLineageSampler;
63+
use necsim_impls_no_std::cogs::coalescence_sampler::independent::IndependentCoalescenceSampler as CoalescenceSampler;
64+
use necsim_impls_no_std::cogs::dispersal_sampler::in_memory::packed_alias::InMemoryPackedAliasDispersalSampler as DispersalSampler;
65+
use necsim_impls_no_std::cogs::event_sampler::independent::IndependentEventSampler as EventSampler;
66+
use necsim_impls_no_std::cogs::habitat::in_memory::InMemoryHabitat as Habitat;
67+
use necsim_impls_no_std::cogs::lineage_reference::in_memory::InMemoryLineageReference as LineageReference;
68+
use necsim_impls_no_std::cogs::lineage_store::incoherent::in_memory::IncoherentInMemoryLineageStore as LineageStore;
69+
use necsim_impls_no_std::rng::aes::AesRng as Rng;
6470

6571
simulate_generic(
66-
c_void_ptr
72+
simulation_c_ptr
6773
as *mut <Simulation<
68-
InMemoryHabitat,
69-
InMemoryPackedAliasDispersalSampler,
70-
InMemoryLineageReference,
71-
IncoherentInMemoryLineageStore<_>,
72-
IndependentCoalescenceSampler<_, _, _>,
73-
IndependentEventSampler<_, _, _, _>,
74-
IndependentActiveLineageSampler<_, _, _, _>,
74+
Habitat,
75+
DispersalSampler,
76+
LineageReference,
77+
LineageStore<_>,
78+
CoalescenceSampler<_, _, _>,
79+
EventSampler<_, _, _, _>,
80+
ActiveLineageSampler<_, _, _, _>,
7581
> as RustToCuda>::CudaRepresentation,
82+
cuda_rng_c_ptr as *mut <CudaRng<Rng> as RustToCuda>::CudaRepresentation,
83+
max_steps,
7684
)
7785
}
7886

@@ -84,21 +92,21 @@ unsafe fn simulate_generic<
8492
C: CoalescenceSampler<H, R, S> + RustToCuda,
8593
E: EventSampler<H, D, R, S, C> + RustToCuda,
8694
A: ActiveLineageSampler<H, D, R, S, C, E> + RustToCuda,
95+
G: Rng,
8796
>(
8897
simulation_ptr: *mut <Simulation<H, D, R, S, C, E, A> as RustToCuda>::CudaRepresentation,
98+
cuda_rng_ptr: *mut <CudaRng<G> as RustToCuda>::CudaRepresentation,
99+
max_steps: usize,
89100
) {
90101
Simulation::with_borrow_from_rust_mut(simulation_ptr, |simulation| {
91-
let max_steps: usize = 100;
92-
#[allow(clippy::cast_sign_loss)]
93-
let rng_seed: u64 = utils::index() as u64;
94-
95-
let mut rng = WyRng::from_seed(rng_seed);
96-
let mut reporter = NullReporter;
102+
CudaRng::with_borrow_from_rust_mut(cuda_rng_ptr, |cuda_rng| {
103+
let mut reporter = NullReporter;
97104

98-
//println!("{:#?}", simulation);
105+
//println!("{:#?}", simulation);
99106

100-
let (time, steps) = simulation.simulate_incremental(max_steps, &mut rng, &mut reporter);
107+
let (time, steps) = simulation.simulate_incremental(max_steps, cuda_rng, &mut reporter);
101108

102-
println!("time = {:?}, steps = {}", F64(time), steps);
109+
println!("time = {:?}, steps = {}", F64(time), steps);
110+
})
103111
})
104112
}

necsim-cuda/src/lib.rs

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use necsim_impls_no_std::cogs::event_sampler::independent::IndependentEventSampl
2828
use necsim_impls_no_std::cogs::habitat::in_memory::InMemoryHabitat;
2929
use necsim_impls_no_std::cogs::lineage_reference::in_memory::InMemoryLineageReference;
3030
use necsim_impls_no_std::cogs::lineage_store::incoherent::in_memory::IncoherentInMemoryLineageStore;
31+
use necsim_impls_no_std::rng::cuda::CudaRng;
3132
use necsim_impls_std::cogs::dispersal_sampler::in_memory::InMemoryDispersalSampler;
3233

3334
macro_rules! with_cuda {
@@ -79,7 +80,7 @@ impl CudaSimulation {
7980
dispersal: &Array2D<f64>,
8081
speciation_probability_per_generation: f64,
8182
sample_percentage: f64,
82-
_rng: &mut impl Rng,
83+
rng: &mut impl Rng,
8384
_reporter: &mut impl Reporter<InMemoryHabitat, InMemoryLineageReference>,
8485
) -> Result<(f64, usize)> {
8586
let habitat = InMemoryHabitat::new(habitat.clone());
@@ -104,6 +105,8 @@ impl CudaSimulation {
104105
.active_lineage_sampler(active_lineage_sampler)
105106
.build();
106107

108+
let mut cuda_rng = CudaRng::from_cloned(rng);
109+
107110
//let (time, steps) = simulation.simulate(rng, reporter);
108111

109112
let module_data = CString::new(include_str!(env!("KERNEL_PTX_PATH"))).unwrap();
@@ -135,15 +138,19 @@ impl CudaSimulation {
135138
//println!("{:?}", CurrentContext::get_resource_limit(ResourceLimit::MaxL2FetchGranularity));
136139

137140
if let Err(err) = simulation.lend_to_cuda_mut(|simulation_mut_ptr| {
138-
// Launching kernels is unsafe since Rust can't enforce safety - think of kernel launches
139-
// as a foreign-function call. In this case, it is - this kernel is written in CUDA C.
140-
unsafe {
141-
launch!(module.simulate<<<1, 1, 0, stream>>>(
142-
simulation_mut_ptr
143-
))?;
144-
}
145-
146-
stream.synchronize()
141+
cuda_rng.lend_to_cuda_mut(|cuda_rng_mut_ptr| {
142+
// Launching kernels is unsafe since Rust can't enforce safety - think of kernel launches
143+
// as a foreign-function call. In this case, it is - this kernel is written in CUDA C.
144+
unsafe {
145+
launch!(module.simulate<<<1, 1, 0, stream>>>(
146+
simulation_mut_ptr,
147+
cuda_rng_mut_ptr,
148+
1_000_usize // max steps on GPU
149+
))?;
150+
}
151+
152+
stream.synchronize()
153+
})
147154
}) {
148155
eprintln!("Running kernel failed with {:#?}!", err);
149156
}

necsim-impls-no-std/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ array2d = { path = "../array2d-no-std" }
1616
contracts = "0.6.0"
1717
float_next_after = { path = "../float-next-after-no-std" }
1818
nanorand = { version = "0.4.4", default-features = false, features = ["wyrand"] }
19+
aes-soft = "0.6.3"
1920

2021
rust-cuda-derive = { path = "../rust-cuda/rust-cuda-derive", optional = true }
2122
rustacuda_core = { version = "0.1.2", optional = true }

necsim-impls-no-std/src/cogs/active_lineage_sampler/independent/sampler.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,16 @@ impl<
5757
.lineage_store
5858
.extract_lineage_from_its_location(chosen_lineage_reference.clone());
5959

60+
// TODO: As we are only doing geometric sampling for now, need to immediately increment discrete time step
61+
// TODO: How do we choose the time step for now?
62+
// TODO: Need to prime incoherent RNG here with location, discrete time step and substep 0
63+
64+
// TODO: Need to get time to next event in while loop with exponential (simplest option)
65+
6066
let event_time = time + rng.sample_exponential(0.5_f64);
6167

68+
// TODO: Need to prime incoherent RNG here with location, discrete time step and substep 0
69+
6270
let unique_event_time: f64 = if event_time > time {
6371
event_time
6472
} else {

necsim-impls-no-std/src/rng/aes.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use aes_soft::cipher::generic_array::GenericArray;
2+
use aes_soft::cipher::{BlockCipher, NewBlockCipher};
3+
use aes_soft::Aes128;
4+
5+
#[allow(clippy::module_name_repetitions)]
6+
#[derive(Clone)]
7+
pub struct AesRng {
8+
cipher: Aes128,
9+
state: [u8; 16],
10+
cached: bool,
11+
}
12+
13+
impl necsim_core::rng::RngCore for AesRng {
14+
type Seed = [u8; 16];
15+
16+
#[must_use]
17+
#[inline]
18+
fn from_seed(seed: Self::Seed) -> Self {
19+
Self {
20+
cipher: Aes128::new(GenericArray::from_slice(&seed)),
21+
state: [0_u8; 16],
22+
cached: false,
23+
}
24+
}
25+
26+
#[must_use]
27+
#[inline]
28+
fn sample_u64(&mut self) -> u64 {
29+
self.cached ^= true;
30+
31+
if self.cached {
32+
// one more u64 will be cached
33+
self.cipher
34+
.encrypt_block(GenericArray::from_mut_slice(&mut self.state));
35+
36+
u64::from_le_bytes([
37+
self.state[0],
38+
self.state[1],
39+
self.state[2],
40+
self.state[3],
41+
self.state[4],
42+
self.state[5],
43+
self.state[6],
44+
self.state[7],
45+
])
46+
} else {
47+
// one more u64 was cached
48+
let rand_u64 = u64::from_le_bytes([
49+
self.state[8],
50+
self.state[9],
51+
self.state[10],
52+
self.state[11],
53+
self.state[12],
54+
self.state[13],
55+
self.state[14],
56+
self.state[15],
57+
]);
58+
59+
self.state[9] = self.state[9].wrapping_add(1);
60+
61+
rand_u64
62+
}
63+
}
64+
}
65+
66+
impl necsim_core::rng::IncoherentRngCore for AesRng {
67+
type Prime = [u8; 16];
68+
69+
fn prime_with(&mut self, prime: Self::Prime) {
70+
self.state = prime;
71+
self.cached = false;
72+
}
73+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
use necsim_core::rng::RngCore;
2+
3+
#[allow(clippy::module_name_repetitions)]
4+
#[derive(Clone, RustToCuda, LendToCuda)]
5+
pub struct CudaRng<R: RngCore>(R);
6+
7+
impl<R: RngCore> CudaRng<R> {
8+
#[must_use]
9+
#[inline]
10+
pub fn from_cloned(rng: &mut R) -> Self {
11+
Self(rng.clone())
12+
}
13+
}
14+
15+
impl<R: RngCore> RngCore for CudaRng<R> {
16+
type Seed = <R as RngCore>::Seed;
17+
18+
#[must_use]
19+
#[inline]
20+
fn from_seed(seed: Self::Seed) -> Self {
21+
Self(R::from_seed(seed))
22+
}
23+
24+
#[must_use]
25+
#[inline]
26+
fn sample_u64(&mut self) -> u64 {
27+
self.0.sample_u64()
28+
}
29+
}

necsim-impls-no-std/src/rng/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
pub mod wyrng;
1+
pub mod aes;
2+
pub mod wy;
3+
4+
#[cfg(feature = "cuda")]
5+
pub mod cuda;

0 commit comments

Comments
 (0)