diff --git a/Cargo.lock.patch b/Cargo.lock.patch index 02d4ac1e..89f5fc69 100644 --- a/Cargo.lock.patch +++ b/Cargo.lock.patch @@ -1,7 +1,7 @@ diff --git a/necsim-rust/Cargo.lock b/necsim-rust/Cargo.lock --- a/necsim-rust/Cargo.lock +++ b/necsim-rust/Cargo.lock -@@ -55,6 +55,79 @@ dependencies = [ +@@ -28,6 +28,85 @@ dependencies = [ "memchr", ] @@ -9,11 +9,11 @@ diff --git a/necsim-rust/Cargo.lock b/necsim-rust/Cargo.lock +name = "analysis-performance-exponential" +version = "0.1.0" +dependencies = [ -+ "contracts", ++ "analysis-performance-exponential-kernel", + "necsim-core", + "necsim-core-bond", ++ "necsim-core-maths", + "necsim-impls-no-std", -+ "ptx-builder", + "rust-cuda", + "structopt", +] @@ -38,6 +38,7 @@ diff --git a/necsim-rust/Cargo.lock b/necsim-rust/Cargo.lock + "log", + "necsim-core", + "necsim-core-bond", ++ "necsim-core-maths", + "necsim-impls-no-std", + "necsim-impls-std", + "necsim-plugins-common", @@ -51,7 +52,9 @@ diff --git a/necsim-rust/Cargo.lock b/necsim-rust/Cargo.lock + "contracts", + "necsim-core", + "necsim-core-bond", ++ "necsim-core-maths", + "necsim-impls-no-std", ++ "serde", + "structopt", +] + @@ -60,6 +63,7 @@ diff --git a/necsim-rust/Cargo.lock b/necsim-rust/Cargo.lock +version = "0.1.0" +dependencies = [ + "necsim-core", ++ "necsim-core-maths", + "necsim-impls-no-std", + "rand", + "structopt", @@ -73,23 +77,25 @@ diff --git a/necsim-rust/Cargo.lock b/necsim-rust/Cargo.lock + "contracts", + "necsim-core", + "necsim-core-bond", ++ "necsim-core-maths", + "necsim-impls-no-std", + "necsim-impls-std", ++ "serde", + "structopt", +] + [[package]] name = "ansi_term" - version = "0.11.0" -@@ -169,6 +242,15 @@ dependencies = [ + version = "0.12.1" +@@ -157,6 +236,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "byte-unit" -+version = "4.0.12" ++version = "4.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" -+checksum = "063197e6eb4b775b64160dedde7a0986bb2836cce140e9492e9e96f28e18bcd8" ++checksum = "956ffc5b0ec7d7a6949e3f21fd63ba5af4cffdc2ba1e0b7bf62b481458c4ae7f" +dependencies = [ + "utf8-width", +] @@ -97,7 +103,7 @@ diff --git a/necsim-rust/Cargo.lock b/necsim-rust/Cargo.lock [[package]] name = "byteorder" version = "1.4.3" -@@ -1536,6 +1618,12 @@ version = "0.2.2" +@@ -1825,6 +1913,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" @@ -108,5 +114,5 @@ diff --git a/necsim-rust/Cargo.lock b/necsim-rust/Cargo.lock +checksum = "7cf7d77f457ef8dfa11e4cd5933c5ddb5dc52a94664071951219a97710f0a32b" + [[package]] - name = "vcpkg" - version = "0.2.15" + name = "utf8parse" + version = "0.2.0" diff --git a/Cargo.toml.patch b/Cargo.toml.patch index a3d9bae9..b1640495 100644 --- a/Cargo.toml.patch +++ b/Cargo.toml.patch @@ -1,17 +1,17 @@ diff --git a/necsim-rust/Cargo.toml b/necsim-rust/Cargo.toml --- a/necsim-rust/Cargo.toml +++ b/necsim-rust/Cargo.toml -@@ -36,6 +36,13 @@ members = [ - "rust-cuda", - "rust-cuda/rust-cuda-derive", - +@@ -31,6 +31,13 @@ members = [ + "rustcoalescence/algorithms/cuda", + "rustcoalescence/algorithms/cuda/gpu-kernel", + "rustcoalescence/algorithms/cuda/cpu-kernel", ++ + "analysis/rng/randomness", + "analysis/rng/hash", + "analysis/rng/correlation", + "analysis/performance/exponential", + "analysis/performance/exponential/kernel", + "analysis/performance/reporting", -+ - "third-party/array2d-no-std", - "third-party/contracts", - "third-party/float-next-after-no-std", + ] + + default-members = [ diff --git a/analysis/performance/exponential/Cargo.toml b/analysis/performance/exponential/Cargo.toml index 7f5494eb..16682147 100644 --- a/analysis/performance/exponential/Cargo.toml +++ b/analysis/performance/exponential/Cargo.toml @@ -8,14 +8,13 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +analysis-performance-exponential-kernel = { path = "kernel" } + necsim-core = { path = "../../../necsim/core", features = ["cuda"] } -necsim-core-bond = { path = "../../../necsim/core/bond", features = ["cuda"] } +necsim-core-bond = { path = "../../../necsim/core/bond" } +necsim-core-maths = { path = "../../../necsim/core/maths" } necsim-impls-no-std = { path = "../../../necsim/impls/no-std" } -rust-cuda = { path = "../../../rust-cuda", features = ["host"] } - -contracts = { path = "../../../third-party/contracts" } -structopt = "0.3.21" +rust-cuda = { git = "https://github.com/MomoLangenstein/rust-cuda", branch = "main", features = ["host"] } -[build-dependencies] -ptx-builder = { path = "../../../third-party/rust-ptx-builder" } +structopt = "0.3" diff --git a/analysis/performance/exponential/build.rs b/analysis/performance/exponential/build.rs deleted file mode 100644 index e7851454..00000000 --- a/analysis/performance/exponential/build.rs +++ /dev/null @@ -1,7 +0,0 @@ -use ptx_builder::{builder::Builder, error::Result, reporter::CargoAdapter}; - -fn main() -> Result<()> { - let builder = Builder::new("kernel")?; - - CargoAdapter::with_env_var("CUDA_PTX_KERNEL").build(builder); -} diff --git a/analysis/performance/exponential/kernel/.cargo/config.toml b/analysis/performance/exponential/kernel/.cargo/config.toml index 10636301..12c64e86 100644 --- a/analysis/performance/exponential/kernel/.cargo/config.toml +++ b/analysis/performance/exponential/kernel/.cargo/config.toml @@ -2,7 +2,7 @@ pipelining = false [target.nvptx64-nvidia-cuda] -rustflags = ["-Clink-args=--arch sm_35", "-Cpanic=abort", "-Clto=no", "-Clink-arg=-Olto"] +rustflags = ["-Clink-args=--arch=sm_35", "-Cpanic=abort", "-Clinker-plugin-lto", "-Ccodegen-units=1", "-Clink-arg=-Olto"] [unstable] build-std = ["core", "alloc"] diff --git a/analysis/performance/exponential/kernel/Cargo.toml b/analysis/performance/exponential/kernel/Cargo.toml index 68ca6028..2b81d391 100644 --- a/analysis/performance/exponential/kernel/Cargo.toml +++ b/analysis/performance/exponential/kernel/Cargo.toml @@ -5,12 +5,15 @@ authors = ["Momo Langenstein "] license = "MIT OR Apache-2.0" edition = "2018" +[lib] +crate-type = ["cdylib", "rlib"] + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] necsim-core = { path = "../../../../necsim/core", features = ["cuda"] } -necsim-core-bond = { path = "../../../../necsim/core/bond", features = ["cuda"] } -rust-cuda = { path = "../../../../rust-cuda", features = [] } +necsim-core-bond = { path = "../../../../necsim/core/bond" } +rust-cuda = { git = "https://github.com/MomoLangenstein/rust-cuda", branch = "main", features = [] } necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["cuda"] } necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } -contracts = { path = "../../../../third-party/contracts" } +contracts = "0.6.3" diff --git a/analysis/performance/exponential/kernel/src/benchmark.rs b/analysis/performance/exponential/kernel/src/benchmark.rs new file mode 100644 index 00000000..34172f45 --- /dev/null +++ b/analysis/performance/exponential/kernel/src/benchmark.rs @@ -0,0 +1,55 @@ +use core::{ + num::NonZeroU32, + sync::atomic::{AtomicU64, Ordering}, +}; + +use necsim_core::{ + cogs::SeedableRng, + landscape::{IndexedLocation, Location}, +}; +use necsim_core_bond::{OffByOneU32, PositiveF64}; + +use necsim_impls_cuda::cogs::maths::NvptxMathsCore; +use necsim_impls_no_std::cogs::{ + active_lineage_sampler::independent::event_time_sampler::EventTimeSampler, + habitat::non_spatial::NonSpatialHabitat, rng::wyhash::WyHash, +}; + +use crate::{sample, UniformTurnoverRate}; + +#[inline] +#[allow(dead_code)] +pub fn inter_event_times< + E: EventTimeSampler< + NvptxMathsCore, + NonSpatialHabitat, + WyHash, + UniformTurnoverRate, + >, +>( + event_time_sampler: E, + seed: u64, + lambda: PositiveF64, + limit: u128, + total_cycles_sum: &AtomicU64, + total_time_sum: &AtomicU64, +) { + let habitat = NonSpatialHabitat::new((OffByOneU32::one(), OffByOneU32::one()), unsafe { + NonZeroU32::new_unchecked(1) + }); + let rng = WyHash::seed_from_u64(seed + (rust_cuda::device::utils::index() as u64)); + let turnover_rate = UniformTurnoverRate::new(lambda); + let indexed_location = IndexedLocation::new(Location::new(0, 0), 0); + + let (cycles, time) = sample::exponential_inter_event_times( + habitat, + rng, + turnover_rate, + event_time_sampler, + indexed_location, + limit, + ); + + total_cycles_sum.fetch_add(cycles, Ordering::Relaxed); + total_time_sum.fetch_add(time, Ordering::Relaxed); +} diff --git a/analysis/performance/exponential/kernel/src/clock.rs b/analysis/performance/exponential/kernel/src/clock.rs new file mode 100644 index 00000000..7fab4df8 --- /dev/null +++ b/analysis/performance/exponential/kernel/src/clock.rs @@ -0,0 +1,17 @@ +/// A predefined, read-only 64-bit unsigned cycle counter. +#[inline] +#[must_use] +pub fn counter() -> u64 { + let counter: u64; + unsafe { core::arch::asm!("mov.u64 {}, %clock64;", out(reg64) counter, options(nostack)) }; + counter +} + +/// A predefined, 64-bit global nanosecond timer. +#[inline] +#[must_use] +pub fn timer_ns() -> u64 { + let timer: u64; + unsafe { core::arch::asm!("mov.u64 {}, %globaltimer;", out(reg64) timer, options(nostack)) }; + timer +} diff --git a/analysis/performance/exponential/kernel/src/lib.rs b/analysis/performance/exponential/kernel/src/lib.rs index 1d519749..bb4e2c70 100644 --- a/analysis/performance/exponential/kernel/src/lib.rs +++ b/analysis/performance/exponential/kernel/src/lib.rs @@ -1,194 +1,95 @@ -#![cfg(target_os = "cuda")] #![deny(clippy::pedantic)] #![no_std] -#![feature(abi_ptx)] -#![feature(alloc_error_handler)] -#![feature(panic_info_message)] -#![feature(atomic_from_mut)] -#![feature(asm)] +#![cfg_attr(target_os = "cuda", feature(abi_ptx))] +#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] +#![cfg_attr(target_os = "cuda", feature(panic_info_message))] +#![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] +#![cfg_attr(target_os = "cuda", feature(stdsimd))] +#![allow(clippy::type_complexity)] extern crate alloc; -#[macro_use] -extern crate contracts; - -use core::sync::atomic::{AtomicU64, Ordering}; +use core::sync::atomic::AtomicU64; use necsim_core::{ - cogs::{Backup, Habitat, PrimeableRng, RngCore, TurnoverRate}, - landscape::{IndexedLocation, Location}, + cogs::{Backup, Habitat, MathsCore, TurnoverRate}, + landscape::Location, }; use necsim_core_bond::{NonNegativeF64, PositiveF64}; -use necsim_impls_no_std::cogs::{ - active_lineage_sampler::independent::event_time_sampler::{ - exp::ExpEventTimeSampler, poisson::PoissonEventTimeSampler, EventTimeSampler, - }, - habitat::non_spatial::NonSpatialHabitat, - rng::wyhash::WyHash, -}; - -use rust_cuda::{ - common::{DeviceBoxConst, DeviceBoxMut}, - device::{nvptx, utils}, -}; - -#[global_allocator] -static _GLOBAL_ALLOCATOR: utils::PTXAllocator = utils::PTXAllocator; - -#[cfg(not(debug_assertions))] -#[panic_handler] -fn panic(_panic_info: &::core::panic::PanicInfo) -> ! { - unsafe { nvptx::trap() } -} - -#[cfg(debug_assertions)] -#[panic_handler] -fn panic(panic_info: &::core::panic::PanicInfo) -> ! { - use rust_cuda::println; - - println!( - "Panic occurred at {:?}: {:?}!", - panic_info.location(), - panic_info - .message() - .unwrap_or(&format_args!("unknown reason")) - ); - - unsafe { nvptx::trap() } -} - -#[alloc_error_handler] -fn alloc_error_handler(_: core::alloc::Layout) -> ! { - unsafe { nvptx::trap() } -} +#[cfg(target_os = "cuda")] +mod benchmark; -/// A predefined, read-only 64-bit unsigned cycle counter. -#[inline] -#[must_use] -pub fn clock_counter() -> u64 { - let counter: u64; - unsafe { asm!("mov.u64 {}, %clock64;", out(reg64) counter, options(nostack)) }; - counter -} +#[cfg(target_os = "cuda")] +mod clock; -/// A predefined, 64-bit global nanosecond timer. -#[inline] -#[must_use] -pub fn clock_timer_ns() -> u64 { - let timer: u64; - unsafe { asm!("mov.u64 {}, %globaltimer;", out(reg64) timer, options(nostack)) }; - timer -} +#[cfg(target_os = "cuda")] +mod sample; -#[no_mangle] -pub unsafe extern "ptx-kernel" fn benchmark_poisson( +#[rust_cuda::common::kernel(pub use link_poisson_kernel! as impl PoissonKernel for BenchmarkPoissonKernel)] +pub fn benchmark_poisson( + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] seed: u64, + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] lambda: PositiveF64, + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] delta_t: PositiveF64, - limit: DeviceBoxConst, - total_cycles_sum: DeviceBoxMut, - total_time_sum: DeviceBoxMut, + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] + limit: &[u8; 16], + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] + total_cycles_sum: &AtomicU64, + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] + total_time_sum: &AtomicU64, ) { - benchmark_inter_event_times( + use necsim_impls_no_std::cogs::active_lineage_sampler::independent::event_time_sampler::poisson::PoissonEventTimeSampler; + + benchmark::inter_event_times( PoissonEventTimeSampler::new(delta_t), seed, lambda, - *limit.as_ref(), + u128::from_le_bytes(*limit), total_cycles_sum, total_time_sum, - ) + ); } -#[no_mangle] -pub unsafe extern "ptx-kernel" fn benchmark_exp( +#[rust_cuda::common::kernel(pub use link_exp_kernel! as impl ExpKernel for BenchmarkExpKernel)] +pub fn benchmark_exp( + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] seed: u64, + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] lambda: PositiveF64, + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] delta_t: PositiveF64, - limit: DeviceBoxConst, - total_cycles_sum: DeviceBoxMut, - total_time_sum: DeviceBoxMut, + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] + limit: &[u8; 16], + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] + total_cycles_sum: &AtomicU64, + #[rustfmt::skip] + #[kernel(pass = SafeDeviceCopy)] + total_time_sum: &AtomicU64, ) { - benchmark_inter_event_times( + use necsim_impls_no_std::cogs::active_lineage_sampler::independent::event_time_sampler::exp::ExpEventTimeSampler; + + benchmark::inter_event_times( ExpEventTimeSampler::new(delta_t), seed, lambda, - *limit.as_ref(), + u128::from_le_bytes(*limit), total_cycles_sum, total_time_sum, - ) -} - -#[inline] -fn benchmark_inter_event_times< - E: EventTimeSampler, ->( - event_time_sampler: E, - seed: u64, - lambda: PositiveF64, - limit: u128, - mut total_cycles_sum: DeviceBoxMut, - mut total_time_sum: DeviceBoxMut, -) { - let habitat = NonSpatialHabitat::new((1, 1), 1); - let rng = WyHash::seed_from_u64(seed + (utils::index() as u64)); - let turnover_rate = UniformTurnoverRate { - turnover_rate: lambda, - }; - let indexed_location = IndexedLocation::new(Location::new(0, 0), 0); - - let (cycles, time) = sample_exponential_inter_event_times( - habitat, - rng, - turnover_rate, - event_time_sampler, - indexed_location, - limit, ); - - AtomicU64::from_mut(total_cycles_sum.as_mut()).fetch_add(cycles, Ordering::Relaxed); - AtomicU64::from_mut(total_time_sum.as_mut()).fetch_add(time, Ordering::Relaxed); -} - -#[inline] -#[allow(clippy::needless_pass_by_value)] -fn sample_exponential_inter_event_times< - H: Habitat, - G: PrimeableRng, - T: TurnoverRate, - E: EventTimeSampler, ->( - habitat: H, - mut rng: G, - turnover_rate: T, - event_time_sampler: E, - indexed_location: IndexedLocation, - limit: u128, -) -> (u64, u64) { - let mut last_event_time = NonNegativeF64::zero(); - - let time_start = clock_timer_ns(); - let cycle_start = clock_counter(); - - for _ in 0..limit { - let next_event_time = event_time_sampler.next_event_time_at_indexed_location_weakly_after( - &indexed_location, - last_event_time, - &habitat, - &mut rng, - &turnover_rate, - ); - - last_event_time = next_event_time; - } - - let cycle_finish = clock_counter(); - let time_finish = clock_timer_ns(); - - ( - time_finish.wrapping_sub(time_start), - cycle_finish.wrapping_sub(cycle_start), - ) } #[derive(Debug)] @@ -196,7 +97,14 @@ pub struct UniformTurnoverRate { turnover_rate: PositiveF64, } -#[contract_trait] +impl UniformTurnoverRate { + #[must_use] + pub fn new(turnover_rate: PositiveF64) -> Self { + Self { turnover_rate } + } +} + +#[contracts::contract_trait] impl Backup for UniformTurnoverRate { unsafe fn backup_unchecked(&self) -> Self { Self { @@ -205,8 +113,8 @@ impl Backup for UniformTurnoverRate { } } -#[contract_trait] -impl TurnoverRate for UniformTurnoverRate { +#[contracts::contract_trait] +impl> TurnoverRate for UniformTurnoverRate { #[must_use] #[inline] fn get_turnover_rate_at_location(&self, _location: &Location, _habitat: &H) -> NonNegativeF64 { @@ -216,3 +124,40 @@ impl TurnoverRate for UniformTurnoverRate { unsafe { core::ptr::read_volatile(&self.turnover_rate) }.into() } } + +#[cfg(target_os = "cuda")] +mod cuda_prelude { + use core::arch::nvptx; + + use rust_cuda::device::utils; + + #[global_allocator] + static _GLOBAL_ALLOCATOR: utils::PTXAllocator = utils::PTXAllocator; + + #[cfg(not(debug_assertions))] + #[panic_handler] + fn panic(_panic_info: &::core::panic::PanicInfo) -> ! { + unsafe { nvptx::trap() } + } + + #[cfg(debug_assertions)] + #[panic_handler] + fn panic(panic_info: &::core::panic::PanicInfo) -> ! { + use rust_cuda::println; + + println!( + "Panic occurred at {:?}: {:?}!", + panic_info.location(), + panic_info + .message() + .unwrap_or(&format_args!("unknown reason")) + ); + + unsafe { nvptx::trap() } + } + + #[alloc_error_handler] + fn alloc_error_handler(_: core::alloc::Layout) -> ! { + unsafe { nvptx::trap() } + } +} diff --git a/analysis/performance/exponential/kernel/src/sample.rs b/analysis/performance/exponential/kernel/src/sample.rs new file mode 100644 index 00000000..8412ebcd --- /dev/null +++ b/analysis/performance/exponential/kernel/src/sample.rs @@ -0,0 +1,51 @@ +use necsim_core::{ + cogs::{Habitat, MathsCore, PrimeableRng, TurnoverRate}, + landscape::IndexedLocation, +}; +use necsim_core_bond::NonNegativeF64; + +use necsim_impls_no_std::cogs::active_lineage_sampler::independent::event_time_sampler::EventTimeSampler; + +use crate::clock; + +#[inline] +#[allow(clippy::needless_pass_by_value)] +pub fn exponential_inter_event_times< + M: MathsCore, + H: Habitat, + G: PrimeableRng, + T: TurnoverRate, + E: EventTimeSampler, +>( + habitat: H, + mut rng: G, + turnover_rate: T, + event_time_sampler: E, + indexed_location: IndexedLocation, + limit: u128, +) -> (u64, u64) { + let mut last_event_time = NonNegativeF64::zero(); + + let time_start = clock::timer_ns(); + let cycle_start = clock::counter(); + + for _ in 0..limit { + let next_event_time = event_time_sampler.next_event_time_at_indexed_location_weakly_after( + &indexed_location, + last_event_time, + &habitat, + &mut rng, + &turnover_rate, + ); + + last_event_time = next_event_time; + } + + let cycle_finish = clock::counter(); + let time_finish = clock::timer_ns(); + + ( + time_finish.wrapping_sub(time_start), + cycle_finish.wrapping_sub(cycle_start), + ) +} diff --git a/analysis/performance/exponential/src/main.rs b/analysis/performance/exponential/src/main.rs index 7723d78b..28d7face 100644 --- a/analysis/performance/exponential/src/main.rs +++ b/analysis/performance/exponential/src/main.rs @@ -1,24 +1,24 @@ #![deny(clippy::pedantic)] #![feature(associated_type_bounds)] -#[macro_use] -extern crate contracts; - use std::{ convert::TryFrom, + num::NonZeroU32, + sync::atomic::AtomicU64, time::{Duration, Instant}, }; -use necsim_core_bond::{NonNegativeF64, PositiveF64}; use structopt::{ clap::{Error, ErrorKind}, StructOpt, }; use necsim_core::{ - cogs::{Backup, Habitat, PrimeableRng, RngCore, TurnoverRate}, + cogs::{Habitat, MathsCore, PrimeableRng, SeedableRng, TurnoverRate}, landscape::{IndexedLocation, Location}, }; +use necsim_core_bond::{NonNegativeF64, OffByOneU32, PositiveF64}; +use necsim_core_maths::IntrinsicsMathsCore; use necsim_impls_no_std::cogs::{ active_lineage_sampler::independent::event_time_sampler::{ exp::ExpEventTimeSampler, poisson::PoissonEventTimeSampler, EventTimeSampler, @@ -27,6 +27,22 @@ use necsim_impls_no_std::cogs::{ rng::wyhash::WyHash, }; +use rust_cuda::{ + host::{CudaDropWrapper, LaunchConfig, LaunchPackage, Launcher, TypedKernel}, + rustacuda::{ + context::{Context, ContextFlags}, + device::Device, + error::CudaResult, + function::{BlockSize, GridSize}, + stream::{Stream, StreamFlags}, + }, +}; + +use analysis_performance_exponential_kernel::{ + link_exp_kernel, link_poisson_kernel, ExpKernel, ExpKernelArgs, PoissonKernel, + PoissonKernelArgs, UniformTurnoverRate, +}; + #[derive(Debug, StructOpt)] enum SamplingMode { Poisson, @@ -66,18 +82,19 @@ fn main() { let options = Options::from_args(); if options.cuda { - main_gpu(&options) + main_gpu(&options); } else { - main_cpu(&options) + main_cpu(&options); } } fn main_cpu(options: &Options) { - let habitat = NonSpatialHabitat::new((1, 1), 1); - let rng = WyHash::seed_from_u64(options.seed); - let turnover_rate = UniformTurnoverRate { - turnover_rate: options.lambda, - }; + let habitat = NonSpatialHabitat::new( + (OffByOneU32::one(), OffByOneU32::one()), + NonZeroU32::new(1).unwrap(), + ); + let rng = WyHash::::seed_from_u64(options.seed); + let turnover_rate = UniformTurnoverRate::new(options.lambda); let indexed_location = IndexedLocation::new(Location::new(0, 0), 0); match options.mode { @@ -101,12 +118,6 @@ fn main_cpu(options: &Options) { } fn main_gpu(options: &Options) { - use rust_cuda::{ - common::{DeviceBoxConst, DeviceBoxMut}, - rustacuda::{launch, memory::DeviceBox, prelude::*}, - }; - use std::ffi::CString; - rust_cuda::rustacuda::quick_init().unwrap(); // Get the first device @@ -115,56 +126,42 @@ fn main_gpu(options: &Options) { // Create a context associated to this device let _context = Context::create_and_push(ContextFlags::SCHED_AUTO, device).unwrap(); - // Load the module containing the function we want to call - let module_data = CString::new(include_str!(env!("CUDA_PTX_KERNEL"))).unwrap(); - let module = Module::load_from_string(&module_data).unwrap(); - // Create a stream to submit work to let stream = Stream::new(StreamFlags::NON_BLOCKING, None).unwrap(); - let limit = DeviceBox::new(&options.limit).unwrap(); - - let mut total_cycles_sum = DeviceBox::new(&0_u64).unwrap(); - let mut total_time_sum = DeviceBox::new(&0_u64).unwrap(); + let mut total_cycles_sum = AtomicU64::new(0_u64); + let mut total_time_sum = AtomicU64::new(0_u64); match options.mode { - SamplingMode::Exponential => unsafe { - launch!(module.benchmark_exp<<<256, 32, 0, stream>>>( + SamplingMode::Exponential => { + let mut kernel = BenchmarkExpKernel::try_new(stream, 256.into(), 32.into()).unwrap(); + + kernel.benchmark_exp( options.seed, options.lambda, options.delta_t, - DeviceBoxConst::from(&limit), - DeviceBoxMut::from(&mut total_cycles_sum), - DeviceBoxMut::from(&mut total_time_sum) - )) - .unwrap() + &options.limit.to_le_bytes(), + &total_cycles_sum, + &total_time_sum, + ) }, - SamplingMode::Poisson => unsafe { - launch!(module.benchmark_poisson<<<256, 32, 0, stream>>>( + SamplingMode::Poisson => { + let mut kernel = + BenchmarkPoissonKernel::try_new(stream, 256.into(), 32.into()).unwrap(); + + kernel.benchmark_poisson( options.seed, options.lambda, options.delta_t, - DeviceBoxConst::from(&limit), - DeviceBoxMut::from(&mut total_cycles_sum), - DeviceBoxMut::from(&mut total_time_sum) - )) - .unwrap() + &options.limit.to_le_bytes(), + &total_cycles_sum, + &total_time_sum, + ) }, } + .unwrap(); - // The kernel launch is asynchronous, so we wait for the kernel to finish - // executing - stream.synchronize().unwrap(); - - let mut result_total_cycles_sum = 0_u64; - let mut result_total_time_sum = 0_u64; - - total_cycles_sum - .copy_to(&mut result_total_cycles_sum) - .unwrap(); - total_time_sum.copy_to(&mut result_total_time_sum).unwrap(); - - let execution_time = Duration::from_nanos(result_total_time_sum / (32 * 256)); + let execution_time = Duration::from_nanos(*total_time_sum.get_mut() / (32 * 256)); println!( "Drawing {} exponential inter-event times with {:?} took {:?} ({}s) [{} cycles].", @@ -172,16 +169,17 @@ fn main_gpu(options: &Options) { options.mode, execution_time, execution_time.as_secs_f64(), - result_total_cycles_sum / (32 * 256), + *total_cycles_sum.get_mut() / (32 * 256), ); } #[allow(clippy::needless_pass_by_value)] fn sample_exponential_inter_event_times< - H: Habitat, - G: PrimeableRng, - T: TurnoverRate, - E: EventTimeSampler, + M: MathsCore, + H: Habitat, + G: PrimeableRng, + T: TurnoverRate, + E: EventTimeSampler, >( habitat: H, mut rng: G, @@ -217,28 +215,98 @@ fn sample_exponential_inter_event_times< ); } -#[derive(Debug)] -pub struct UniformTurnoverRate { - turnover_rate: PositiveF64, +pub struct BenchmarkPoissonKernel { + kernel: TypedKernel, + stream: CudaDropWrapper, + grid: GridSize, + block: BlockSize, + watcher: (), } -#[contract_trait] -impl Backup for UniformTurnoverRate { - unsafe fn backup_unchecked(&self) -> Self { - Self { - turnover_rate: self.turnover_rate, +link_poisson_kernel!(); + +impl BenchmarkPoissonKernel { + fn try_new(stream: Stream, grid: GridSize, block: BlockSize) -> CudaResult + where + Self: PoissonKernel, + { + let stream = CudaDropWrapper::from(stream); + let kernel = Self::new_kernel()?; + + Ok(Self { + kernel, + stream, + grid, + block, + watcher: (), + }) + } +} + +impl Launcher for BenchmarkPoissonKernel { + type CompilationWatcher = (); + type KernelTraitObject = dyn PoissonKernel; + + fn get_launch_package(&mut self) -> LaunchPackage { + LaunchPackage { + config: LaunchConfig { + grid: self.grid.clone(), + block: self.block.clone(), + shared_memory_size: 0_u32, + }, + + kernel: &mut self.kernel, + stream: &mut self.stream, + + watcher: &mut self.watcher, } } } -#[contract_trait] -impl TurnoverRate for UniformTurnoverRate { - #[must_use] - #[inline] - fn get_turnover_rate_at_location(&self, _location: &Location, _habitat: &H) -> NonNegativeF64 { - // Use a volatile read to ensure that the turnover rate cannot be - // optimised out of this benchmark test +pub struct BenchmarkExpKernel { + kernel: TypedKernel, + stream: CudaDropWrapper, + grid: GridSize, + block: BlockSize, + watcher: (), +} + +link_exp_kernel!(); + +impl BenchmarkExpKernel { + fn try_new(stream: Stream, grid: GridSize, block: BlockSize) -> CudaResult + where + Self: ExpKernel, + { + let stream = CudaDropWrapper::from(stream); + let kernel = Self::new_kernel()?; + + Ok(Self { + kernel, + stream, + grid, + block, + watcher: (), + }) + } +} + +impl Launcher for BenchmarkExpKernel { + type CompilationWatcher = (); + type KernelTraitObject = dyn ExpKernel; - unsafe { core::ptr::read_volatile(&self.turnover_rate) }.into() + fn get_launch_package(&mut self) -> LaunchPackage { + LaunchPackage { + config: LaunchConfig { + grid: self.grid.clone(), + block: self.block.clone(), + shared_memory_size: 0_u32, + }, + + kernel: &mut self.kernel, + stream: &mut self.stream, + + watcher: &mut self.watcher, + } } } diff --git a/analysis/performance/reporting/Cargo.toml b/analysis/performance/reporting/Cargo.toml index 47c71e6e..fb0cd1ef 100644 --- a/analysis/performance/reporting/Cargo.toml +++ b/analysis/performance/reporting/Cargo.toml @@ -10,10 +10,11 @@ license = "MIT OR Apache-2.0" [dependencies] necsim-core = { path = "../../../necsim/core" } necsim-core-bond = { path = "../../../necsim/core/bond" } +necsim-core-maths = { path = "../../../necsim/core/maths" } necsim-impls-no-std = { path = "../../../necsim/impls/no-std" } necsim-impls-std = { path = "../../../necsim/impls/std" } necsim-plugins-common = { path = "../../../necsim/plugins/common" } -structopt = "0.3.21" -log = { version = "0.4.14", features = ["std"] } -colored = "2.0.0" +structopt = "0.3" +log = { version = "0.4", features = ["std"] } +colored = "2.0" diff --git a/analysis/performance/reporting/src/main.rs b/analysis/performance/reporting/src/main.rs index f59d7796..eba749d6 100644 --- a/analysis/performance/reporting/src/main.rs +++ b/analysis/performance/reporting/src/main.rs @@ -4,18 +4,16 @@ use std::{convert::TryFrom, marker::PhantomData}; use log::LevelFilter; -use necsim_core_bond::{ClosedUnitF64, NonNegativeF64}; -use necsim_impls_std::cogs::rng::pcg::Pcg; use structopt::{ clap::{Error, ErrorKind}, StructOpt, }; -use necsim_core::{ - cogs::{LineageStore, RngCore}, - reporter::Reporter, - simulation::Simulation, -}; +use necsim_core_bond::{ClosedUnitF64, NonNegativeF64}; +use necsim_core_maths::IntrinsicsMathsCore; +use necsim_impls_std::cogs::rng::pcg::Pcg; + +use necsim_core::{cogs::SeedableRng, reporter::Reporter, simulation::SimulationBuilder}; use necsim_impls_no_std::cogs::{ active_lineage_sampler::classical::ClassicalActiveLineageSampler, coalescence_sampler::unconditional::UnconditionalCoalescenceSampler, @@ -24,6 +22,7 @@ use necsim_impls_no_std::cogs::{ event_sampler::unconditional::UnconditionalEventSampler, habitat::almost_infinite::AlmostInfiniteHabitat, immigration_entry::never::NeverImmigrationEntry, + lineage_reference::in_memory::InMemoryLineageReference, lineage_store::coherent::globally::almost_infinite::AlmostInfiniteLineageStore, origin_sampler::{almost_infinite::AlmostInfiniteOriginSampler, pre_sampler::OriginPreSampler}, speciation_probability::uniform::UniformSpeciationProbability, @@ -55,7 +54,7 @@ struct Options { #[structopt(long)] seed: u64, #[structopt(long)] - radius: u32, + radius: u16, #[structopt(long, parse(try_from_str = try_from_str))] sigma: NonNegativeF64, #[structopt(long, parse(try_from_str = try_from_str))] @@ -91,7 +90,7 @@ fn main() { match options.mode { ReportingMode::ProgressOnly => simulate(&options, necsim_core::ReporterGroup![progress]), ReportingMode::ProgressSpeciation => { - simulate(&options, necsim_core::ReporterGroup![progress, speciation]) + simulate(&options, necsim_core::ReporterGroup![progress, speciation]); }, ReportingMode::ProgressSpeciationDispersal => simulate( &options, @@ -107,33 +106,36 @@ fn simulate(options: &Options, mut reporter: R) { let dispersal_sampler = AlmostInfiniteNormalDispersalSampler::new(options.sigma); let turnover_rate = UniformTurnoverRate::default(); let speciation_probability = UniformSpeciationProbability::new(options.speciation); - let rng = Pcg::seed_from_u64(options.seed); - let lineage_store = - AlmostInfiniteLineageStore::from_origin_sampler(AlmostInfiniteOriginSampler::new( - OriginPreSampler::all().percentage(options.sample.get()), + let rng = Pcg::::seed_from_u64(options.seed); + + let (lineage_store, active_lineage_sampler): (AlmostInfiniteLineageStore<_>, _) = + ClassicalActiveLineageSampler::init_with_store(AlmostInfiniteOriginSampler::new( + OriginPreSampler::all().percentage(options.sample), &habitat, options.radius, )); + let coalescence_sampler = UnconditionalCoalescenceSampler::default(); let emigration_exit = NeverEmigrationExit::default(); let event_sampler = UnconditionalEventSampler::default(); let immigration_entry = NeverImmigrationEntry::default(); - let active_lineage_sampler = ClassicalActiveLineageSampler::new(&lineage_store); - - let simulation = Simulation::builder() - .habitat(habitat) - .rng(rng) - .speciation_probability(speciation_probability) - .dispersal_sampler(dispersal_sampler) - .lineage_reference(PhantomData) - .lineage_store(lineage_store) - .emigration_exit(emigration_exit) - .coalescence_sampler(coalescence_sampler) - .turnover_rate(turnover_rate) - .event_sampler(event_sampler) - .immigration_entry(immigration_entry) - .active_lineage_sampler(active_lineage_sampler) - .build(); + + let simulation = SimulationBuilder { + maths: PhantomData::, + habitat, + lineage_reference: PhantomData::, + lineage_store, + dispersal_sampler, + coalescence_sampler, + turnover_rate, + speciation_probability, + emigration_exit, + event_sampler, + active_lineage_sampler, + rng, + immigration_entry, + } + .build(); simulation.simulate(&mut reporter); diff --git a/analysis/performance/reporting/src/minimal_logger.rs b/analysis/performance/reporting/src/minimal_logger.rs index a90e1489..72653075 100644 --- a/analysis/performance/reporting/src/minimal_logger.rs +++ b/analysis/performance/reporting/src/minimal_logger.rs @@ -24,9 +24,9 @@ impl log::Log for MinimalLogger { }; if record.level() > LevelFilter::Error { - println!("{:<5} {}", level_string, record.args()) + println!("{:<5} {}", level_string, record.args()); } else { - eprintln!("{:<5} {}", level_string, record.args()) + eprintln!("{:<5} {}", level_string, record.args()); } } diff --git a/analysis/rng/correlation/Cargo.toml b/analysis/rng/correlation/Cargo.toml index d3ff866d..a6172801 100644 --- a/analysis/rng/correlation/Cargo.toml +++ b/analysis/rng/correlation/Cargo.toml @@ -10,7 +10,9 @@ edition = "2018" [dependencies] necsim-core = { path = "../../../necsim/core" } necsim-core-bond = { path = "../../../necsim/core/bond" } +necsim-core-maths = { path = "../../../necsim/core/maths" } necsim-impls-no-std = { path = "../../../necsim/impls/no-std" } -contracts = { path = "../../../third-party/contracts" } -structopt = "0.3.21" +contracts = "0.6.3" +structopt = "0.3" +serde = "1.0" diff --git a/analysis/rng/correlation/src/main.rs b/analysis/rng/correlation/src/main.rs index d597aa8c..c7300294 100644 --- a/analysis/rng/correlation/src/main.rs +++ b/analysis/rng/correlation/src/main.rs @@ -1,14 +1,16 @@ #![deny(clippy::pedantic)] #![feature(associated_type_bounds)] +#![feature(control_flow_enum)] #![allow(incomplete_features)] -#![feature(const_generics)] +#![feature(adt_const_params)] #[macro_use] extern crate contracts; use structopt::StructOpt; -use necsim_core::cogs::RngCore; +use necsim_core::cogs::{MathsCore, RngCore, SeedableRng}; +use necsim_core_maths::IntrinsicsMathsCore; use necsim_impls_no_std::cogs::rng::wyhash::WyHash; mod simulation; @@ -39,17 +41,21 @@ fn main() { match options.mode { DispersalMode::NoDispersal => sample_random_streams( - CorrelationSimulationRng::::seed_from_u64(options.seed), + CorrelationSimulationRng::, 0.0>::seed_from_u64( + options.seed, + ), options.limit, ), DispersalMode::HighDispersal => sample_random_streams( - CorrelationSimulationRng::::seed_from_u64(options.seed), + CorrelationSimulationRng::, 100.0>::seed_from_u64( + options.seed, + ), options.limit, ), } } -fn sample_random_streams(mut rng: R, limit: u128) { +fn sample_random_streams>(mut rng: R, limit: u128) { for _ in 0..limit { println!( "{},{},{},{}", @@ -57,6 +63,6 @@ fn sample_random_streams(mut rng: R, limit: u128) { rng.sample_u64(), rng.sample_u64(), rng.sample_u64() - ) + ); } } diff --git a/analysis/rng/correlation/src/simulation/mod.rs b/analysis/rng/correlation/src/simulation/mod.rs index c9523d26..049504e7 100644 --- a/analysis/rng/correlation/src/simulation/mod.rs +++ b/analysis/rng/correlation/src/simulation/mod.rs @@ -1,16 +1,21 @@ -use std::collections::VecDeque; +use std::{collections::VecDeque, marker::PhantomData, ops::ControlFlow}; + +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use necsim_core::{ - cogs::{Backup, PrimeableRng, RngCore, SingularActiveLineageSampler}, + cogs::{Backup, MathsCore, PrimeableRng, RngCore}, landscape::{IndexedLocation, Location}, lineage::{GlobalLineageReference, Lineage}, reporter::NullReporter, - simulation::Simulation, + simulation::{Simulation, SimulationBuilder}, }; use necsim_core_bond::{ClosedUnitF64, NonNegativeF64, PositiveF64}; use necsim_impls_no_std::cogs::{ - active_lineage_sampler::independent::{ - event_time_sampler::poisson::PoissonEventTimeSampler, IndependentActiveLineageSampler, + active_lineage_sampler::{ + independent::{ + event_time_sampler::poisson::PoissonEventTimeSampler, IndependentActiveLineageSampler, + }, + singular::SingularActiveLineageSampler, }, coalescence_sampler::independent::IndependentCoalescenceSampler, dispersal_sampler::almost_infinite_normal::AlmostInfiniteNormalDispersalSampler, @@ -19,6 +24,7 @@ use necsim_impls_no_std::cogs::{ habitat::almost_infinite::AlmostInfiniteHabitat, immigration_entry::never::NeverImmigrationEntry, lineage_store::independent::IndependentLineageStore, + origin_sampler::{almost_infinite::AlmostInfiniteOriginSampler, pre_sampler::OriginPreSampler}, speciation_probability::uniform::UniformSpeciationProbability, turnover_rate::uniform::UniformTurnoverRate, }; @@ -28,63 +34,75 @@ use rng::InterceptingReporter; #[derive(Debug)] #[allow(clippy::module_name_repetitions, clippy::type_complexity)] -pub struct CorrelationSimulationRng { +pub struct CorrelationSimulationRng + PrimeableRng, const SIGMA: f64> +{ simulation: Simulation< - AlmostInfiniteHabitat, - InterceptingReporter, + M, + AlmostInfiniteHabitat, + InterceptingReporter, GlobalLineageReference, - IndependentLineageStore, + IndependentLineageStore>, NeverEmigrationExit, - AlmostInfiniteNormalDispersalSampler>, - IndependentCoalescenceSampler, + AlmostInfiniteNormalDispersalSampler>, + IndependentCoalescenceSampler>, UniformTurnoverRate, UniformSpeciationProbability, IndependentEventSampler< - AlmostInfiniteHabitat, - InterceptingReporter, + M, + AlmostInfiniteHabitat, + InterceptingReporter, NeverEmigrationExit, - AlmostInfiniteNormalDispersalSampler>, + AlmostInfiniteNormalDispersalSampler>, UniformTurnoverRate, UniformSpeciationProbability, >, NeverImmigrationEntry, IndependentActiveLineageSampler< - AlmostInfiniteHabitat, - InterceptingReporter, + M, + AlmostInfiniteHabitat, + InterceptingReporter, NeverEmigrationExit, - AlmostInfiniteNormalDispersalSampler>, + AlmostInfiniteNormalDispersalSampler>, UniformTurnoverRate, UniformSpeciationProbability, PoissonEventTimeSampler, >, >, - other_rngs_lineages: VecDeque<(InterceptingReporter, Lineage)>, + other_rngs_lineages: VecDeque<(InterceptingReporter, Lineage)>, } -impl + PrimeableRng, const SIGMA: f64> RngCore - for CorrelationSimulationRng +impl + PrimeableRng, const SIGMA: f64> RngCore + for CorrelationSimulationRng { type Seed = G::Seed; fn from_seed(seed: Self::Seed) -> Self { - let mut simulation = Simulation::builder() - .habitat(AlmostInfiniteHabitat::default()) - .rng(InterceptingReporter::::from_seed(seed.clone())) - .speciation_probability(UniformSpeciationProbability::new(ClosedUnitF64::zero())) - .dispersal_sampler(AlmostInfiniteNormalDispersalSampler::new( - NonNegativeF64::new(SIGMA).unwrap(), - )) - .lineage_reference(std::marker::PhantomData::) - .lineage_store(IndependentLineageStore::default()) - .emigration_exit(NeverEmigrationExit::default()) - .coalescence_sampler(IndependentCoalescenceSampler::default()) - .turnover_rate(UniformTurnoverRate::default()) - .event_sampler(IndependentEventSampler::default()) - .immigration_entry(NeverImmigrationEntry::default()) - .active_lineage_sampler(IndependentActiveLineageSampler::empty( + let habitat = AlmostInfiniteHabitat::default(); + + let (lineage_store, active_lineage_sampler, _) = + IndependentActiveLineageSampler::init_with_store_and_lineages( + AlmostInfiniteOriginSampler::new(OriginPreSampler::none(), &habitat, 0), PoissonEventTimeSampler::new(PositiveF64::new(1.0_f64).unwrap()), - )) - .build(); + ); + + let mut simulation = SimulationBuilder { + maths: PhantomData::, + habitat, + lineage_reference: PhantomData::, + lineage_store, + dispersal_sampler: AlmostInfiniteNormalDispersalSampler::new( + NonNegativeF64::new(SIGMA).unwrap(), + ), + coalescence_sampler: IndependentCoalescenceSampler::default(), + turnover_rate: UniformTurnoverRate::default(), + speciation_probability: UniformSpeciationProbability::new(ClosedUnitF64::zero()), + emigration_exit: NeverEmigrationExit::default(), + event_sampler: IndependentEventSampler::default(), + active_lineage_sampler, + rng: InterceptingReporter::::from_seed(seed.clone()), + immigration_entry: NeverImmigrationEntry::default(), + } + .build(); let lineage = Lineage::new( IndexedLocation::new(Location::new(0, 0), 0), @@ -97,21 +115,21 @@ impl + PrimeableRng, const SIGMA: f64> RngCore let other_rngs_lineages = vec![ ( - InterceptingReporter::::from_seed(seed.clone()), + InterceptingReporter::::from_seed(seed.clone()), Lineage::new( IndexedLocation::new(Location::new(0, 1), 0), simulation.habitat(), ), ), ( - InterceptingReporter::::from_seed(seed.clone()), + InterceptingReporter::::from_seed(seed.clone()), Lineage::new( IndexedLocation::new(Location::new(1, 0), 0), simulation.habitat(), ), ), ( - InterceptingReporter::::from_seed(seed), + InterceptingReporter::::from_seed(seed), Lineage::new( IndexedLocation::new(Location::new(1, 1), 0), simulation.habitat(), @@ -132,8 +150,16 @@ impl + PrimeableRng, const SIGMA: f64> RngCore break sample; } - self.simulation - .simulate_incremental_early_stop(|_, steps| steps >= 256, &mut NullReporter); + self.simulation.simulate_incremental_early_stop( + |_, steps, _| { + if steps >= 256 { + ControlFlow::BREAK + } else { + ControlFlow::CONTINUE + } + }, + &mut NullReporter, + ); }; let (mut next_rng, next_lineage) = self.other_rngs_lineages.pop_front().unwrap(); @@ -154,7 +180,9 @@ impl + PrimeableRng, const SIGMA: f64> RngCore } #[contract_trait] -impl Backup for CorrelationSimulationRng { +impl + PrimeableRng, const SIGMA: f64> Backup + for CorrelationSimulationRng +{ unsafe fn backup_unchecked(&self) -> Self { Self { simulation: self.simulation.backup_unchecked(), @@ -167,8 +195,26 @@ impl Backup for CorrelationSimulati } } -impl Clone for CorrelationSimulationRng { +impl + PrimeableRng, const SIGMA: f64> Clone + for CorrelationSimulationRng +{ fn clone(&self) -> Self { unsafe { self.backup_unchecked() } } } + +impl + PrimeableRng, const SIGMA: f64> Serialize + for CorrelationSimulationRng +{ + fn serialize(&self, _serializer: S) -> Result { + unimplemented!() + } +} + +impl<'de, M: MathsCore, G: RngCore + PrimeableRng, const SIGMA: f64> Deserialize<'de> + for CorrelationSimulationRng +{ + fn deserialize>(_deserializer: D) -> Result { + unimplemented!() + } +} diff --git a/analysis/rng/correlation/src/simulation/rng.rs b/analysis/rng/correlation/src/simulation/rng.rs index 70b3047c..932ccdb7 100644 --- a/analysis/rng/correlation/src/simulation/rng.rs +++ b/analysis/rng/correlation/src/simulation/rng.rs @@ -1,9 +1,11 @@ -use std::{collections::VecDeque, fmt}; +use std::{collections::VecDeque, fmt, marker::PhantomData}; -use necsim_core::cogs::{Backup, PrimeableRng, RngCore}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; #[derive(Clone)] -pub struct InterceptingReporter { +pub struct InterceptingReporter> { inner: G, buffer: VecDeque, @@ -12,31 +14,33 @@ pub struct InterceptingReporter { snd_last_sequence_length: usize, cmp_sequence_length: usize, sequence_length: usize, + + marker: PhantomData, } -impl fmt::Debug for InterceptingReporter { +impl> fmt::Debug for InterceptingReporter { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - fmt.debug_struct("InterceptingReporter") + fmt.debug_struct(stringify!(InterceptingReporter)) .field("inner", &self.inner) .field("buffer", &self.buffer) .finish() } } -impl InterceptingReporter { +impl> InterceptingReporter { pub fn buffer(&mut self) -> &mut VecDeque { &mut self.buffer } } #[contract_trait] -impl Backup for InterceptingReporter { +impl> Backup for InterceptingReporter { unsafe fn backup_unchecked(&self) -> Self { self.clone() } } -impl RngCore for InterceptingReporter { +impl> RngCore for InterceptingReporter { type Seed = G::Seed; #[must_use] @@ -50,6 +54,8 @@ impl RngCore for InterceptingReporter { snd_last_sequence_length: 0, cmp_sequence_length: 0, sequence_length: 0, + + marker: PhantomData::, } } @@ -67,7 +73,7 @@ impl RngCore for InterceptingReporter { } } -impl PrimeableRng for InterceptingReporter { +impl> PrimeableRng for InterceptingReporter { fn prime_with(&mut self, location_index: u64, time_index: u64) { if Some((location_index, time_index)) == self.snd_last_reprime { self.cmp_sequence_length = self.snd_last_sequence_length; @@ -82,6 +88,18 @@ impl PrimeableRng for InterceptingReporter { self.sequence_length = 0; - self.inner.prime_with(location_index, time_index) + self.inner.prime_with(location_index, time_index); + } +} + +impl> Serialize for InterceptingReporter { + fn serialize(&self, _serializer: S) -> Result { + unimplemented!() + } +} + +impl<'de, M: MathsCore, R: RngCore> Deserialize<'de> for InterceptingReporter { + fn deserialize>(_deserializer: D) -> Result { + unimplemented!() } } diff --git a/analysis/rng/hash/Cargo.toml b/analysis/rng/hash/Cargo.toml index 8d223658..2a29b551 100644 --- a/analysis/rng/hash/Cargo.toml +++ b/analysis/rng/hash/Cargo.toml @@ -9,7 +9,8 @@ edition = "2018" [dependencies] necsim-core = { path = "../../../necsim/core" } +necsim-core-maths = { path = "../../../necsim/core/maths" } necsim-impls-no-std = { path = "../../../necsim/impls/no-std" } -structopt = "0.3.21" -rand = "0.8.3" +structopt = "0.3" +rand = "0.8" diff --git a/analysis/rng/hash/src/main.rs b/analysis/rng/hash/src/main.rs index 5334bb9f..a0de559e 100644 --- a/analysis/rng/hash/src/main.rs +++ b/analysis/rng/hash/src/main.rs @@ -5,7 +5,8 @@ use std::io::{self, BufWriter, Write}; use rand::{rngs::StdRng, RngCore, SeedableRng}; use structopt::StructOpt; -use necsim_core::cogs::{PrimeableRng, RngCore as _}; +use necsim_core::cogs::{PrimeableRng, RngCore as _, SeedableRng as _}; +use necsim_core_maths::IntrinsicsMathsCore; use necsim_impls_no_std::cogs::rng::wyhash::WyHash; #[derive(Debug, StructOpt)] @@ -42,7 +43,7 @@ fn main() -> io::Result<()> { HashMode::Update => { test_update_hash(u64::MIN, &mut stdout, options.raw_output)?; for _ in 0..options.limit { - test_update_hash(rng.next_u64(), &mut stdout, options.raw_output)? + test_update_hash(rng.next_u64(), &mut stdout, options.raw_output)?; } test_update_hash(u64::MAX, &mut stdout, options.raw_output)?; }, @@ -80,14 +81,15 @@ fn main() -> io::Result<()> { } fn test_update_hash(state: u64, writer: &mut W, raw_output: bool) -> io::Result<()> { - let mut rng_origin = WyHash::from_seed(state.to_le_bytes()); + let mut rng_origin = WyHash::::from_seed(state.to_le_bytes()); let hash_origin = optional_undiffuse(rng_origin.sample_u64(), raw_output); for i in 0..64 { - let mut rng_flipped = WyHash::from_seed((state ^ (0x1_u64 << i)).to_le_bytes()); + let mut rng_flipped = + WyHash::::from_seed((state ^ (0x1_u64 << i)).to_le_bytes()); let hash_flipped = optional_undiffuse(rng_flipped.sample_u64(), raw_output); - writeln!(writer, "{}", hash_origin ^ hash_flipped)? + writeln!(writer, "{}", hash_origin ^ hash_flipped)?; } Ok(()) @@ -101,7 +103,7 @@ fn test_prime_hash( raw_prime: bool, raw_output: bool, ) -> io::Result<()> { - let mut rng_origin = WyHash::seed_from_u64(seed); + let mut rng_origin = WyHash::::seed_from_u64(seed); rng_origin.prime_with( optional_undiffuse(location_index, raw_prime), optional_undiffuse(time_index, raw_prime), @@ -109,17 +111,17 @@ fn test_prime_hash( let hash_origin = optional_undiffuse(rng_origin.sample_u64(), raw_output); for i in 0..64 { - let mut rng_flipped = WyHash::seed_from_u64(seed ^ (0x1_u64 << i)); + let mut rng_flipped = WyHash::::seed_from_u64(seed ^ (0x1_u64 << i)); rng_origin.prime_with( optional_undiffuse(location_index, raw_prime), optional_undiffuse(time_index, raw_prime), ); let hash_flipped = optional_undiffuse(rng_flipped.sample_u64(), raw_output); - writeln!(writer, "{}", hash_origin ^ hash_flipped)? + writeln!(writer, "{}", hash_origin ^ hash_flipped)?; } - let mut rng_flipped = WyHash::seed_from_u64(seed); + let mut rng_flipped = WyHash::::seed_from_u64(seed); for i in 0..64 { rng_origin.prime_with( @@ -128,7 +130,7 @@ fn test_prime_hash( ); let hash_flipped = optional_undiffuse(rng_flipped.sample_u64(), raw_output); - writeln!(writer, "{}", hash_origin ^ hash_flipped)? + writeln!(writer, "{}", hash_origin ^ hash_flipped)?; } for i in 0..64 { @@ -138,7 +140,7 @@ fn test_prime_hash( ); let hash_flipped = optional_undiffuse(rng_flipped.sample_u64(), raw_output); - writeln!(writer, "{}", hash_origin ^ hash_flipped)? + writeln!(writer, "{}", hash_origin ^ hash_flipped)?; } Ok(()) diff --git a/analysis/rng/randomness/Cargo.toml b/analysis/rng/randomness/Cargo.toml index f84448cf..dd4e8116 100644 --- a/analysis/rng/randomness/Cargo.toml +++ b/analysis/rng/randomness/Cargo.toml @@ -10,9 +10,11 @@ edition = "2018" [dependencies] necsim-core = { path = "../../../necsim/core" } necsim-core-bond = { path = "../../../necsim/core/bond" } +necsim-core-maths = { path = "../../../necsim/core/maths" } necsim-impls-no-std = { path = "../../../necsim/impls/no-std" } necsim-impls-std = { path = "../../../necsim/impls/std" } -byte-unit = "4.0.12" -contracts = { path = "../../../third-party/contracts" } -structopt = "0.3.21" +byte-unit = "4.0" +contracts = "0.6.3" +structopt = "0.3" +serde = "1.0" diff --git a/analysis/rng/randomness/src/main.rs b/analysis/rng/randomness/src/main.rs index d2e54a02..e58b1834 100644 --- a/analysis/rng/randomness/src/main.rs +++ b/analysis/rng/randomness/src/main.rs @@ -1,4 +1,5 @@ #![deny(clippy::pedantic)] +#![feature(control_flow_enum)] #[macro_use] extern crate contracts; @@ -8,7 +9,8 @@ use std::io::{self, BufWriter, Write}; use byte_unit::{Byte, ByteError}; use structopt::StructOpt; -use necsim_core::cogs::RngCore; +use necsim_core::cogs::{MathsCore, RngCore, SeedableRng}; +use necsim_core_maths::IntrinsicsMathsCore; use necsim_impls_no_std::cogs::rng::wyhash::WyHash; use necsim_impls_std::cogs::rng::pcg::Pcg; @@ -47,55 +49,64 @@ fn main() -> io::Result<()> { let stdout = BufWriter::with_capacity(4096, io::stdout()); match (options.limit, options.mode) { - (None, GeneratorMode::Monolithic) => { - produce_unlimited_randomness(stdout, Pcg::seed_from_u64(options.seed)) - }, - (None, GeneratorMode::Independent) => { - produce_unlimited_randomness(stdout, WyHash::seed_from_u64(options.seed)) - }, + (None, GeneratorMode::Monolithic) => produce_unlimited_randomness( + stdout, + Pcg::::seed_from_u64(options.seed), + ), + (None, GeneratorMode::Independent) => produce_unlimited_randomness( + stdout, + WyHash::::seed_from_u64(options.seed), + ), (None, GeneratorMode::IndependentSimulation) => produce_unlimited_randomness( stdout, - SimulationRng::::seed_from_u64(options.seed), + SimulationRng::, 100>::seed_from_u64(options.seed), ), (None, GeneratorMode::IndependentSimulationNoDispersal) => produce_unlimited_randomness( stdout, - SimulationRng::::seed_from_u64(options.seed), + SimulationRng::, 1>::seed_from_u64(options.seed), + ), + (Some(limit), GeneratorMode::Monolithic) => produce_limited_randomness( + stdout, + Pcg::::seed_from_u64(options.seed), + limit, + ), + (Some(limit), GeneratorMode::Independent) => produce_limited_randomness( + stdout, + WyHash::::seed_from_u64(options.seed), + limit, ), - (Some(limit), GeneratorMode::Monolithic) => { - produce_limited_randomness(stdout, Pcg::seed_from_u64(options.seed), limit) - }, - (Some(limit), GeneratorMode::Independent) => { - produce_limited_randomness(stdout, WyHash::seed_from_u64(options.seed), limit) - }, (Some(limit), GeneratorMode::IndependentSimulation) => produce_limited_randomness( stdout, - SimulationRng::::seed_from_u64(options.seed), + SimulationRng::, 100>::seed_from_u64(options.seed), limit, ), (Some(limit), GeneratorMode::IndependentSimulationNoDispersal) => { produce_limited_randomness( stdout, - SimulationRng::::seed_from_u64(options.seed), + SimulationRng::, 1>::seed_from_u64(options.seed), limit, ) }, } } -fn produce_limited_randomness( +fn produce_limited_randomness>( mut writer: W, mut rng: R, limit: u128, ) -> io::Result<()> { for _ in 0..(limit / 8) { - writer.write_all(&rng.sample_u64().to_le_bytes())? + writer.write_all(&rng.sample_u64().to_le_bytes())?; } writer.write_all(&rng.sample_u64().to_le_bytes()[..((limit % 8) as usize)]) } -fn produce_unlimited_randomness(mut writer: W, mut rng: R) -> io::Result<()> { +fn produce_unlimited_randomness>( + mut writer: W, + mut rng: R, +) -> io::Result<()> { loop { - writer.write_all(&rng.sample_u64().to_le_bytes())? + writer.write_all(&rng.sample_u64().to_le_bytes())?; } } diff --git a/analysis/rng/randomness/src/simulation/mod.rs b/analysis/rng/randomness/src/simulation/mod.rs index bc348e59..9ae3baa5 100644 --- a/analysis/rng/randomness/src/simulation/mod.rs +++ b/analysis/rng/randomness/src/simulation/mod.rs @@ -1,14 +1,21 @@ +use std::{marker::PhantomData, num::NonZeroU32, ops::ControlFlow}; + +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + use necsim_core::{ - cogs::{Backup, PrimeableRng, RngCore, SingularActiveLineageSampler}, + cogs::{Backup, MathsCore, PrimeableRng, RngCore}, landscape::{IndexedLocation, Location}, lineage::{GlobalLineageReference, Lineage}, reporter::NullReporter, - simulation::Simulation, + simulation::{Simulation, SimulationBuilder}, }; -use necsim_core_bond::{ClosedUnitF64, PositiveF64}; +use necsim_core_bond::{ClosedUnitF64, OffByOneU32, PositiveF64}; use necsim_impls_no_std::cogs::{ - active_lineage_sampler::independent::{ - event_time_sampler::poisson::PoissonEventTimeSampler, IndependentActiveLineageSampler, + active_lineage_sampler::{ + independent::{ + event_time_sampler::poisson::PoissonEventTimeSampler, IndependentActiveLineageSampler, + }, + singular::SingularActiveLineageSampler, }, coalescence_sampler::independent::IndependentCoalescenceSampler, dispersal_sampler::non_spatial::NonSpatialDispersalSampler, @@ -17,6 +24,7 @@ use necsim_impls_no_std::cogs::{ habitat::non_spatial::NonSpatialHabitat, immigration_entry::never::NeverImmigrationEntry, lineage_store::independent::IndependentLineageStore, + origin_sampler::{non_spatial::NonSpatialOriginSampler, pre_sampler::OriginPreSampler}, speciation_probability::uniform::UniformSpeciationProbability, turnover_rate::uniform::UniformTurnoverRate, }; @@ -26,31 +34,34 @@ use rng::InterceptingReporter; #[derive(Debug)] #[allow(clippy::module_name_repetitions, clippy::type_complexity)] -pub struct SimulationRng { +pub struct SimulationRng + PrimeableRng, const SIZE: u32> { simulation: Simulation< - NonSpatialHabitat, - InterceptingReporter, + M, + NonSpatialHabitat, + InterceptingReporter, GlobalLineageReference, - IndependentLineageStore, + IndependentLineageStore>, NeverEmigrationExit, - NonSpatialDispersalSampler>, - IndependentCoalescenceSampler, + NonSpatialDispersalSampler>, + IndependentCoalescenceSampler>, UniformTurnoverRate, UniformSpeciationProbability, IndependentEventSampler< - NonSpatialHabitat, - InterceptingReporter, + M, + NonSpatialHabitat, + InterceptingReporter, NeverEmigrationExit, - NonSpatialDispersalSampler>, + NonSpatialDispersalSampler>, UniformTurnoverRate, UniformSpeciationProbability, >, NeverImmigrationEntry, IndependentActiveLineageSampler< - NonSpatialHabitat, - InterceptingReporter, + M, + NonSpatialHabitat, + InterceptingReporter, NeverEmigrationExit, - NonSpatialDispersalSampler>, + NonSpatialDispersalSampler>, UniformTurnoverRate, UniformSpeciationProbability, PoissonEventTimeSampler, @@ -58,26 +69,38 @@ pub struct SimulationRng { >, } -impl RngCore for SimulationRng { +impl + PrimeableRng, const SIZE: u32> RngCore + for SimulationRng +{ type Seed = G::Seed; fn from_seed(seed: Self::Seed) -> Self { - let mut simulation = Simulation::builder() - .habitat(NonSpatialHabitat::new((SIZE, SIZE), SIZE)) - .rng(InterceptingReporter::::from_seed(seed)) - .speciation_probability(UniformSpeciationProbability::new(ClosedUnitF64::zero())) - .dispersal_sampler(NonSpatialDispersalSampler::default()) - .lineage_reference(std::marker::PhantomData::) - .lineage_store(IndependentLineageStore::default()) - .emigration_exit(NeverEmigrationExit::default()) - .coalescence_sampler(IndependentCoalescenceSampler::default()) - .turnover_rate(UniformTurnoverRate::default()) - .event_sampler(IndependentEventSampler::default()) - .immigration_entry(NeverImmigrationEntry::default()) - .active_lineage_sampler(IndependentActiveLineageSampler::empty( + let size = OffByOneU32::new(u64::from(SIZE)).unwrap(); + + let habitat = NonSpatialHabitat::new((size, size), NonZeroU32::new(SIZE).unwrap()); + + let (lineage_store, active_lineage_sampler, _) = + IndependentActiveLineageSampler::init_with_store_and_lineages( + NonSpatialOriginSampler::new(OriginPreSampler::none(), &habitat), PoissonEventTimeSampler::new(PositiveF64::new(1.0_f64).unwrap()), - )) - .build(); + ); + + let mut simulation = SimulationBuilder { + maths: PhantomData::, + habitat, + lineage_reference: PhantomData::, + lineage_store, + dispersal_sampler: NonSpatialDispersalSampler::default(), + coalescence_sampler: IndependentCoalescenceSampler::default(), + turnover_rate: UniformTurnoverRate::default(), + speciation_probability: UniformSpeciationProbability::new(ClosedUnitF64::zero()), + emigration_exit: NeverEmigrationExit::default(), + event_sampler: IndependentEventSampler::default(), + active_lineage_sampler, + rng: InterceptingReporter::::from_seed(seed), + immigration_entry: NeverImmigrationEntry::default(), + } + .build(); let lineage = Lineage::new( IndexedLocation::new(Location::new(0, 0), 0), @@ -97,14 +120,24 @@ impl RngCore for SimulationRng= 256, &mut NullReporter); + self.simulation.simulate_incremental_early_stop( + |_, steps, _| { + if steps >= 256 { + ControlFlow::BREAK + } else { + ControlFlow::CONTINUE + } + }, + &mut NullReporter, + ); } } } #[contract_trait] -impl Backup for SimulationRng { +impl + PrimeableRng, const SIZE: u32> Backup + for SimulationRng +{ unsafe fn backup_unchecked(&self) -> Self { Self { simulation: self.simulation.backup_unchecked(), @@ -112,8 +145,26 @@ impl Backup for SimulationRng Clone for SimulationRng { +impl + PrimeableRng, const SIZE: u32> Clone + for SimulationRng +{ fn clone(&self) -> Self { unsafe { self.backup_unchecked() } } } + +impl + PrimeableRng, const SIZE: u32> Serialize + for SimulationRng +{ + fn serialize(&self, _serializer: S) -> Result { + unimplemented!() + } +} + +impl<'de, M: MathsCore, R: RngCore + PrimeableRng, const SIZE: u32> Deserialize<'de> + for SimulationRng +{ + fn deserialize>(_deserializer: D) -> Result { + unimplemented!() + } +} diff --git a/analysis/rng/randomness/src/simulation/rng.rs b/analysis/rng/randomness/src/simulation/rng.rs index 70b3047c..9a103025 100644 --- a/analysis/rng/randomness/src/simulation/rng.rs +++ b/analysis/rng/randomness/src/simulation/rng.rs @@ -1,9 +1,11 @@ -use std::{collections::VecDeque, fmt}; +use std::{collections::VecDeque, fmt, marker::PhantomData}; -use necsim_core::cogs::{Backup, PrimeableRng, RngCore}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; #[derive(Clone)] -pub struct InterceptingReporter { +pub struct InterceptingReporter> { inner: G, buffer: VecDeque, @@ -12,9 +14,11 @@ pub struct InterceptingReporter { snd_last_sequence_length: usize, cmp_sequence_length: usize, sequence_length: usize, + + marker: PhantomData, } -impl fmt::Debug for InterceptingReporter { +impl> fmt::Debug for InterceptingReporter { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.debug_struct("InterceptingReporter") .field("inner", &self.inner) @@ -23,20 +27,20 @@ impl fmt::Debug for InterceptingReporter { } } -impl InterceptingReporter { +impl> InterceptingReporter { pub fn buffer(&mut self) -> &mut VecDeque { &mut self.buffer } } #[contract_trait] -impl Backup for InterceptingReporter { +impl> Backup for InterceptingReporter { unsafe fn backup_unchecked(&self) -> Self { self.clone() } } -impl RngCore for InterceptingReporter { +impl> RngCore for InterceptingReporter { type Seed = G::Seed; #[must_use] @@ -50,6 +54,8 @@ impl RngCore for InterceptingReporter { snd_last_sequence_length: 0, cmp_sequence_length: 0, sequence_length: 0, + + marker: PhantomData::, } } @@ -67,7 +73,7 @@ impl RngCore for InterceptingReporter { } } -impl PrimeableRng for InterceptingReporter { +impl> PrimeableRng for InterceptingReporter { fn prime_with(&mut self, location_index: u64, time_index: u64) { if Some((location_index, time_index)) == self.snd_last_reprime { self.cmp_sequence_length = self.snd_last_sequence_length; @@ -82,6 +88,18 @@ impl PrimeableRng for InterceptingReporter { self.sequence_length = 0; - self.inner.prime_with(location_index, time_index) + self.inner.prime_with(location_index, time_index); + } +} + +impl> Serialize for InterceptingReporter { + fn serialize(&self, _serializer: S) -> Result { + unimplemented!() + } +} + +impl<'de, M: MathsCore, R: RngCore> Deserialize<'de> for InterceptingReporter { + fn deserialize>(_deserializer: D) -> Result { + unimplemented!() } } diff --git a/necsim-rust b/necsim-rust index 9ba6b076..bfaed0bd 160000 --- a/necsim-rust +++ b/necsim-rust @@ -1 +1 @@ -Subproject commit 9ba6b076e5076e1d8fe3a4d464f98df610c62836 +Subproject commit bfaed0bd14c66cbe425e9fcc410b7adfacb8615a