From ca0f10f88dc119491c13634098c737d4ac55cfa6 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 7 Jan 2024 06:05:26 +0000 Subject: [PATCH 01/28] Initial progress towards using rust-cuda with async --- Cargo.lock | 260 ++++++++++++++++-- necsim/core/Cargo.toml | 4 +- necsim/core/src/cogs/emigration_exit.rs | 4 +- necsim/core/src/cogs/lineage_store.rs | 8 +- necsim/core/src/event.rs | 2 +- necsim/core/src/landscape/extent.rs | 18 +- necsim/core/src/landscape/location.rs | 13 +- necsim/core/src/lineage.rs | 18 +- necsim/core/src/reporter/boolean.rs | 2 +- necsim/core/src/simulation/builder.rs | 2 +- .../src/simulation/process/immigration.rs | 2 +- necsim/impls/cuda/Cargo.toml | 4 +- necsim/impls/cuda/src/cogs/rng.rs | 37 ++- necsim/impls/cuda/src/event_buffer.rs | 8 +- necsim/impls/cuda/src/value_buffer.rs | 34 ++- necsim/impls/no-std/Cargo.toml | 4 +- necsim/impls/no-std/src/array2d.rs | 4 +- .../alias/location/mod.rs | 2 +- .../alias/location/sampler.rs | 2 +- .../independent/event_time_sampler/const.rs | 2 +- .../independent/event_time_sampler/exp.rs | 2 +- .../independent/event_time_sampler/fixed.rs | 2 +- .../event_time_sampler/geometric.rs | 2 +- .../independent/event_time_sampler/poisson.rs | 2 +- .../active_lineage_sampler/independent/mod.rs | 7 +- .../cogs/coalescence_sampler/independent.rs | 2 +- .../almost_infinite_normal.rs | 2 +- .../in_memory/packed_alias/mod.rs | 2 +- .../in_memory/separable_alias/dispersal.rs | 4 +- .../src/cogs/dispersal_sampler/non_spatial.rs | 2 +- .../dispersal_sampler/spatially_implicit.rs | 2 +- .../cogs/dispersal_sampler/trespassing/mod.rs | 2 +- .../dispersal_sampler/trespassing/uniform.rs | 2 +- .../cogs/dispersal_sampler/wrapping_noise.rs | 4 +- .../no-std/src/cogs/emigration_exit/never.rs | 2 +- .../gillespie/conditional/mod.rs | 2 +- .../gillespie/conditional/probability.rs | 1 + .../src/cogs/event_sampler/independent.rs | 8 +- .../no-std/src/cogs/event_sampler/tracking.rs | 4 +- .../src/cogs/habitat/almost_infinite.rs | 2 +- .../no-std/src/cogs/habitat/in_memory.rs | 4 +- .../no-std/src/cogs/habitat/non_spatial.rs | 4 +- .../src/cogs/habitat/spatially_implicit.rs | 2 +- .../src/cogs/habitat/wrapping_noise/mod.rs | 8 +- .../src/cogs/immigration_entry/never.rs | 2 +- .../coherent/globally/gillespie/mod.rs | 2 +- .../coherent/globally/gillespie/store.rs | 6 +- .../coherent/globally/singleton_demes/mod.rs | 2 +- .../globally/singleton_demes/store.rs | 4 +- .../coherent/locally/classical/mod.rs | 2 +- .../coherent/locally/classical/store.rs | 2 +- .../src/cogs/lineage_store/independent.rs | 2 +- .../src/cogs/origin_sampler/in_memory.rs | 2 +- .../src/cogs/origin_sampler/non_spatial.rs | 2 +- .../spatially_implicit.rs | 2 +- .../cogs/speciation_probability/uniform.rs | 2 +- .../src/cogs/turnover_rate/in_memory.rs | 2 +- .../no-std/src/cogs/turnover_rate/uniform.rs | 2 +- .../no-std/src/decomposition/equal/area.rs | 2 +- .../no-std/src/decomposition/equal/mod.rs | 2 +- .../no-std/src/decomposition/equal/weight.rs | 2 +- .../parallelisation/independent/landscape.rs | 2 +- .../src/individual/feather/dataframe.rs | 2 +- .../species/src/individual/sqlite/database.rs | 2 +- .../species/src/location/feather/dataframe.rs | 10 +- .../species/src/location/feather/reporter.rs | 4 +- necsim/plugins/tskit/src/tree/table.rs | 2 +- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/gpu-kernel/Cargo.toml | 6 +- .../algorithms/cuda/gpu-kernel/src/lib.rs | 144 +++++----- rustcoalescence/algorithms/cuda/src/cuda.rs | 4 +- rustcoalescence/algorithms/cuda/src/error.rs | 2 +- rustcoalescence/algorithms/cuda/src/info.rs | 2 +- .../src/event_skipping/initialiser/fixup.rs | 2 +- .../gillespie/classical/initialiser/fixup.rs | 2 +- .../gillespie/turnover/initialiser/fixup.rs | 2 +- 77 files changed, 490 insertions(+), 244 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c7c4c3eef..e14cc09f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -622,6 +622,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1912868bad388722991f80323855d922e32b09ad00d76a13a98e465358765079" +[[package]] +name = "find_cuda_helper" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9f9e65c593dd01ac77daad909ea4ad17f0d6d1776193fc8ea766356177abdad" +dependencies = [ + "glob", +] + [[package]] name = "findshlibs" version = "0.10.2" @@ -656,6 +665,19 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "getrandom" version = "0.2.11" @@ -858,6 +880,29 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "pin-utils", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "mbox" version = "0.6.0" @@ -1154,6 +1199,16 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-traits" version = "0.2.17" @@ -1169,6 +1224,21 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "oneshot" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f6640c6bda7731b1fdbab747981a0f896dd1fedaf9f4a53fa237a04a84431f4" +dependencies = [ + "loom", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "pcg_rand" version = "0.13.0" @@ -1199,6 +1269,18 @@ dependencies = [ "ucd-trie", ] +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.28" @@ -1335,8 +1417,17 @@ checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.3", + "regex-syntax 0.8.2", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -1347,9 +1438,15 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.2", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -1385,24 +1482,39 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=6b53e88#6b53e88ee0cf10e68c85f8e09f092d8f7f7b4683" +source = "git+https://github.com/juntyr/rust-cuda?rev=fc18c79#fc18c7908f94ebc1e76ba5b722ffe7118b618035" dependencies = [ "const-type-layout", "final", + "oneshot", + "regex", "rust-cuda-derive", - "rust-cuda-ptx-jit", + "rust-cuda-kernel", "rustacuda", "rustacuda_core", "rustacuda_derive", + "safer_owning_ref", ] [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=6b53e88#6b53e88ee0cf10e68c85f8e09f092d8f7f7b4683" +source = "git+https://github.com/juntyr/rust-cuda?rev=fc18c79#fc18c7908f94ebc1e76ba5b722ffe7118b618035" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "rust-cuda-kernel" +version = "0.1.0" +source = "git+https://github.com/juntyr/rust-cuda?rev=fc18c79#fc18c7908f94ebc1e76ba5b722ffe7118b618035" dependencies = [ "cargo_metadata", "colored", + "find_cuda_helper", "lazy_static", "proc-macro-error", "proc-macro2", @@ -1413,23 +1525,13 @@ dependencies = [ "serde_json", "strip-ansi-escapes", "syn 1.0.109", -] - -[[package]] -name = "rust-cuda-ptx-jit" -version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=6b53e88#6b53e88ee0cf10e68c85f8e09f092d8f7f7b4683" -dependencies = [ - "lazy_static", - "regex", - "rustacuda", + "thiserror", ] [[package]] name = "rustacuda" version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47208516ab5338b592d63560e90eaef405d0ec880347eaf7742d893b0a31e228" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" dependencies = [ "bitflags 1.3.2", "cuda-driver-sys", @@ -1440,14 +1542,12 @@ dependencies = [ [[package]] name = "rustacuda_core" version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3858b08976dc2f860c5efbbb48cdcb0d4fafca92a6ac0898465af16c0dbe848" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" [[package]] name = "rustacuda_derive" version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ce8670a1a1d0fc2514a3b846dacdb65646f9bd494b6674cfacbb4ce430bd7e" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" dependencies = [ "proc-macro2", "quote", @@ -1633,12 +1733,33 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + [[package]] name = "ryu" version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +[[package]] +name = "safer_owning_ref" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af21b9de2df966f61c07b5b541c81c98225b86e48ababd43366a642654de30ef" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "seahash" version = "4.1.0" @@ -1733,6 +1854,15 @@ dependencies = [ "serde", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shell-words" version = "1.1.0" @@ -1835,6 +1965,16 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tiff" version = "0.9.0" @@ -1864,6 +2004,67 @@ dependencies = [ "serde", ] +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + [[package]] name = "tskit" version = "0.14.1" @@ -1909,6 +2110,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -2041,6 +2248,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index de5b6c629..a800856e5 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "host"], optional = true } diff --git a/necsim/core/src/cogs/emigration_exit.rs b/necsim/core/src/cogs/emigration_exit.rs index 45c6d37c3..f594da68c 100644 --- a/necsim/core/src/cogs/emigration_exit.rs +++ b/necsim/core/src/cogs/emigration_exit.rs @@ -27,8 +27,8 @@ pub trait EmigrationExit, G: RngCore, S: LineageS ret_event_time, )) => { ret_lineage_reference == &old(global_reference.clone()) && - ret_dispersal_origin == &old(dispersal_origin.clone()) && - ret_dispersal_target == &old(dispersal_target.clone()) && + ret_dispersal_origin == &old(dispersal_origin) && + ret_dispersal_target == &old(dispersal_target) && ret_prior_time == &old(prior_time) && ret_event_time == &old(event_time) }, diff --git a/necsim/core/src/cogs/lineage_store.rs b/necsim/core/src/cogs/lineage_store.rs index afc0d319d..c31899788 100644 --- a/necsim/core/src/cogs/lineage_store.rs +++ b/necsim/core/src/cogs/lineage_store.rs @@ -49,12 +49,12 @@ pub trait LocallyCoherentLineageStore>: &ret ).is_some(), "lineage was activated")] #[debug_ensures( - self[&ret].indexed_location == old(lineage.indexed_location.clone()), + self[&ret].indexed_location == old(lineage.indexed_location), "lineage was added to indexed_location" )] #[debug_ensures( self.get_global_lineage_reference_at_indexed_location( - &old(lineage.indexed_location.clone()), old(habitat) + &old(lineage.indexed_location), old(habitat) ) == Some(&self[&ret].global_reference), "lineage is now indexed at indexed_location" )] @@ -117,7 +117,7 @@ pub trait GloballyCoherentLineageStore>: #[debug_ensures( self.get_local_lineage_references_at_location_unordered( - &old(lineage.indexed_location.location().clone()), old(habitat) + &old(*lineage.indexed_location.location()), old(habitat) ).last() == Some(&ret), "lineage is now indexed unordered at indexed_location.location()" )] @@ -125,7 +125,7 @@ pub trait GloballyCoherentLineageStore>: old(self.get_local_lineage_references_at_location_unordered( lineage.indexed_location.location(), old(habitat) ).len() + 1) == self.get_local_lineage_references_at_location_unordered( - &old(lineage.indexed_location.location().clone()), old(habitat) + &old(*lineage.indexed_location.location()), old(habitat) ).len(), "unordered active lineage index at given location has grown by 1" )] diff --git a/necsim/core/src/event.rs b/necsim/core/src/event.rs index 40108ae85..6fd2fd3c3 100644 --- a/necsim/core/src/event.rs +++ b/necsim/core/src/event.rs @@ -108,7 +108,7 @@ impl From for PackedEvent { global_lineage_reference: event.global_lineage_reference.clone(), prior_time: event.prior_time.get().make_negative(), event_time: event.event_time.get(), - origin: event.origin.clone(), + origin: event.origin, target: event.origin, coalescence: event.global_lineage_reference, } diff --git a/necsim/core/src/landscape/extent.rs b/necsim/core/src/landscape/extent.rs index 1339938f3..12d8a0219 100644 --- a/necsim/core/src/landscape/extent.rs +++ b/necsim/core/src/landscape/extent.rs @@ -3,7 +3,7 @@ use necsim_core_bond::OffByOneU32; use super::Location; #[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] -#[derive(PartialEq, Eq, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] +#[derive(PartialEq, Eq, Copy, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] #[serde(rename = "Extent")] #[serde(deny_unknown_fields)] #[repr(C)] @@ -58,7 +58,7 @@ impl LandscapeExtent { LocationIterator { x: self.x, y: self.y, - extent: self.clone(), + extent: *self, first_y: true, } } @@ -186,7 +186,7 @@ mod tests { LocationIterator { x: 0, y: 0, - extent: extent.clone(), + extent, first_y: true, } ); @@ -200,7 +200,7 @@ mod tests { LocationIterator { x: 0, y: 0, - extent: extent.clone(), + extent, first_y: false, } ); @@ -230,7 +230,7 @@ mod tests { LocationIterator { x: 1386, y: 6812, - extent: extent.clone(), + extent, first_y: true, } ); @@ -242,7 +242,7 @@ mod tests { LocationIterator { x: 0, y: 6812, - extent: extent.clone(), + extent, first_y: true, } ); @@ -255,7 +255,7 @@ mod tests { LocationIterator { x: 1386, y: 6813, - extent: extent.clone(), + extent, first_y: false, } ); @@ -269,7 +269,7 @@ mod tests { LocationIterator { x: 1386, y: 0, - extent: extent.clone(), + extent, first_y: false, } ); @@ -283,7 +283,7 @@ mod tests { LocationIterator { x: 1386, y: 6812, - extent: extent.clone(), + extent, first_y: false, } ); diff --git a/necsim/core/src/landscape/location.rs b/necsim/core/src/landscape/location.rs index c3686e5c6..7854775a9 100644 --- a/necsim/core/src/landscape/location.rs +++ b/necsim/core/src/landscape/location.rs @@ -1,10 +1,8 @@ use serde::{Deserialize, Serialize}; -use crate::cogs::Backup; - #[allow(clippy::unsafe_derive_deserialize)] #[derive( - Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, + Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash, Debug, Serialize, Deserialize, TypeLayout, )] #[serde(deny_unknown_fields)] #[repr(C)] @@ -13,13 +11,6 @@ pub struct Location { y: u32, } -#[contract_trait] -impl Backup for Location { - unsafe fn backup_unchecked(&self) -> Self { - self.clone() - } -} - impl Location { #[must_use] pub const fn new(x: u32, y: u32) -> Self { @@ -44,7 +35,7 @@ impl From for Location { } #[derive( - Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, + Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash, Debug, Serialize, Deserialize, TypeLayout, )] #[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] #[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")] diff --git a/necsim/core/src/lineage.rs b/necsim/core/src/lineage.rs index 8e20ba0a5..1bf05b75a 100644 --- a/necsim/core/src/lineage.rs +++ b/necsim/core/src/lineage.rs @@ -16,6 +16,7 @@ use crate::{ }; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(transparent)] pub struct GlobalLineageReference(u64); @@ -94,23 +95,30 @@ impl From> for LineageInteraction { } } -#[allow(clippy::unsafe_derive_deserialize)] +#[allow(clippy::unsafe_derive_deserialize, clippy::module_name_repetitions)] #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, TypeLayout)] -#[serde(deny_unknown_fields)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cuda(ignore)] +#[serde(deny_unknown_fields)] pub struct Lineage { + #[cuda(embed)] + #[cuda(ignore)] #[serde(alias = "id", alias = "ref")] pub global_reference: GlobalLineageReference, + #[cuda(ignore)] #[serde(alias = "time")] pub last_event_time: NonNegativeF64, + #[cuda(ignore)] #[serde(alias = "loc")] pub indexed_location: IndexedLocation, } impl Lineage { #[must_use] + #[allow(clippy::no_effect_underscore_binding)] #[debug_ensures( - ret.indexed_location == old(indexed_location.clone()), + ret.indexed_location == old(indexed_location), "stores the indexed_location" )] #[debug_ensures(ret.last_event_time == 0.0_f64, "starts at t_0 = 0.0")] @@ -178,8 +186,8 @@ impl Backup for MigratingLineage { unsafe fn backup_unchecked(&self) -> Self { Self { global_reference: self.global_reference.backup_unchecked(), - dispersal_origin: self.dispersal_origin.clone(), - dispersal_target: self.dispersal_target.clone(), + dispersal_origin: self.dispersal_origin, + dispersal_target: self.dispersal_target, prior_time: self.prior_time, event_time: self.event_time, coalescence_rng_sample: self.coalescence_rng_sample.backup_unchecked(), diff --git a/necsim/core/src/reporter/boolean.rs b/necsim/core/src/reporter/boolean.rs index 372b43db1..686330300 100644 --- a/necsim/core/src/reporter/boolean.rs +++ b/necsim/core/src/reporter/boolean.rs @@ -5,7 +5,7 @@ mod private { impl Sealed for super::False {} } -pub trait Boolean: private::Sealed { +pub trait Boolean: 'static + private::Sealed { const VALUE: bool; } diff --git a/necsim/core/src/simulation/builder.rs b/necsim/core/src/simulation/builder.rs index c73c112cb..013c39434 100644 --- a/necsim/core/src/simulation/builder.rs +++ b/necsim/core/src/simulation/builder.rs @@ -86,7 +86,7 @@ impl< } #[derive(Debug, TypeLayout)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] #[repr(C)] pub struct Simulation< diff --git a/necsim/core/src/simulation/process/immigration.rs b/necsim/core/src/simulation/process/immigration.rs index aa1c49b3c..93c20f2f5 100644 --- a/necsim/core/src/simulation/process/immigration.rs +++ b/necsim/core/src/simulation/process/immigration.rs @@ -68,7 +68,7 @@ impl< active_lineage_sampler.push_active_lineage( Lineage { global_reference: migrating_lineage.global_reference.clone(), - indexed_location: dispersal_target.clone(), + indexed_location: dispersal_target, last_event_time: migrating_lineage.event_time.into(), }, simulation, diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 677c9e908..0c6d9dd32 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "host"] } diff --git a/necsim/impls/cuda/src/cogs/rng.rs b/necsim/impls/cuda/src/cogs/rng.rs index bc34a8f0f..671ebb79f 100644 --- a/necsim/impls/cuda/src/cogs/rng.rs +++ b/necsim/impls/cuda/src/cogs/rng.rs @@ -3,31 +3,35 @@ use core::marker::PhantomData; use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; use const_type_layout::TypeGraphLayout; -use rust_cuda::safety::StackOnly; +use rust_cuda::safety::{PortableBitSemantics, StackOnly}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[allow(clippy::module_name_repetitions)] -#[derive(Debug, rust_cuda::common::LendRustToCuda)] +#[derive(Debug, rust_cuda::lend::LendRustToCuda)] #[cuda(free = "M", free = "R")] pub struct CudaRng where - R: RngCore + StackOnly + TypeGraphLayout, + R: RngCore + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, { inner: R, marker: PhantomData, } -impl + StackOnly + TypeGraphLayout> Clone for CudaRng { +impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> Clone + for CudaRng +{ fn clone(&self) -> Self { Self { - inner: self.inner.clone(), + inner: self.inner, marker: PhantomData::, } } } -impl + StackOnly + TypeGraphLayout> From for CudaRng { +impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> + From for CudaRng +{ #[must_use] #[inline] fn from(rng: R) -> Self { @@ -38,7 +42,9 @@ impl + StackOnly + TypeGraphLayout> From for Cuda } } -impl + StackOnly + TypeGraphLayout> RngCore for CudaRng { +impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> + RngCore for CudaRng +{ type Seed = >::Seed; #[must_use] @@ -57,8 +63,10 @@ impl + StackOnly + TypeGraphLayout> RngCore for C } } -impl + StackOnly + TypeGraphLayout> PrimeableRng - for CudaRng +impl< + M: MathsCore, + R: PrimeableRng + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, + > PrimeableRng for CudaRng { #[inline] fn prime_with(&mut self, location_index: u64, time_index: u64) { @@ -66,14 +74,19 @@ impl + StackOnly + TypeGraphLayout> PrimeableRn } } -impl + StackOnly + TypeGraphLayout> Serialize for CudaRng { +impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> + Serialize for CudaRng +{ fn serialize(&self, serializer: S) -> Result { self.inner.serialize(serializer) } } -impl<'de, M: MathsCore, R: RngCore + StackOnly + TypeGraphLayout> Deserialize<'de> - for CudaRng +impl< + 'de, + M: MathsCore, + R: RngCore + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, + > Deserialize<'de> for CudaRng { fn deserialize>(deserializer: D) -> Result { let inner = R::deserialize(deserializer)?; diff --git a/necsim/impls/cuda/src/event_buffer.rs b/necsim/impls/cuda/src/event_buffer.rs index 6fb9f314f..d76f2d49e 100644 --- a/necsim/impls/cuda/src/event_buffer.rs +++ b/necsim/impls/cuda/src/event_buffer.rs @@ -1,7 +1,7 @@ use core::fmt; #[cfg(not(target_os = "cuda"))] -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ error::CudaResult, function::{BlockSize, GridSize}, }; @@ -24,7 +24,7 @@ use necsim_core::impl_report; use super::utils::MaybeSome; #[allow(clippy::module_name_repetitions, clippy::type_complexity)] -#[derive(rust_cuda::common::LendRustToCuda)] +#[derive(rust_cuda::lend::LendRustToCuda)] #[cuda(free = "ReportSpeciation", free = "ReportDispersal")] pub struct EventBuffer { #[cuda(embed)] @@ -43,8 +43,10 @@ pub struct EventBuffer { pub trait EventType { type Event: 'static - + rust_cuda::const_type_layout::TypeGraphLayout + + Sync + + rust_cuda::deps::const_type_layout::TypeGraphLayout + rust_cuda::safety::StackOnly + + rust_cuda::safety::PortableBitSemantics + Into + Into + Clone; diff --git a/necsim/impls/cuda/src/value_buffer.rs b/necsim/impls/cuda/src/value_buffer.rs index 04d844f6f..e20dd1c71 100644 --- a/necsim/impls/cuda/src/value_buffer.rs +++ b/necsim/impls/cuda/src/value_buffer.rs @@ -3,7 +3,7 @@ use core::iter::Iterator; use const_type_layout::TypeGraphLayout; use rust_cuda::{ - safety::StackOnly, + safety::{PortableBitSemantics, StackOnly}, utils::{ aliasing::SplitSliceOverCudaThreadsConstStride, exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem}, @@ -11,19 +11,19 @@ use rust_cuda::{ }; #[cfg(not(target_os = "cuda"))] -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ error::CudaResult, function::{BlockSize, GridSize}, }; use super::utils::MaybeSome; -#[derive(rust_cuda::common::LendRustToCuda)] +#[derive(rust_cuda::lend::LendRustToCuda)] #[cuda(free = "T")] #[allow(clippy::module_name_repetitions)] pub struct ValueBuffer where - T: StackOnly + TypeGraphLayout, + T: StackOnly + PortableBitSemantics + TypeGraphLayout, { #[cuda(embed)] mask: SplitSliceOverCudaThreadsConstStride, 1_usize>, @@ -33,7 +33,9 @@ where } #[cfg(not(target_os = "cuda"))] -impl ValueBuffer { +impl + ValueBuffer +{ /// # Errors /// Returns a `rustacuda::errors::CudaError` iff an error occurs inside CUDA pub fn new(block_size: &BlockSize, grid_size: &GridSize) -> CudaResult { @@ -67,7 +69,9 @@ impl ValueBuff } #[cfg(not(target_os = "cuda"))] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn iter(&self) -> impl Iterator> { self.mask .iter() @@ -90,7 +94,7 @@ impl ValueBuffer } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl ValueBuffer { pub fn with_value_for_core) -> Option>(&mut self, inner: F) { let value = if self .mask @@ -117,7 +121,9 @@ impl ValueBuffer { } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn take_value_for_core(&mut self) -> Option { #[allow(clippy::option_if_let_else)] if let Some(mask) = self.mask.get_mut(0) { @@ -135,7 +141,9 @@ impl ValueBuffer } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn put_value_for_core(&mut self, value: Option) { if let Some(mask) = self.mask.get_mut(0) { mask.write(value.is_some()); @@ -148,13 +156,15 @@ impl ValueBuffer } #[cfg(not(target_os = "cuda"))] -pub struct ValueRefMut<'v, T: StackOnly, const M2D: bool> { +pub struct ValueRefMut<'v, T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool> { mask: &'v mut CudaExchangeItem, value: &'v mut CudaExchangeItem, M2D, true>, } #[cfg(not(target_os = "cuda"))] -impl<'v, T: StackOnly, const M2D: bool> ValueRefMut<'v, T, M2D> { +impl<'v, T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool> + ValueRefMut<'v, T, M2D> +{ pub fn take(&mut self) -> Option { if *self.mask.read() { self.mask.write(false); @@ -176,7 +186,7 @@ impl<'v, T: StackOnly, const M2D: bool> ValueRefMut<'v, T, M2D> { } #[cfg(not(target_os = "cuda"))] -impl<'v, T: StackOnly> ValueRefMut<'v, T, true> { +impl<'v, T: StackOnly + PortableBitSemantics + TypeGraphLayout> ValueRefMut<'v, T, true> { #[must_use] pub fn as_mut(&mut self) -> Option<&mut T> { if *self.mask.read() { diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index d1cad4374..a726bf7d3 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "final", "host"], optional = true } diff --git a/necsim/impls/no-std/src/array2d.rs b/necsim/impls/no-std/src/array2d.rs index 14fe7fc83..dd4552ebe 100644 --- a/necsim/impls/no-std/src/array2d.rs +++ b/necsim/impls/no-std/src/array2d.rs @@ -10,12 +10,12 @@ use core::ops::{Index, IndexMut}; /// A fixed sized two-dimensional array. #[derive(Clone, Eq, PartialEq)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr( feature = "cuda", cuda( free = "T", - bound = "T: rust_cuda::safety::StackOnly + const_type_layout::TypeGraphLayout" + bound = "T: rust_cuda::safety::PortableBitSemantics + const_type_layout::TypeGraphLayout" ) )] pub struct Array2D { diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/mod.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/mod.rs index cecc4cd34..b9b31d60b 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/mod.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/mod.rs @@ -145,7 +145,7 @@ impl< match ordered_active_locations.last() { Some(location) if location == lineage.indexed_location.location() => (), - _ => ordered_active_locations.push(lineage.indexed_location.location().clone()), + _ => ordered_active_locations.push(*lineage.indexed_location.location()), }; let _local_reference = lineage_store diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/sampler.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/sampler.rs index 0ccbe5a34..eaa763420 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/sampler.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/sampler.rs @@ -153,7 +153,7 @@ impl< ) { self.last_event_time = lineage.last_event_time; - let location = lineage.indexed_location.location().clone(); + let location = *lineage.indexed_location.location(); let _lineage_reference = simulation .lineage_store diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs index b69bc20c0..598721483 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct ConstEventTimeSampler { event_time: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs index 8b6bdc9c4..9e7b1207e 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs @@ -11,7 +11,7 @@ const INV_PHI: u64 = 0x9e37_79b9_7f4a_7c15_u64; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct ExpEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs index 5685d57fe..c6ac3227d 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct FixedEventTimeSampler([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs index be31a8a60..476685396 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct GeometricEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs index fcd1355ab..db7a42683 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs @@ -11,7 +11,7 @@ const INV_PHI: u64 = 0x9e37_79b9_7f4a_7c15_u64; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct PoissonEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs index 1aafbee33..eb5243a48 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs @@ -25,7 +25,7 @@ use event_time_sampler::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct IndependentActiveLineageSampler< M: MathsCore, @@ -37,10 +37,7 @@ pub struct IndependentActiveLineageSampler< N: SpeciationProbability, J: EventTimeSampler, > { - #[cfg_attr( - feature = "cuda", - cuda(embed = "Option>") - )] + #[cfg_attr(feature = "cuda", cuda(embed))] active_lineage: Option, min_event_time: NonNegativeF64, last_event_time: NonNegativeF64, diff --git a/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs b/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs index 0e9a16f6a..f15e3f672 100644 --- a/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs +++ b/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs @@ -15,7 +15,7 @@ use crate::cogs::lineage_store::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H"))] pub struct IndependentCoalescenceSampler>(PhantomData<(M, H)>); diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs index 7b71b472b..3be2d4f16 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs @@ -10,7 +10,7 @@ use crate::cogs::habitat::almost_infinite::AlmostInfiniteHabitat; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "G"))] pub struct AlmostInfiniteNormalDispersalSampler> { sigma: NonNegativeF64, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs index 5485cce7d..68e561bdf 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs @@ -40,7 +40,7 @@ impl From for Range { } #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H", free = "G"))] pub struct InMemoryPackedAliasDispersalSampler, G: RngCore> { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/separable_alias/dispersal.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/separable_alias/dispersal.rs index 84bf3f84c..9d04d0636 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/separable_alias/dispersal.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/separable_alias/dispersal.rs @@ -23,11 +23,11 @@ impl, G: RngCore> DispersalSampler self.get_self_dispersal_probability_at_location(location, habitat); if self_dispersal_at_location >= 1.0_f64 { - return location.clone(); + return *location; } if self_dispersal_at_location > 0.0_f64 && rng.sample_event(self_dispersal_at_location) { - return location.clone(); + return *location; } self.sample_non_self_dispersal_from_location(location, habitat, rng) diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs index 23fbe2a0e..d2018d513 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs @@ -10,7 +10,7 @@ use crate::cogs::habitat::non_spatial::NonSpatialHabitat; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "G"))] pub struct NonSpatialDispersalSampler> { marker: PhantomData<(M, G)>, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs index 9664e50bb..744182261 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs @@ -11,7 +11,7 @@ use crate::cogs::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct SpatiallyImplicitDispersalSampler> { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs index 996dc2684..1ad63b0b7 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs @@ -28,7 +28,7 @@ pub trait AntiTrespassingDispersalSampler, G: RngCor #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct TrespassingDispersalSampler< M: MathsCore, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs index 22e3216d2..26bef8225 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs @@ -9,7 +9,7 @@ use super::AntiTrespassingDispersalSampler; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H", free = "G"))] pub struct UniformAntiTrespassingDispersalSampler< M: MathsCore, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs index 5f38306db..632156d19 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs @@ -14,7 +14,7 @@ use crate::cogs::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct WrappingNoiseApproximateNormalDispersalSampler> { #[cfg_attr(feature = "cuda", cuda(embed))] @@ -57,7 +57,7 @@ impl> DispersalSampler, G // If seperable dispersal is not required, this can be implemented as a // direct rejection sampling loop instead. if rng.sample_event(self.get_self_dispersal_probability_at_location(location, habitat)) { - location.clone() + *location } else { self.sample_non_self_dispersal_from_location(location, habitat, rng) } diff --git a/necsim/impls/no-std/src/cogs/emigration_exit/never.rs b/necsim/impls/no-std/src/cogs/emigration_exit/never.rs index 74a68fdda..62e5320a5 100644 --- a/necsim/impls/no-std/src/cogs/emigration_exit/never.rs +++ b/necsim/impls/no-std/src/cogs/emigration_exit/never.rs @@ -8,7 +8,7 @@ use necsim_core_bond::{NonNegativeF64, PositiveF64}; #[allow(clippy::module_name_repetitions)] #[derive(Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct NeverEmigrationExit([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/mod.rs b/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/mod.rs index 2775ad348..4e32a09c0 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/mod.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/mod.rs @@ -207,7 +207,7 @@ impl< let (dispersal_target, coalescence) = simulation .coalescence_sampler .sample_coalescence_at_location( - dispersal_origin.location().clone(), + *dispersal_origin.location(), &simulation.habitat, &simulation.lineage_store, CoalescenceRngSample::new(rng), diff --git a/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/probability.rs b/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/probability.rs index 8d7ccaba2..38a97fb42 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/probability.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/probability.rs @@ -17,6 +17,7 @@ pub struct ProbabilityAtLocation { } impl ProbabilityAtLocation { + #[allow(clippy::trivially_copy_pass_by_ref)] pub fn new< M: MathsCore, H: Habitat, diff --git a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs index baeb01622..e3d51b674 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs @@ -21,7 +21,7 @@ use super::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr( feature = "cuda", cuda( @@ -46,7 +46,7 @@ pub struct IndependentEventSampler< #[cfg_attr( feature = "cuda", cuda( - embed = "Option>" + embed = "Option>" ) )] min_spec_sample: Option, @@ -84,7 +84,7 @@ impl< { unsafe fn backup_unchecked(&self) -> Self { Self { - min_spec_sample: self.min_spec_sample.clone(), + min_spec_sample: self.min_spec_sample, marker: PhantomData::<(M, H, G, X, D, T, N)>, } } @@ -254,7 +254,7 @@ impl< ) -> Option { // `core::mem::replace()` would be semantically better // - but `clone()` does not spill to local memory - let old_value = self.min_spec_sample.clone(); + let old_value = self.min_spec_sample; self.min_spec_sample = new; diff --git a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs index 8b5c1cccd..d541d21a4 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs @@ -26,7 +26,7 @@ pub trait MinSpeciationTrackingEventSampler< -> Option; } -#[derive(Clone, Debug, TypeLayout)] +#[derive(Clone, Copy, Debug, TypeLayout)] #[repr(C)] pub struct SpeciationSample { speciation_sample: ClosedOpenUnitF64, @@ -47,7 +47,7 @@ impl SpeciationSample { *min_spec_sample = Some(Self { speciation_sample, sample_time, - sample_location: sample_location.clone(), + sample_location: *sample_location, }); }, }; diff --git a/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs b/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs index 914672dbc..8eafa0c6d 100644 --- a/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs +++ b/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs @@ -12,7 +12,7 @@ const ALMOST_INFINITE_EXTENT: LandscapeExtent = LandscapeExtent::new(0, 0, OffByOneU32::max(), OffByOneU32::max()); #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct AlmostInfiniteHabitat { marker: PhantomData, diff --git a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs index 838ee499f..a1520550b 100644 --- a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs @@ -14,7 +14,7 @@ use crate::array2d::Array2D; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct InMemoryHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] @@ -31,7 +31,7 @@ impl Backup for InMemoryHabitat { Self { habitat: Final::new(self.habitat.clone()), u64_injection: Final::new(self.u64_injection.clone()), - extent: self.extent.clone(), + extent: self.extent, marker: PhantomData::, } } diff --git a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs index bbba06e66..edbdf23f1 100644 --- a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs @@ -11,7 +11,7 @@ use necsim_core_bond::{OffByOneU32, OffByOneU64}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct NonSpatialHabitat { extent: LandscapeExtent, @@ -58,7 +58,7 @@ impl NonSpatialHabitat { impl Backup for NonSpatialHabitat { unsafe fn backup_unchecked(&self) -> Self { Self { - extent: self.extent.clone(), + extent: self.extent, deme: self.deme, marker: PhantomData::, } diff --git a/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs index 5f78012e9..02c822977 100644 --- a/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs @@ -13,7 +13,7 @@ const SPATIALLY_IMPLICIT_EXTENT: LandscapeExtent = #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct SpatiallyImplicitHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs b/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs index e6482e557..f8a847121 100644 --- a/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs +++ b/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs @@ -18,7 +18,7 @@ use crate::cogs::{ }; #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct WrappingNoiseHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] @@ -67,7 +67,7 @@ impl WrappingNoiseHabitat { samples.push(sum_noise_octaves::( &noise, - &Location::new( + Location::new( (location & 0x0000_0000_FFFF_FFFF) as u32, ((location >> 32) & 0x0000_0000_FFFF_FFFF) as u32, ), @@ -165,7 +165,7 @@ impl Habitat for WrappingNoiseHabitat { let noise = sum_noise_octaves::( &self.noise, - location, + *location, self.persistence, self.scale, self.octaves, @@ -217,7 +217,7 @@ impl SingletonDemesHabitat for WrappingNoiseHabitat {} // Published at https://cmaher.github.io/posts/working-with-simplex-noise/ fn sum_noise_octaves( noise: &OpenSimplexNoise, - location: &Location, + location: Location, persistence: PositiveUnitF64, scale: PositiveUnitF64, octaves: NonZeroUsize, diff --git a/necsim/impls/no-std/src/cogs/immigration_entry/never.rs b/necsim/impls/no-std/src/cogs/immigration_entry/never.rs index fc148b60e..9c4df3ac8 100644 --- a/necsim/impls/no-std/src/cogs/immigration_entry/never.rs +++ b/necsim/impls/no-std/src/cogs/immigration_entry/never.rs @@ -5,7 +5,7 @@ use necsim_core::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct NeverImmigrationEntry([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs index 95dee3b23..aa52e0783 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs @@ -50,7 +50,7 @@ impl> Backup for GillespieLineageStore { location_to_lineage_references: self .location_to_lineage_references .iter() - .map(|(k, v)| (k.clone(), v.iter().map(|x| x.backup_unchecked()).collect())) + .map(|(k, v)| (*k, v.iter().map(|x| x.backup_unchecked()).collect())) .collect(), indexed_location_to_lineage_reference: self .indexed_location_to_lineage_reference diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/store.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/store.rs index 35d94f6a9..350db3741 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/store.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/store.rs @@ -64,11 +64,11 @@ impl> LocallyCoherentLineageStore ) -> InMemoryLineageReference { let lineages_at_location = self .location_to_lineage_references - .entry(lineage.indexed_location.location().clone()) + .entry(*lineage.indexed_location.location()) .or_default(); self.indexed_location_to_lineage_reference.insert( - lineage.indexed_location.clone(), + lineage.indexed_location, (lineage.global_reference.clone(), lineages_at_location.len()), ); @@ -136,7 +136,7 @@ impl> GloballyCoherentLineageStore if references.is_empty() { None } else { - Some(location.clone()) + Some(*location) } }) } diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/mod.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/mod.rs index 64fb4bfdb..f28d514c4 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/mod.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/mod.rs @@ -58,7 +58,7 @@ impl> Backup for SingletonDemesLineage location_to_lineage_reference: self .location_to_lineage_reference .iter() - .map(|(k, v)| (k.clone(), v.backup_unchecked())) + .map(|(k, v)| (*k, v.backup_unchecked())) .collect(), _marker: PhantomData::<(M, H)>, } diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/store.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/store.rs index 4f62382fb..60340dcd6 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/store.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/store.rs @@ -64,7 +64,7 @@ impl> LocallyCoherentLineageStore InMemoryLineageReference { - let location = lineage.indexed_location.location().clone(); + let location = *lineage.indexed_location.location(); // Safety: a new unique reference is issued here, no cloning occurs let local_lineage_reference = @@ -109,7 +109,7 @@ impl> GloballyCoherentLineageStore> Backup for ClassicalLineageStore { indexed_location_to_lineage_reference: self .indexed_location_to_lineage_reference .iter() - .map(|(k, v)| (k.clone(), v.backup_unchecked())) + .map(|(k, v)| (*k, v.backup_unchecked())) .collect(), _marker: PhantomData::<(M, H)>, } diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/store.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/store.rs index 978928377..c8bbd7ea7 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/store.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/store.rs @@ -58,7 +58,7 @@ impl> LocallyCoherentLineageStore lineage: Lineage, _habitat: &H, ) -> InMemoryLineageReference { - let indexed_location = lineage.indexed_location.clone(); + let indexed_location = lineage.indexed_location; // Safety: a new unique reference is issued here, no cloning occurs let local_lineage_reference = diff --git a/necsim/impls/no-std/src/cogs/lineage_store/independent.rs b/necsim/impls/no-std/src/cogs/lineage_store/independent.rs index d20b0dbd1..606be853e 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/independent.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/independent.rs @@ -7,7 +7,7 @@ use necsim_core::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H"))] pub struct IndependentLineageStore> { marker: PhantomData<(M, H)>, diff --git a/necsim/impls/no-std/src/cogs/origin_sampler/in_memory.rs b/necsim/impls/no-std/src/cogs/origin_sampler/in_memory.rs index 4ea417586..dac8b6c22 100644 --- a/necsim/impls/no-std/src/cogs/origin_sampler/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/origin_sampler/in_memory.rs @@ -113,7 +113,7 @@ impl<'h, M: MathsCore, I: Iterator> Iterator for InMemoryOriginSampl self.next_location_index += u32::try_from(index_difference).unwrap(); Some(Lineage::new( - IndexedLocation::new(next_location.clone(), self.next_location_index), + IndexedLocation::new(*next_location, self.next_location_index), self.habitat, )) } diff --git a/necsim/impls/no-std/src/cogs/origin_sampler/non_spatial.rs b/necsim/impls/no-std/src/cogs/origin_sampler/non_spatial.rs index 8cebb7036..d940a32c7 100644 --- a/necsim/impls/no-std/src/cogs/origin_sampler/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/origin_sampler/non_spatial.rs @@ -106,7 +106,7 @@ impl<'h, M: MathsCore, I: Iterator> Iterator for NonSpatialOriginSam self.next_location_index += u32::try_from(index_difference).unwrap(); Some(Lineage::new( - IndexedLocation::new(next_location.clone(), self.next_location_index), + IndexedLocation::new(*next_location, self.next_location_index), self.habitat, )) } diff --git a/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs index d50e77707..a542e24b1 100644 --- a/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs @@ -7,7 +7,7 @@ use necsim_core_bond::{ClosedUnitF64, OpenClosedUnitF64 as PositiveUnitF64}; use crate::cogs::habitat::spatially_implicit::SpatiallyImplicitHabitat; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct SpatiallyImplicitSpeciationProbability { meta_speciation_probability: PositiveUnitF64, diff --git a/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs b/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs index dd8d2dfae..82ceeeba7 100644 --- a/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs +++ b/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs @@ -5,7 +5,7 @@ use necsim_core::{ use necsim_core_bond::ClosedUnitF64; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct UniformSpeciationProbability { speciation_probability: ClosedUnitF64, diff --git a/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs b/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs index 6b83d35be..4fce34f0c 100644 --- a/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs @@ -12,7 +12,7 @@ use crate::{array2d::Array2D, cogs::habitat::in_memory::InMemoryHabitat}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct InMemoryTurnoverRate { #[cfg_attr(feature = "cuda", cuda(embed))] turnover_rate: Final>, diff --git a/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs b/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs index 99411a19a..5255625bf 100644 --- a/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs +++ b/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs @@ -5,7 +5,7 @@ use necsim_core::{ use necsim_core_bond::{NonNegativeF64, PositiveF64}; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct UniformTurnoverRate { turnover_rate: PositiveF64, diff --git a/necsim/impls/no-std/src/decomposition/equal/area.rs b/necsim/impls/no-std/src/decomposition/equal/area.rs index 1e418c019..8d61e05f2 100644 --- a/necsim/impls/no-std/src/decomposition/equal/area.rs +++ b/necsim/impls/no-std/src/decomposition/equal/area.rs @@ -12,7 +12,7 @@ impl> EqualDecomposition { /// Returns `Ok(Self)` iff the `habitat` can be partitioned into /// `subdomain.size()` by area, otherwise returns `Err(Self)`. pub fn area(habitat: &H, subdomain: Partition) -> Result { - let extent = habitat.get_extent().clone(); + let extent = *habitat.get_extent(); let mut indices = Vec::with_capacity(subdomain.size().get() as usize); diff --git a/necsim/impls/no-std/src/decomposition/equal/mod.rs b/necsim/impls/no-std/src/decomposition/equal/mod.rs index 885c88103..d1ed79319 100644 --- a/necsim/impls/no-std/src/decomposition/equal/mod.rs +++ b/necsim/impls/no-std/src/decomposition/equal/mod.rs @@ -34,7 +34,7 @@ impl> Backup for EqualDecomposition { unsafe fn backup_unchecked(&self) -> Self { Self { subdomain: self.subdomain, - extent: self.extent.clone(), + extent: self.extent, morton: self.morton, indices: self.indices.clone(), _marker: PhantomData::<(M, H)>, diff --git a/necsim/impls/no-std/src/decomposition/equal/weight.rs b/necsim/impls/no-std/src/decomposition/equal/weight.rs index cc5ec1e86..a28dbffe1 100644 --- a/necsim/impls/no-std/src/decomposition/equal/weight.rs +++ b/necsim/impls/no-std/src/decomposition/equal/weight.rs @@ -12,7 +12,7 @@ impl> EqualDecomposition { /// Returns `Ok(Self)` iff the `habitat` can be partitioned into /// `subdomain.size()` by weight, otherwise returns `Err(Self)`. pub fn weight(habitat: &H, subdomain: Partition) -> Result { - let extent = habitat.get_extent().clone(); + let extent = *habitat.get_extent(); let mut total_habitat = 0; let mut indices = Vec::with_capacity(subdomain.size().get() as usize); diff --git a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs index 75c83085d..d553ee6ec 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs @@ -197,7 +197,7 @@ pub fn simulate< prior_time, event_time, global_lineage_reference: global_reference.clone(), - target: dispersal_target.clone(), + target: dispersal_target, interaction: LineageInteraction::Maybe, }); diff --git a/necsim/plugins/species/src/individual/feather/dataframe.rs b/necsim/plugins/species/src/individual/feather/dataframe.rs index e817b9fb3..ff2085649 100644 --- a/necsim/plugins/species/src/individual/feather/dataframe.rs +++ b/necsim/plugins/species/src/individual/feather/dataframe.rs @@ -21,7 +21,7 @@ impl IndividualSpeciesFeatherReporter { lineage: &GlobalLineageReference, origin: &IndexedLocation, ) { - self.origins.insert(lineage.clone(), origin.clone()); + self.origins.insert(lineage.clone(), *origin); } pub(super) fn store_individual_speciation( diff --git a/necsim/plugins/species/src/individual/sqlite/database.rs b/necsim/plugins/species/src/individual/sqlite/database.rs index d472111eb..d932b240c 100644 --- a/necsim/plugins/species/src/individual/sqlite/database.rs +++ b/necsim/plugins/species/src/individual/sqlite/database.rs @@ -18,7 +18,7 @@ impl IndividualSpeciesSQLiteReporter { lineage: &GlobalLineageReference, origin: &IndexedLocation, ) { - self.origins.insert(lineage.clone(), origin.clone()); + self.origins.insert(lineage.clone(), *origin); } pub(super) fn store_individual_speciation( diff --git a/necsim/plugins/species/src/location/feather/dataframe.rs b/necsim/plugins/species/src/location/feather/dataframe.rs index 653472f6d..f376fb83a 100644 --- a/necsim/plugins/species/src/location/feather/dataframe.rs +++ b/necsim/plugins/species/src/location/feather/dataframe.rs @@ -26,9 +26,9 @@ impl LocationSpeciesFeatherReporter { pub(super) fn store_individual_origin( &mut self, lineage: &GlobalLineageReference, - origin: &Location, + origin: Location, ) { - self.origins.insert(lineage.clone(), origin.clone()); + self.origins.insert(lineage.clone(), origin); } pub(super) fn store_individual_speciation( @@ -126,7 +126,7 @@ impl LocationSpeciesFeatherReporter { HashMap::default(); for (origin, identity, count) in std::mem::take(&mut self.speciated) { - species_index.insert((origin.clone(), identity.clone()), counts.len()); + species_index.insert((origin, identity.clone()), counts.len()); xs.push(origin.x()); ys.push(origin.y()); @@ -158,7 +158,7 @@ impl LocationSpeciesFeatherReporter { let count = self.counts.get(&lineage).copied().unwrap_or(1_u64); if let Some(identity) = self.species.get(&ancestor) { - match species_index.entry((origin.clone(), identity.clone())) { + match species_index.entry((origin, identity.clone())) { // Update the existing per-location-species record Entry::Occupied(occupied) => counts[*occupied.get()] += count, // Create a new per-location-species record @@ -219,7 +219,7 @@ impl LocationSpeciesFeatherReporter { // No-longer activate lineages and the anchor may share // location-species records with each other - match species_index.entry((origin.clone(), anchor_identity.clone())) { + match species_index.entry((origin, anchor_identity.clone())) { // Update the existing per-location-species record Entry::Occupied(occupied) => counts[*occupied.get()] += count, // Create a new per-location-species record diff --git a/necsim/plugins/species/src/location/feather/reporter.rs b/necsim/plugins/species/src/location/feather/reporter.rs index e92ae4726..718a3a7f7 100644 --- a/necsim/plugins/species/src/location/feather/reporter.rs +++ b/necsim/plugins/species/src/location/feather/reporter.rs @@ -7,7 +7,7 @@ impl Reporter for LocationSpeciesFeatherReporter { self.init = true; if speciation.prior_time == 0.0_f64 { - self.store_individual_origin(&speciation.global_lineage_reference, speciation.origin.location()); + self.store_individual_origin(&speciation.global_lineage_reference, *speciation.origin.location()); } if Some(speciation) == self.last_speciation_event.as_ref() { @@ -31,7 +31,7 @@ impl Reporter for LocationSpeciesFeatherReporter { self.init = true; if dispersal.prior_time == 0.0_f64 { - self.store_individual_origin(&dispersal.global_lineage_reference, dispersal.origin.location()); + self.store_individual_origin(&dispersal.global_lineage_reference, *dispersal.origin.location()); } // Only update the active frontier with `deduplication_probability` diff --git a/necsim/plugins/tskit/src/tree/table.rs b/necsim/plugins/tskit/src/tree/table.rs index ff717e346..be7f7423f 100644 --- a/necsim/plugins/tskit/src/tree/table.rs +++ b/necsim/plugins/tskit/src/tree/table.rs @@ -17,7 +17,7 @@ impl TskitTreeReporter { reference: &GlobalLineageReference, location: &IndexedLocation, ) { - self.origins.insert(reference.clone(), location.clone()); + self.origins.insert(reference.clone(), *location); } pub(super) fn store_individual_speciation( diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index e25ab2387..cc0711eb0 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 7587473ae..a6f9a93e7 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 2a13df0fc..bd2d815ab 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -16,4 +16,8 @@ necsim-core-bond = { path = "../../../../necsim/core/bond" } necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["cuda"] } necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"] } +[target.'cfg(target_os = "cuda")'.dependencies] +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "device", "kernel"] } + +[target.'cfg(not(target_os = "cuda"))'.dependencies] +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index 1e5724d9c..f0b30b6a2 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -1,12 +1,14 @@ #![deny(clippy::pedantic)] #![no_std] +#![feature(type_alias_impl_trait)] +#![feature(decl_macro)] #![cfg_attr(target_os = "cuda", feature(abi_ptx))] #![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] -#![cfg_attr(target_os = "cuda", feature(panic_info_message))] -#![cfg_attr(target_os = "cuda", feature(atomic_from_mut))] -#![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] -#![cfg_attr(target_os = "cuda", feature(stdsimd))] -#![cfg_attr(target_os = "cuda", feature(control_flow_enum))] +// #![cfg_attr(target_os = "cuda", feature(panic_info_message))] +// #![cfg_attr(target_os = "cuda", feature(atomic_from_mut))] +// #![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] +// #![cfg_attr(target_os = "cuda", feature(stdsimd))] +// #![cfg_attr(target_os = "cuda", feature(control_flow_enum))] #![allow(long_running_const_eval)] #![recursion_limit = "1024"] @@ -28,66 +30,73 @@ use necsim_impls_no_std::cogs::{ event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, }; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; -#[rust_cuda::common::kernel( - pub use link_kernel! as impl SimulatableKernel for SimulationKernel +#[rust_cuda::kernel::kernel( + pub use link_kernel! for impl )] #[allow(clippy::too_many_arguments)] #[allow(clippy::type_complexity)] pub fn simulate< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, >( - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - simulation: &mut ShallowCopy< - necsim_core::simulation::Simulation, + simulation: & /* mut */ + rust_cuda::kernel::param::PtxJit< + rust_cuda::kernel::param::DeepPerThreadBorrow< + necsim_core::simulation::Simulation, + >, >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - task_list: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, + task_list: & /* mut */ + rust_cuda::kernel::param::PtxJit< + rust_cuda::kernel::param::DeepPerThreadBorrow< + necsim_impls_cuda::value_buffer::ValueBuffer, + >, >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - event_buffer_reporter: &mut ShallowCopy< - necsim_impls_cuda::event_buffer::EventBuffer, + event_buffer_reporter: & /* mut */ + rust_cuda::kernel::param::PtxJit< + rust_cuda::kernel::param::DeepPerThreadBorrow< + necsim_impls_cuda::event_buffer::EventBuffer, + >, >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - min_spec_sample_buffer: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, + min_spec_sample_buffer: & /* mut */ + rust_cuda::kernel::param::PtxJit< + rust_cuda::kernel::param::DeepPerThreadBorrow< + necsim_impls_cuda::value_buffer::ValueBuffer, + >, >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - next_event_time_buffer: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, + next_event_time_buffer: & /* mut */ + rust_cuda::kernel::param::PtxJit< + rust_cuda::kernel::param::DeepPerThreadBorrow< + necsim_impls_cuda::value_buffer::ValueBuffer< + necsim_core_bond::PositiveF64, + false, + true, + >, + >, + >, + total_time_max: &rust_cuda::kernel::param::ShallowInteriorMutable< + core::sync::atomic::AtomicU64, + >, + total_steps_sum: &rust_cuda::kernel::param::ShallowInteriorMutable< + core::sync::atomic::AtomicU64, + >, + max_steps: rust_cuda::kernel::param::PerThreadShallowCopy, + max_next_event_time: rust_cuda::kernel::param::PerThreadShallowCopy< + necsim_core_bond::NonNegativeF64, >, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - total_time_max: &core::sync::atomic::AtomicU64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - total_steps_sum: &core::sync::atomic::AtomicU64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - max_steps: u64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - max_next_event_time: necsim_core_bond::NonNegativeF64, ) { task_list.with_value_for_core(|task| { // Discard the prior task (the simulation is just a temporary local copy) @@ -133,37 +142,32 @@ pub fn simulate< #[cfg(target_os = "cuda")] mod cuda_prelude { - use core::arch::nvptx; - - use rust_cuda::device::utils; + use rust_cuda::device::alloc::PTXAllocator; #[global_allocator] - static _GLOBAL_ALLOCATOR: utils::PTXAllocator = utils::PTXAllocator; + static _GLOBAL_ALLOCATOR: PTXAllocator = PTXAllocator; #[cfg(not(debug_assertions))] #[panic_handler] fn panic(_panic_info: &::core::panic::PanicInfo) -> ! { - unsafe { nvptx::trap() } + rust_cuda::device::utils::exit() } #[cfg(debug_assertions)] #[panic_handler] - fn panic(panic_info: &::core::panic::PanicInfo) -> ! { - use rust_cuda::println; - - println!( - "Panic occurred at {:?}: {:?}!", - panic_info.location(), - panic_info - .message() - .unwrap_or(&format_args!("unknown reason")) - ); - - unsafe { nvptx::trap() } + fn panic(info: &::core::panic::PanicInfo) -> ! { + rust_cuda::device::utils::pretty_panic_handler(info, true, true) } + #[cfg(not(debug_assertions))] #[alloc_error_handler] fn alloc_error_handler(_: core::alloc::Layout) -> ! { - unsafe { nvptx::trap() } + rust_cuda::device::utils::exit() + } + + #[cfg(debug_assertions)] + #[alloc_error_handler] + fn alloc_error_handler(layout: core::alloc::Layout) -> ! { + rust_cuda::device::utils::pretty_alloc_error_handler(layout) } } diff --git a/rustcoalescence/algorithms/cuda/src/cuda.rs b/rustcoalescence/algorithms/cuda/src/cuda.rs index c523bf2d2..d8222ebb1 100644 --- a/rustcoalescence/algorithms/cuda/src/cuda.rs +++ b/rustcoalescence/algorithms/cuda/src/cuda.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ context::{Context, CurrentContext, ResourceLimit}, prelude::*, }; @@ -13,7 +13,7 @@ pub fn with_initialised_cuda, F: FnOnce() -> Result> inner: F, ) -> Result { // Initialize the CUDA API - rust_cuda::rustacuda::init(CudaFlags::empty())?; + rust_cuda::deps::rustacuda::init(CudaFlags::empty())?; // Get the first device let device = Device::get_device(device)?; diff --git a/rustcoalescence/algorithms/cuda/src/error.rs b/rustcoalescence/algorithms/cuda/src/error.rs index e69898247..f81a9e3c1 100644 --- a/rustcoalescence/algorithms/cuda/src/error.rs +++ b/rustcoalescence/algorithms/cuda/src/error.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::error::CudaError as RustaCudaError; +use rust_cuda::deps::rustacuda::error::CudaError as RustaCudaError; use serde::{Deserialize, Serialize}; #[derive(thiserror::Error, Debug, Clone, Serialize, Deserialize)] diff --git a/rustcoalescence/algorithms/cuda/src/info.rs b/rustcoalescence/algorithms/cuda/src/info.rs index 1abf4ec07..78a5452ea 100644 --- a/rustcoalescence/algorithms/cuda/src/info.rs +++ b/rustcoalescence/algorithms/cuda/src/info.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ context::{CurrentContext, ResourceLimit}, function::{Function, FunctionAttribute}, }; diff --git a/rustcoalescence/algorithms/gillespie/src/event_skipping/initialiser/fixup.rs b/rustcoalescence/algorithms/gillespie/src/event_skipping/initialiser/fixup.rs index 661b792d9..e47854154 100644 --- a/rustcoalescence/algorithms/gillespie/src/event_skipping/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/gillespie/src/event_skipping/initialiser/fixup.rs @@ -193,7 +193,7 @@ where global_lineage_reference: coalescing_lineage.global_reference, prior_time: coalescing_lineage.last_event_time, event_time: self.restart_at, - origin: coalescing_lineage.indexed_location.clone(), + origin: coalescing_lineage.indexed_location, target: coalescing_lineage.indexed_location, interaction: LineageInteraction::Coalescence(parent), }) diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/initialiser/fixup.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/initialiser/fixup.rs index 1011efbb5..e70408ff3 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/initialiser/fixup.rs @@ -151,7 +151,7 @@ impl, M: MathsCore, G: RngCore, O: Scena global_lineage_reference: coalescing_lineage.global_reference, prior_time: coalescing_lineage.last_event_time, event_time: self.restart_at, - origin: coalescing_lineage.indexed_location.clone(), + origin: coalescing_lineage.indexed_location, target: coalescing_lineage.indexed_location, interaction: LineageInteraction::Coalescence(parent), }) diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/initialiser/fixup.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/initialiser/fixup.rs index 7fc69036f..1dd66092c 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/initialiser/fixup.rs @@ -170,7 +170,7 @@ impl, M: MathsCore, G: RngCore, O: Scena global_lineage_reference: coalescing_lineage.global_reference, prior_time: coalescing_lineage.last_event_time, event_time: self.restart_at, - origin: coalescing_lineage.indexed_location.clone(), + origin: coalescing_lineage.indexed_location, target: coalescing_lineage.indexed_location, interaction: LineageInteraction::Coalescence(parent), }) From 29b098ebe8b092f460dbcfa9a6bf9522b5feaf2c Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 8 Jan 2024 12:06:56 +0000 Subject: [PATCH 02/28] Some progress --- Cargo.lock | 6 +- necsim/core/Cargo.toml | 4 +- necsim/core/src/lineage.rs | 10 +- necsim/impls/cuda/Cargo.toml | 4 +- necsim/impls/no-std/Cargo.toml | 4 +- necsim/impls/no-std/src/cogs/rng/seahash.rs | 9 +- necsim/impls/no-std/src/cogs/rng/wyhash.rs | 9 +- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/src/lib.rs | 189 ++---------------- .../algorithms/cuda/cpu-kernel/src/link.rs | 12 +- .../algorithms/cuda/cpu-kernel/src/patch.rs | 134 +++---------- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 +- .../algorithms/cuda/gpu-kernel/src/lib.rs | 93 ++++----- .../algorithms/cuda/src/initialiser/fixup.rs | 2 +- .../cuda/src/initialiser/genesis.rs | 2 +- .../algorithms/cuda/src/initialiser/mod.rs | 2 +- .../algorithms/cuda/src/initialiser/resume.rs | 2 +- rustcoalescence/algorithms/cuda/src/launch.rs | 48 +++-- rustcoalescence/algorithms/cuda/src/lib.rs | 18 +- .../cuda/src/parallelisation/monolithic.rs | 18 +- 21 files changed, 172 insertions(+), 402 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e14cc09f2..0c69548d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=fc18c79#fc18c7908f94ebc1e76ba5b722ffe7118b618035" +source = "git+https://github.com/juntyr/rust-cuda?rev=8dc0c6d#8dc0c6df52348fd119230ca8f1a4edc9562a1f86" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=fc18c79#fc18c7908f94ebc1e76ba5b722ffe7118b618035" +source = "git+https://github.com/juntyr/rust-cuda?rev=8dc0c6d#8dc0c6df52348fd119230ca8f1a4edc9562a1f86" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=fc18c79#fc18c7908f94ebc1e76ba5b722ffe7118b618035" +source = "git+https://github.com/juntyr/rust-cuda?rev=8dc0c6d#8dc0c6df52348fd119230ca8f1a4edc9562a1f86" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index a800856e5..a3f687c7b 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "host"], optional = true } diff --git a/necsim/core/src/lineage.rs b/necsim/core/src/lineage.rs index 1bf05b75a..24e72049f 100644 --- a/necsim/core/src/lineage.rs +++ b/necsim/core/src/lineage.rs @@ -99,17 +99,17 @@ impl From> for LineageInteraction { #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, TypeLayout)] #[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] -#[cuda(ignore)] +#[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(deny_unknown_fields)] pub struct Lineage { - #[cuda(embed)] - #[cuda(ignore)] + #[cfg_attr(feature = "cuda", cuda(embed))] + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "id", alias = "ref")] pub global_reference: GlobalLineageReference, - #[cuda(ignore)] + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "time")] pub last_event_time: NonNegativeF64, - #[cuda(ignore)] + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "loc")] pub indexed_location: IndexedLocation, } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 0c6d9dd32..3a6bd5f2b 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index a726bf7d3..7c2c254f6 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "final", "host"], optional = true } diff --git a/necsim/impls/no-std/src/cogs/rng/seahash.rs b/necsim/impls/no-std/src/cogs/rng/seahash.rs index 93cc87ecd..6487a3531 100644 --- a/necsim/impls/no-std/src/cogs/rng/seahash.rs +++ b/necsim/impls/no-std/src/cogs/rng/seahash.rs @@ -1,6 +1,6 @@ use core::marker::PhantomData; -use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; +use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; use serde::{Deserialize, Serialize}; @@ -17,12 +17,7 @@ pub struct SeaHash { marker: PhantomData, } -#[contract_trait] -impl Backup for SeaHash { - unsafe fn backup_unchecked(&self) -> Self { - self.clone() - } -} +impl Copy for SeaHash {} impl RngCore for SeaHash { type Seed = [u8; 8]; diff --git a/necsim/impls/no-std/src/cogs/rng/wyhash.rs b/necsim/impls/no-std/src/cogs/rng/wyhash.rs index c4fdeed68..df86272f6 100644 --- a/necsim/impls/no-std/src/cogs/rng/wyhash.rs +++ b/necsim/impls/no-std/src/cogs/rng/wyhash.rs @@ -1,6 +1,6 @@ use core::marker::PhantomData; -use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; +use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; use serde::{Deserialize, Serialize}; @@ -23,12 +23,7 @@ pub struct WyHash { marker: PhantomData, } -#[contract_trait] -impl Backup for WyHash { - unsafe fn backup_unchecked(&self) -> Self { - self.clone() - } -} +impl Copy for WyHash {} impl RngCore for WyHash { type Seed = [u8; 8]; diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index cc0711eb0..cba6edccd 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index a6f9a93e7..23f9a9bdd 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs index 5c908339e..864726a58 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs @@ -1,6 +1,6 @@ #![deny(clippy::pedantic)] -#![allow(incomplete_features)] -#![feature(specialization)] +#![feature(c_str_literals)] +#![feature(min_specialization)] #![recursion_limit = "1024"] use necsim_core::{ @@ -16,178 +16,25 @@ use necsim_impls_no_std::cogs::{ event_sampler::tracking::MinSpeciationTrackingEventSampler, }; -use rust_cuda::{ - common::RustToCuda, - host::{CudaDropWrapper, LaunchConfig, LaunchPackage, Launcher, TypedKernel}, - rustacuda::{ - error::CudaResult, - function::{BlockSize, Function, GridSize}, - stream::Stream, - }, -}; - -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rust_cuda::lend::RustToCuda; mod link; mod patch; -pub type KernelCompilationCallback = dyn FnMut(&Function) -> CudaResult<()>; - -#[allow(clippy::module_name_repetitions)] -pub struct SimulationKernel< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, +#[allow(clippy::type_complexity)] +pub struct SimulationKernelPtx< + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, -> { - #[allow(clippy::type_complexity)] - kernel: TypedKernel< - dyn SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >, - >, - stream: CudaDropWrapper, - grid: GridSize, - block: BlockSize, - ptx_jit: bool, - watcher: Box, -} - -impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, - ReportSpeciation: Boolean, - ReportDispersal: Boolean, - > SimulationKernel -{ - /// # Errors - /// - /// Returns a `CudaError` if loading the CUDA kernel failed. - pub fn try_new( - stream: Stream, - grid: GridSize, - block: BlockSize, - ptx_jit: bool, - on_compile: Box, - ) -> CudaResult - where - Self: SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >, - { - let stream = CudaDropWrapper::from(stream); - let kernel = Self::new_kernel()?; - - Ok(Self { - kernel, - stream, - grid, - block, - ptx_jit, - watcher: on_compile, - }) - } -} - -impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, - ReportSpeciation: Boolean, - ReportDispersal: Boolean, - > Launcher - for SimulationKernel -{ - type CompilationWatcher = Box; - type KernelTraitObject = dyn SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >; - - fn get_launch_package(&mut self) -> LaunchPackage { - LaunchPackage { - config: LaunchConfig { - grid: self.grid.clone(), - block: self.block.clone(), - shared_memory_size: 0_u32, - ptx_jit: self.ptx_jit, - }, - - kernel: &mut self.kernel, - stream: &mut self.stream, - - watcher: &mut self.watcher, - } - } - - fn on_compile(kernel: &Function, watcher: &mut Self::CompilationWatcher) -> CudaResult<()> { - (watcher)(kernel) - } -} +>(std::marker::PhantomData<(M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal)>); diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs index 98f3b0819..d519b533a 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs @@ -1,6 +1,4 @@ -use rustcoalescence_algorithms_cuda_gpu_kernel::{SimulatableKernel, SimulationKernelArgs}; - -use crate::SimulationKernel; +use crate::SimulationKernelPtx; macro_rules! link_kernel { ($habitat:ty, $dispersal:ty, $turnover:ty, $speciation:ty) => { @@ -29,7 +27,7 @@ macro_rules! link_kernel { $habitat:ty, $dispersal:ty, $turnover:ty, $speciation:ty, $report_speciation:ty, $report_dispersal:ty ) => { - rustcoalescence_algorithms_cuda_gpu_kernel::link_kernel!( + rustcoalescence_algorithms_cuda_gpu_kernel::link! { impl simulate< necsim_impls_cuda::cogs::maths::NvptxMathsCore, $habitat, necsim_impls_cuda::cogs::rng::CudaRng< @@ -82,9 +80,9 @@ macro_rules! link_kernel { >, $report_speciation, $report_dispersal, - ); + > for SimulationKernelPtx } - rustcoalescence_algorithms_cuda_gpu_kernel::link_kernel!( + rustcoalescence_algorithms_cuda_gpu_kernel::link! { impl simulate< necsim_impls_cuda::cogs::maths::NvptxMathsCore, $habitat, necsim_impls_cuda::cogs::rng::CudaRng< @@ -197,7 +195,7 @@ macro_rules! link_kernel { >, $report_speciation, $report_dispersal, - ); + > for SimulationKernelPtx } }; } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index 129565624..9aa8fb7f8 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -1,31 +1,22 @@ -use std::sync::atomic::AtomicU64; +use std::ffi::CStr; use necsim_core::{ cogs::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, }, - lineage::Lineage, reporter::boolean::{Boolean, False, True}, - simulation::Simulation, }; -use necsim_core_bond::{NonNegativeF64, PositiveF64}; -use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, - event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, + event_sampler::tracking::MinSpeciationTrackingEventSampler, }; -use rust_cuda::{ - common::{DeviceAccessible, RustToCuda}, - host::{HostAndDeviceConstRef, HostAndDeviceMutRef, TypedKernel}, - rustacuda::error::CudaResult, - utils::device_copy::SafeDeviceCopyWrapper, -}; +use rust_cuda::{lend::RustToCuda, kernel::CompiledKernelPtx}; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; -use crate::SimulationKernel; +use crate::SimulationKernelPtx; // If `Kernel` is implemented for `ReportSpeciation` x `ReportDispersal`, i.e. // for {`False`, `True`} x {`False`, `True`} then it is implemented for all @@ -38,104 +29,37 @@ extern "C" { #[allow(clippy::trait_duplication_in_bounds)] unsafe impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, - > SimulatableKernel - for SimulationKernel + > CompiledKernelPtx> + for SimulationKernelPtx where - SimulationKernel: - SimulatableKernel, - SimulationKernel: - SimulatableKernel, - SimulationKernel: - SimulatableKernel, - SimulationKernel: - SimulatableKernel, + SimulationKernelPtx: + CompiledKernelPtx>, + SimulationKernelPtx: + CompiledKernelPtx>, + SimulationKernelPtx: + CompiledKernelPtx>, + SimulationKernelPtx: + CompiledKernelPtx>, { - default fn get_ptx_str() -> &'static str { - unsafe { unreachable_cuda_simulation_linking_reporter() } - } - - default fn new_kernel() -> CudaResult< - TypedKernel< - dyn SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >, - >, - > { - unsafe { unreachable_cuda_simulation_linking_reporter() } - } - - default fn simulate( - &mut self, - _simulation: &mut Simulation, - _task_list: &mut ValueBuffer, - _event_buffer_reporter: &mut EventBuffer, - _min_spec_sample_buffer: &mut ValueBuffer, - _next_event_time_buffer: &mut ValueBuffer, - _total_time_max: &AtomicU64, - _total_steps_sum: &AtomicU64, - _max_steps: u64, - _max_next_event_time: NonNegativeF64, - ) -> CudaResult<()> { + default fn get_ptx() -> &'static CStr { unsafe { unreachable_cuda_simulation_linking_reporter() } } - default fn simulate_raw( - &mut self, - _simulation: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - _task_list: HostAndDeviceMutRef< - DeviceAccessible< as RustToCuda>::CudaRepresentation>, - >, - _event_buffer_reporter: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - _min_spec_sample_buffer: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - _next_event_time_buffer: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - _total_time_max: HostAndDeviceConstRef>, - _total_steps_sum: HostAndDeviceConstRef>, - _max_steps: SafeDeviceCopyWrapper, - _max_next_event_time: SafeDeviceCopyWrapper, - ) -> CudaResult<()> { + default fn get_entry_point() -> &'static CStr { unsafe { unreachable_cuda_simulation_linking_reporter() } } } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index bd2d815ab..f625e166b 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "fc18c79", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index f0b30b6a2..8abbe9b12 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -2,6 +2,7 @@ #![no_std] #![feature(type_alias_impl_trait)] #![feature(decl_macro)] +#![feature(c_str_literals)] #![cfg_attr(target_os = "cuda", feature(abi_ptx))] #![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] // #![cfg_attr(target_os = "cuda", feature(panic_info_message))] @@ -17,7 +18,7 @@ extern crate alloc; #[cfg(target_os = "cuda")] use core::ops::ControlFlow; -use necsim_core::{ +pub use necsim_core::{ cogs::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, @@ -25,15 +26,17 @@ use necsim_core::{ reporter::boolean::Boolean, }; -use necsim_impls_no_std::cogs::{ +pub use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, }; -use rust_cuda::lend::RustToCuda; +pub use rust_cuda::lend::RustToCuda; -#[rust_cuda::kernel::kernel( - pub use link_kernel! for impl +#[rust_cuda::kernel::kernel(pub use link! for impl)] +#[kernel( + allow(ptx::double_precision_use), + forbid(ptx::local_memory_usage, ptx::register_spills) )] #[allow(clippy::too_many_arguments)] #[allow(clippy::type_complexity)] @@ -98,46 +101,46 @@ pub fn simulate< necsim_core_bond::NonNegativeF64, >, ) { - task_list.with_value_for_core(|task| { - // Discard the prior task (the simulation is just a temporary local copy) - core::mem::drop( - simulation - .active_lineage_sampler_mut() - .replace_active_lineage(task), - ); - - // Discard the prior sample (the simulation is just a temporary local copy) - simulation.event_sampler_mut().replace_min_speciation(None); - - let mut final_next_event_time = None; - - let (time, steps) = simulation.simulate_incremental_early_stop( - |_, steps, next_event_time| { - final_next_event_time = Some(next_event_time); - - if steps >= max_steps || next_event_time >= max_next_event_time { - ControlFlow::Break(()) - } else { - ControlFlow::Continue(()) - } - }, - event_buffer_reporter, - ); - - next_event_time_buffer.put_value_for_core(final_next_event_time); - - if steps > 0 { - total_time_max.fetch_max(time.get().to_bits(), core::sync::atomic::Ordering::Relaxed); - total_steps_sum.fetch_add(steps, core::sync::atomic::Ordering::Relaxed); - } - - min_spec_sample_buffer - .put_value_for_core(simulation.event_sampler_mut().replace_min_speciation(None)); - - simulation - .active_lineage_sampler_mut() - .replace_active_lineage(None) - }); + // task_list.with_value_for_core(|task| { + // // Discard the prior task (the simulation is just a temporary local copy) + // core::mem::drop( + // simulation + // .active_lineage_sampler_mut() + // .replace_active_lineage(task), + // ); + + // // Discard the prior sample (the simulation is just a temporary local copy) + // simulation.event_sampler_mut().replace_min_speciation(None); + + // let mut final_next_event_time = None; + + // let (time, steps) = simulation.simulate_incremental_early_stop( + // |_, steps, next_event_time| { + // final_next_event_time = Some(next_event_time); + + // if steps >= max_steps || next_event_time >= max_next_event_time { + // ControlFlow::Break(()) + // } else { + // ControlFlow::Continue(()) + // } + // }, + // event_buffer_reporter, + // ); + + // next_event_time_buffer.put_value_for_core(final_next_event_time); + + // if steps > 0 { + // total_time_max.fetch_max(time.get().to_bits(), core::sync::atomic::Ordering::Relaxed); + // total_steps_sum.fetch_add(steps, core::sync::atomic::Ordering::Relaxed); + // } + + // min_spec_sample_buffer + // .put_value_for_core(simulation.event_sampler_mut().replace_min_speciation(None)); + + // simulation + // .active_lineage_sampler_mut() + // .replace_active_lineage(None) + // }); } #[cfg(target_os = "cuda")] diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs b/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs index 06401c685..006670903 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs @@ -28,7 +28,7 @@ use rustcoalescence_algorithms::{ }; use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs b/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs index 5f851c286..0775bf6bd 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs @@ -14,7 +14,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs b/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs index a1a39e87e..cbc997aaa 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs @@ -17,7 +17,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs b/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs index 2cba7640b..6102b7da9 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs @@ -17,7 +17,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_algorithms::result::ResumeError; use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 12589699a..4a6390d9e 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -25,15 +25,15 @@ use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::result::SimulationOutcome; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use rust_cuda::{ - common::RustToCuda, - rustacuda::{ + lend::RustToCuda, + deps::rustacuda::{ function::{BlockSize, GridSize}, prelude::{Stream, StreamFlags}, - }, + }, kernel::{CompiledKernelPtx, TypedPtxKernel, LaunchConfig, Launcher}, host::CudaDropWrapper, }; use crate::{ @@ -71,7 +71,7 @@ where RustToCuda, O::TurnoverRate: RustToCuda, O::SpeciationProbability: RustToCuda, - SimulationKernel< + SimulationKernelPtx< M, O::Habitat, CudaRng>, @@ -94,7 +94,7 @@ where L::ActiveLineageSampler, R::ReportSpeciation, R::ReportDispersal, - >: SimulatableKernel< + >: CompiledKernelPtx>, @@ -117,7 +117,7 @@ where L::ActiveLineageSampler, R::ReportSpeciation, R::ReportDispersal, - >, + >>, { let ( habitat, @@ -196,21 +196,29 @@ where }; let (mut status, time, steps, lineages) = with_initialised_cuda(args.device, || { - let kernel = SimulationKernel::try_new( - Stream::new(StreamFlags::NON_BLOCKING, None)?, - grid_size.clone(), - block_size.clone(), - args.ptx_jit, - Box::new(|kernel| { - crate::info::print_kernel_function_attributes("simulate", kernel); - Ok(()) - }), - )?; + let stream = CudaDropWrapper::from(Stream::new(StreamFlags::NON_BLOCKING, None)?); + + let mut kernel = TypedPtxKernel::new(Some(Box::new(|kernel| { + crate::info::print_kernel_function_attributes("simulate", kernel); + Ok(()) + }))); + + let config = LaunchConfig { + grid: grid_size, + block: block_size, + ptx_jit: args.ptx_jit, + }; + + let launcher = Launcher { + stream: &stream, + kernel: &mut kernel, + config, + }; parallelisation::monolithic::simulate( &mut simulation, - kernel, - (grid_size, block_size, args.dedup_cache, args.step_slice), + launcher, + (args.dedup_cache, args.step_slice), lineages, event_slice, pause_before, diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index e2c221dca..93819cf23 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -37,10 +37,10 @@ use rustcoalescence_algorithms::{ }; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; +use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; -use rust_cuda::common::RustToCuda; +use rust_cuda::{lend::RustToCuda, kernel::CompiledKernelPtx}; mod arguments; mod cuda; @@ -84,7 +84,7 @@ where RustToCuda, O::TurnoverRate: RustToCuda, O::SpeciationProbability: RustToCuda, - SimulationKernel< + SimulationKernelPtx< M, O::Habitat, CudaRng>, @@ -122,7 +122,7 @@ where >, R::ReportSpeciation, R::ReportDispersal, - >: SimulatableKernel< + >: CompiledKernelPtx>, @@ -160,8 +160,8 @@ where >, R::ReportSpeciation, R::ReportDispersal, - >, - SimulationKernel< + >>, + SimulationKernelPtx< M, O::Habitat, CudaRng>, @@ -217,7 +217,7 @@ where >, R::ReportSpeciation, R::ReportDispersal, - >: SimulatableKernel< + >: CompiledKernelPtx>, @@ -273,7 +273,7 @@ where >, R::ReportSpeciation, R::ReportDispersal, - >, + >>, { type LineageStore = IndependentLineageStore; type Rng = CudaRng>; diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index 66e1ff479..6162925f4 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -1,10 +1,10 @@ use std::{collections::VecDeque, convert::TryInto, num::NonZeroU64, sync::atomic::AtomicU64}; use rust_cuda::{ - common::RustToCuda, - host::{HostAndDeviceMutRef, LendToCuda}, - rustacuda::function::{BlockSize, GridSize}, - utils::exchange::wrapper::ExchangeWrapperOnHost, + lend::{RustToCuda, LendToCuda}, + host::HostAndDeviceMutRef, + deps::rustacuda::function::{BlockSize, GridSize}, + utils::exchange::wrapper::ExchangeWrapperOnHost, kernel::Launcher, }; use necsim_core::{ @@ -37,8 +37,8 @@ use necsim_partitioning_core::LocalPartition; use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use crate::error::CudaError; @@ -66,7 +66,7 @@ pub fn simulate< LI: IntoIterator, >( simulation: &mut Simulation, - mut kernel: SimulationKernel< + mut launcher: Launcher>::WaterLevelReporter as Reporter>::ReportSpeciation, <>::WaterLevelReporter as Reporter>::ReportDispersal, - >, - config: (GridSize, BlockSize, DedupCache, NonZeroU64), + >>, + config: (DedupCache, NonZeroU64), lineages: LI, event_slice: EventSlice, pause_before: Option, From 52f4ce2603a1aece48b6e275116a5f2f23cb9fd2 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 9 Jan 2024 07:14:10 +0000 Subject: [PATCH 03/28] Further async integration progress, rustcoalescence fails to compile --- Cargo.lock | 6 +- necsim/core/Cargo.toml | 4 +- necsim/impls/cuda/Cargo.toml | 4 +- necsim/impls/cuda/src/event_buffer.rs | 163 +++++++++----- necsim/impls/cuda/src/value_buffer.rs | 17 +- necsim/impls/no-std/Cargo.toml | 4 +- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/src/lib.rs | 19 +- .../algorithms/cuda/cpu-kernel/src/patch.rs | 8 +- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 +- .../algorithms/cuda/gpu-kernel/src/lib.rs | 106 +++++---- .../algorithms/cuda/src/initialiser/fixup.rs | 24 ++- .../cuda/src/initialiser/genesis.rs | 22 +- .../algorithms/cuda/src/initialiser/mod.rs | 24 ++- .../algorithms/cuda/src/initialiser/resume.rs | 24 ++- rustcoalescence/algorithms/cuda/src/launch.rs | 94 ++++---- rustcoalescence/algorithms/cuda/src/lib.rs | 202 +++++++++++------- .../cuda/src/parallelisation/monolithic.rs | 175 ++++++++------- 19 files changed, 517 insertions(+), 387 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c69548d1..39a949033 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=8dc0c6d#8dc0c6df52348fd119230ca8f1a4edc9562a1f86" +source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=8dc0c6d#8dc0c6df52348fd119230ca8f1a4edc9562a1f86" +source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=8dc0c6d#8dc0c6df52348fd119230ca8f1a4edc9562a1f86" +source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index a3f687c7b..ef8b0dccc 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 3a6bd5f2b..984ba4a50 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"] } diff --git a/necsim/impls/cuda/src/event_buffer.rs b/necsim/impls/cuda/src/event_buffer.rs index d76f2d49e..2527ddef0 100644 --- a/necsim/impls/cuda/src/event_buffer.rs +++ b/necsim/impls/cuda/src/event_buffer.rs @@ -1,13 +1,19 @@ use core::fmt; +use const_type_layout::TypeGraphLayout; #[cfg(not(target_os = "cuda"))] use rust_cuda::deps::rustacuda::{ error::CudaResult, function::{BlockSize, GridSize}, }; -use rust_cuda::utils::{ - aliasing::SplitSliceOverCudaThreadsDynamicStride, exchange::buffer::CudaExchangeBuffer, +use rust_cuda::{ + lend::RustToCudaProxy, + safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly}, + utils::{ + aliasing::SplitSliceOverCudaThreadsDynamicStride, + exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem}, + }, }; use necsim_core::{ @@ -27,8 +33,13 @@ use super::utils::MaybeSome; #[derive(rust_cuda::lend::LendRustToCuda)] #[cuda(free = "ReportSpeciation", free = "ReportDispersal")] pub struct EventBuffer { + #[cfg(not(target_os = "cuda"))] #[cuda(embed)] event_mask: SplitSliceOverCudaThreadsDynamicStride>, + #[cfg(target_os = "cuda")] + #[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride>")] + event_mask: CudaExchangeSlice>, + #[cfg(not(target_os = "cuda"))] #[cuda(embed)] event_buffer: SplitSliceOverCudaThreadsDynamicStride< CudaExchangeBuffer< @@ -37,8 +48,41 @@ pub struct EventBuffer { true, >, >, - max_events: usize, - event_counter: usize, + #[cfg(target_os = "cuda")] + #[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride< + CudaExchangeBuffer< + MaybeSome< as EventType>::Event>, + false, + true, + >, +>")] + event_buffer: CudaExchangeSlice< + CudaExchangeItem< + MaybeSome< as EventType>::Event>, + false, + true, + >, + >, +} + +// Safety: +// - no mutable aliasing occurs since all parts implement SafeMutableAliasing +// - dropping does not trigger (de)alloc since EventBuffer doesn't impl Drop and +// all parts implement SafeMutableAliasing +// - EventBuffer has no shallow mutable state +unsafe impl SafeMutableAliasing + for EventBuffer +where + SplitSliceOverCudaThreadsDynamicStride>: + SafeMutableAliasing, + SplitSliceOverCudaThreadsDynamicStride< + CudaExchangeBuffer< + MaybeSome< as EventType>::Event>, + false, + true, + >, + >: SafeMutableAliasing, +{ } pub trait EventType { @@ -78,10 +122,7 @@ impl fmt::Debug for EventBuffer { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - fmt.debug_struct("EventBuffer") - .field("max_events", &self.max_events) - .field("event_counter", &self.event_counter) - .finish_non_exhaustive() + fmt.debug_struct("EventBuffer").finish_non_exhaustive() } } @@ -122,8 +163,6 @@ impl CudaExchangeBuffer::from_vec(event_buffer)?, max_events, ), - max_events, - event_counter: 0_usize, }) } @@ -148,9 +187,26 @@ impl mask.write(false); } } +} - pub fn max_events_per_individual(&self) -> usize { - self.max_events +#[cfg(target_os = "cuda")] +impl + EventBuffer +{ + fn report_event( + &mut self, + event: impl Into< as EventType>::Event>, + ) { + if let ([mask, mask_rest @ ..], [buffer, buffer_rest @ ..]) = ( + core::mem::take(&mut self.event_mask.0), + core::mem::take(&mut self.event_buffer.0), + ) { + mask.write(true); + buffer.write(MaybeSome::Some(event.into())); + + self.event_mask.0 = mask_rest; + self.event_buffer.0 = buffer_rest; + } } } @@ -169,19 +225,11 @@ impl Reporter impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter < self.max_events, + !self.event_buffer.0.is_empty(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); - - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } - - self.event_counter += 1; + self.report_event(event.clone()); } ); } @@ -190,19 +238,14 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter == 0, + !self.event_buffer.0.is_empty(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(0) { - mask.write(true); + self.report_event(event.clone()); - unsafe { - self.event_buffer.get_unchecked_mut(0) - }.write(MaybeSome::Some(event.clone())); - } - - self.event_counter = self.max_events; + self.event_mask.0 = &mut []; + self.event_buffer.0 = &mut []; } ); } @@ -211,37 +254,57 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter < self.max_events, + !self.event_buffer.0.is_empty(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); + self.report_event(event.clone()); - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } - - self.event_counter = self.max_events; + self.event_mask.0 = &mut []; + self.event_buffer.0 = &mut []; } ); impl_report!( #[debug_requires( - self.event_counter < self.max_events, + !self.event_buffer.0.is_empty(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); - - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } - - self.event_counter += 1; + self.report_event(event.clone()); } ); } + +// FIXME: find a less hacky hack +struct CudaExchangeSlice( + &'static mut [T], +); + +impl< + T: 'static + StackOnly + PortableBitSemantics + TypeGraphLayout, + const M2D: bool, + const M2H: bool, + > RustToCudaProxy>> + for SplitSliceOverCudaThreadsDynamicStride> +{ + fn from_ref(_val: &CudaExchangeSlice>) -> &Self { + unsafe { unreachable_cuda_event_buffer_hack() } + } + + fn from_mut(_val: &mut CudaExchangeSlice>) -> &mut Self { + unsafe { unreachable_cuda_event_buffer_hack() } + } + + fn into(mut self) -> CudaExchangeSlice> { + let slice: &mut [CudaExchangeItem] = &mut self; + + let slice = unsafe { core::slice::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) }; + + CudaExchangeSlice(slice) + } +} + +extern "C" { + fn unreachable_cuda_event_buffer_hack() -> !; +} diff --git a/necsim/impls/cuda/src/value_buffer.rs b/necsim/impls/cuda/src/value_buffer.rs index e20dd1c71..b1dc71f1a 100644 --- a/necsim/impls/cuda/src/value_buffer.rs +++ b/necsim/impls/cuda/src/value_buffer.rs @@ -3,7 +3,7 @@ use core::iter::Iterator; use const_type_layout::TypeGraphLayout; use rust_cuda::{ - safety::{PortableBitSemantics, StackOnly}, + safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly}, utils::{ aliasing::SplitSliceOverCudaThreadsConstStride, exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem}, @@ -32,6 +32,21 @@ where SplitSliceOverCudaThreadsConstStride, M2D, M2H>, 1_usize>, } +// Safety: +// - no mutable aliasing occurs since all parts implement SafeMutableAliasing +// - dropping does not trigger (de)alloc since ValueBuffer doesn't impl Drop and +// all parts implement SafeMutableAliasing +// - ValueBuffer has no shallow mutable state +unsafe impl + SafeMutableAliasing for ValueBuffer +where + SplitSliceOverCudaThreadsConstStride, 1_usize>: + SafeMutableAliasing, + SplitSliceOverCudaThreadsConstStride, M2D, M2H>, 1_usize>: + SafeMutableAliasing, +{ +} + #[cfg(not(target_os = "cuda"))] impl ValueBuffer diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 7c2c254f6..07f88df49 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index cba6edccd..b51090971 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 23f9a9bdd..ae5937ec8 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs index 864726a58..cb570a68c 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs @@ -37,4 +37,21 @@ pub struct SimulationKernelPtx< A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, ->(std::marker::PhantomData<(M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal)>); +>( + std::marker::PhantomData<( + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + ReportSpeciation, + ReportDispersal, + )>, +); diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index 9aa8fb7f8..828562714 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -12,7 +12,7 @@ use necsim_impls_no_std::cogs::{ event_sampler::tracking::MinSpeciationTrackingEventSampler, }; -use rust_cuda::{lend::RustToCuda, kernel::CompiledKernelPtx}; +use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda}; use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; @@ -43,8 +43,10 @@ unsafe impl< A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, - > CompiledKernelPtx> - for SimulationKernelPtx + > + CompiledKernelPtx< + simulate, + > for SimulationKernelPtx where SimulationKernelPtx: CompiledKernelPtx>, diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index f625e166b..f0a8873c0 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index 8abbe9b12..292aab1ec 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -5,11 +5,6 @@ #![feature(c_str_literals)] #![cfg_attr(target_os = "cuda", feature(abi_ptx))] #![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] -// #![cfg_attr(target_os = "cuda", feature(panic_info_message))] -// #![cfg_attr(target_os = "cuda", feature(atomic_from_mut))] -// #![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] -// #![cfg_attr(target_os = "cuda", feature(stdsimd))] -// #![cfg_attr(target_os = "cuda", feature(control_flow_enum))] #![allow(long_running_const_eval)] #![recursion_limit = "1024"] @@ -18,6 +13,7 @@ extern crate alloc; #[cfg(target_os = "cuda")] use core::ops::ControlFlow; +// FIXME: why pub use? pub use necsim_core::{ cogs::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, @@ -26,11 +22,13 @@ pub use necsim_core::{ reporter::boolean::Boolean, }; +// FIXME: why pub use? pub use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, }; +// FIXME: why pub use? pub use rust_cuda::lend::RustToCuda; #[rust_cuda::kernel::kernel(pub use link! for impl)] @@ -56,32 +54,27 @@ pub fn simulate< ReportSpeciation: Boolean, ReportDispersal: Boolean, >( - simulation: & /* mut */ - rust_cuda::kernel::param::PtxJit< + simulation: &rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_core::simulation::Simulation, >, >, - task_list: & /* mut */ - rust_cuda::kernel::param::PtxJit< + task_list: &mut rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_impls_cuda::value_buffer::ValueBuffer, >, >, - event_buffer_reporter: & /* mut */ - rust_cuda::kernel::param::PtxJit< + event_buffer_reporter: &mut rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_impls_cuda::event_buffer::EventBuffer, >, >, - min_spec_sample_buffer: & /* mut */ - rust_cuda::kernel::param::PtxJit< + min_spec_sample_buffer: &mut rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_impls_cuda::value_buffer::ValueBuffer, >, >, - next_event_time_buffer: & /* mut */ - rust_cuda::kernel::param::PtxJit< + next_event_time_buffer: &mut rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_impls_cuda::value_buffer::ValueBuffer< necsim_core_bond::PositiveF64, @@ -101,46 +94,49 @@ pub fn simulate< necsim_core_bond::NonNegativeF64, >, ) { - // task_list.with_value_for_core(|task| { - // // Discard the prior task (the simulation is just a temporary local copy) - // core::mem::drop( - // simulation - // .active_lineage_sampler_mut() - // .replace_active_lineage(task), - // ); - - // // Discard the prior sample (the simulation is just a temporary local copy) - // simulation.event_sampler_mut().replace_min_speciation(None); - - // let mut final_next_event_time = None; - - // let (time, steps) = simulation.simulate_incremental_early_stop( - // |_, steps, next_event_time| { - // final_next_event_time = Some(next_event_time); - - // if steps >= max_steps || next_event_time >= max_next_event_time { - // ControlFlow::Break(()) - // } else { - // ControlFlow::Continue(()) - // } - // }, - // event_buffer_reporter, - // ); - - // next_event_time_buffer.put_value_for_core(final_next_event_time); - - // if steps > 0 { - // total_time_max.fetch_max(time.get().to_bits(), core::sync::atomic::Ordering::Relaxed); - // total_steps_sum.fetch_add(steps, core::sync::atomic::Ordering::Relaxed); - // } - - // min_spec_sample_buffer - // .put_value_for_core(simulation.event_sampler_mut().replace_min_speciation(None)); - - // simulation - // .active_lineage_sampler_mut() - // .replace_active_lineage(None) - // }); + // TODO: use simulation with non-allocating clone + let mut simulation = unsafe { core::mem::ManuallyDrop::new(core::ptr::read(simulation)) }; + + task_list.with_value_for_core(|task| { + // Discard the prior task (the simulation is just a temporary local copy) + core::mem::drop( + simulation + .active_lineage_sampler_mut() + .replace_active_lineage(task), + ); + + // Discard the prior sample (the simulation is just a temporary local copy) + simulation.event_sampler_mut().replace_min_speciation(None); + + let mut final_next_event_time = None; + + let (time, steps) = simulation.simulate_incremental_early_stop( + |_, steps, next_event_time| { + final_next_event_time = Some(next_event_time); + + if steps >= max_steps || next_event_time >= max_next_event_time { + ControlFlow::Break(()) + } else { + ControlFlow::Continue(()) + } + }, + event_buffer_reporter, + ); + + next_event_time_buffer.put_value_for_core(final_next_event_time); + + if steps > 0 { + total_time_max.fetch_max(time.get().to_bits(), core::sync::atomic::Ordering::Relaxed); + total_steps_sum.fetch_add(steps, core::sync::atomic::Ordering::Relaxed); + } + + min_spec_sample_buffer + .put_value_for_core(simulation.event_sampler_mut().replace_min_speciation(None)); + + simulation + .active_lineage_sampler_mut() + .replace_active_lineage(None) + }); } #[cfg(target_os = "cuda")] diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs b/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs index 006670903..6c8dee90a 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs @@ -42,19 +42,21 @@ pub struct FixUpInitialiser> { impl< L: ExactSizeIterator, - M: MathsCore, - G: PrimeableRng + RustToCuda, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, > CudaLineageStoreSampleInitialiser> for FixUpInitialiser where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -76,8 +78,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs b/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs index 0775bf6bd..72b836902 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs @@ -23,17 +23,19 @@ use super::CudaLineageStoreSampleInitialiser; #[allow(clippy::module_name_repetitions)] pub struct GenesisInitialiser; -impl + RustToCuda, O: Scenario> +impl + RustToCuda + Sync, O: Scenario> CudaLineageStoreSampleInitialiser for GenesisInitialiser where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -50,8 +52,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs b/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs index cbc997aaa..8a0d9a27c 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs @@ -28,38 +28,40 @@ pub mod resume; #[allow(clippy::module_name_repetitions)] pub trait CudaLineageStoreSampleInitialiser< M: MathsCore, - G: PrimeableRng + RustToCuda, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, Error: From, > where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { - type DispersalSampler: DispersalSampler + RustToCuda; + type DispersalSampler: DispersalSampler + RustToCuda + Sync; type ActiveLineageSampler< X: EmigrationExit< M, O::Habitat, G, IndependentLineageStore, - > + RustToCuda, - J: EventTimeSampler + RustToCuda, + > + RustToCuda + Sync, + J: EventTimeSampler + RustToCuda + Sync, >: SingularActiveLineageSampler< M, O::Habitat, G, IndependentLineageStore, X, Self::DispersalSampler, IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, IndependentEventSampler< M, O::Habitat, G, X, Self::DispersalSampler, O::TurnoverRate, O::SpeciationProbability >, NeverImmigrationEntry, - > + RustToCuda; + > + RustToCuda + Sync; #[allow(clippy::type_complexity)] fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs b/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs index 6102b7da9..478690d96 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs @@ -31,19 +31,21 @@ pub struct ResumeInitialiser> { impl< L: ExactSizeIterator, - M: MathsCore, - G: PrimeableRng + RustToCuda, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, > CudaLineageStoreSampleInitialiser> for ResumeInitialiser where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -60,8 +62,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 4a6390d9e..409cb766b 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -25,15 +25,16 @@ use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::result::SimulationOutcome; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use rust_cuda::{ - lend::RustToCuda, deps::rustacuda::{ function::{BlockSize, GridSize}, prelude::{Stream, StreamFlags}, - }, kernel::{CompiledKernelPtx, TypedPtxKernel, LaunchConfig, Launcher}, host::CudaDropWrapper, + }, + host::CudaDropWrapper, + kernel::{CompiledKernelPtx, LaunchConfig, Launcher, TypedPtxKernel}, + lend::RustToCuda, }; use crate::{ @@ -49,13 +50,39 @@ use crate::{ #[allow(clippy::too_many_lines)] pub fn initialise_and_simulate< 'p, - M: MathsCore, + M: MathsCore + Sync, O: Scenario>>, R: Reporter, P: LocalPartition<'p, R>, I: Iterator, L: CudaLineageStoreSampleInitialiser>, O, Error>, Error: From, + Ptx: CompiledKernelPtx< + simulate< + M, + O::Habitat, + CudaRng>, + IndependentLineageStore, + NeverEmigrationExit, + L::DispersalSampler, + IndependentCoalescenceSampler, + O::TurnoverRate, + O::SpeciationProbability, + IndependentEventSampler< + M, + O::Habitat, + CudaRng>, + NeverEmigrationExit, + L::DispersalSampler, + O::TurnoverRate, + O::SpeciationProbability, + >, + NeverImmigrationEntry, + L::ActiveLineageSampler, + R::ReportSpeciation, + R::ReportDispersal, + >, + >, >( args: &CudaArguments, rng: CudaRng>, @@ -66,58 +93,11 @@ pub fn initialise_and_simulate< lineage_store_sampler_initialiser: L, ) -> Result>>, Error> where - O::Habitat: RustToCuda, + O::Habitat: RustToCuda + Sync, O::DispersalSampler>>>: - RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, - SimulationKernelPtx< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - L::DispersalSampler, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - L::DispersalSampler, - O::TurnoverRate, - O::SpeciationProbability, - >, - NeverImmigrationEntry, - L::ActiveLineageSampler, - R::ReportSpeciation, - R::ReportDispersal, - >: CompiledKernelPtx>, - IndependentLineageStore, - NeverEmigrationExit, - L::DispersalSampler, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - L::DispersalSampler, - O::TurnoverRate, - O::SpeciationProbability, - >, - NeverImmigrationEntry, - L::ActiveLineageSampler, - R::ReportSpeciation, - R::ReportDispersal, - >>, + RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { let ( habitat, @@ -197,8 +177,8 @@ where let (mut status, time, steps, lineages) = with_initialised_cuda(args.device, || { let stream = CudaDropWrapper::from(Stream::new(StreamFlags::NON_BLOCKING, None)?); - - let mut kernel = TypedPtxKernel::new(Some(Box::new(|kernel| { + + let mut kernel = TypedPtxKernel::new::(Some(Box::new(|kernel| { crate::info::print_kernel_function_attributes("simulate", kernel); Ok(()) }))); diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index 93819cf23..6cea35163 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -5,6 +5,7 @@ #[macro_use] extern crate serde_derive_state; +use initialiser::CudaLineageStoreSampleInitialiser; use necsim_core::{cogs::MathsCore, lineage::Lineage, reporter::Reporter}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; @@ -37,10 +38,10 @@ use rustcoalescence_algorithms::{ }; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; -use rust_cuda::{lend::RustToCuda, kernel::CompiledKernelPtx}; +use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda}; mod arguments; mod cuda; @@ -73,17 +74,17 @@ impl AlgorithmDefaults for CudaAlgorithm { #[allow(clippy::trait_duplication_in_bounds)] impl< 'p, - M: MathsCore, + M: MathsCore + Sync, O: Scenario>>, R: Reporter, P: LocalPartition<'p, R>, > Algorithm<'p, M, O, R, P> for CudaAlgorithm where - O::Habitat: RustToCuda, + O::Habitat: RustToCuda + Sync, O::DispersalSampler>>>: - RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, SimulationKernelPtx< M, O::Habitat, @@ -122,45 +123,47 @@ where >, R::ReportSpeciation, R::ReportDispersal, - >: CompiledKernelPtx>, - IndependentLineageStore, - NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< + >: CompiledKernelPtx< + simulate< M, O::Habitat, CudaRng>, + IndependentLineageStore, NeverEmigrationExit, O::DispersalSampler< InMemoryPackedAliasDispersalSampler>>, >, + IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, - >, - NeverImmigrationEntry, - IndependentActiveLineageSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, + IndependentEventSampler< + M, + O::Habitat, + CudaRng>, + NeverEmigrationExit, + O::DispersalSampler< + InMemoryPackedAliasDispersalSampler>>, + >, + O::TurnoverRate, + O::SpeciationProbability, >, - O::TurnoverRate, - O::SpeciationProbability, - ExpEventTimeSampler, + NeverImmigrationEntry, + IndependentActiveLineageSampler< + M, + O::Habitat, + CudaRng>, + NeverEmigrationExit, + O::DispersalSampler< + InMemoryPackedAliasDispersalSampler>>, + >, + O::TurnoverRate, + O::SpeciationProbability, + ExpEventTimeSampler, + >, + R::ReportSpeciation, + R::ReportDispersal, >, - R::ReportSpeciation, - R::ReportDispersal, - >>, + >, SimulationKernelPtx< M, O::Habitat, @@ -217,28 +220,12 @@ where >, R::ReportSpeciation, R::ReportDispersal, - >: CompiledKernelPtx>, - IndependentLineageStore, - NeverEmigrationExit, - TrespassingDispersalSampler< - M, - O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, - >, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< + >: CompiledKernelPtx< + simulate< M, O::Habitat, CudaRng>, + IndependentLineageStore, NeverEmigrationExit, TrespassingDispersalSampler< M, @@ -249,31 +236,49 @@ where >, UniformAntiTrespassingDispersalSampler>>, >, + IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, - >, - NeverImmigrationEntry, - IndependentActiveLineageSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - TrespassingDispersalSampler< + IndependentEventSampler< M, O::Habitat, CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, + NeverEmigrationExit, + TrespassingDispersalSampler< + M, + O::Habitat, + CudaRng>, + O::DispersalSampler< + InMemoryPackedAliasDispersalSampler>>, + >, + UniformAntiTrespassingDispersalSampler>>, >, - UniformAntiTrespassingDispersalSampler>>, + O::TurnoverRate, + O::SpeciationProbability, >, - O::TurnoverRate, - O::SpeciationProbability, - ConstEventTimeSampler, + NeverImmigrationEntry, + IndependentActiveLineageSampler< + M, + O::Habitat, + CudaRng>, + NeverEmigrationExit, + TrespassingDispersalSampler< + M, + O::Habitat, + CudaRng>, + O::DispersalSampler< + InMemoryPackedAliasDispersalSampler>>, + >, + UniformAntiTrespassingDispersalSampler>>, + >, + O::TurnoverRate, + O::SpeciationProbability, + ConstEventTimeSampler, + >, + R::ReportSpeciation, + R::ReportDispersal, >, - R::ReportSpeciation, - R::ReportDispersal, - >>, + >, { type LineageStore = IndependentLineageStore; type Rng = CudaRng>; @@ -296,7 +301,22 @@ where pause_before: Option, local_partition: &mut P, ) -> Result, Self::Error> { - launch::initialise_and_simulate( + launch::initialise_and_simulate::<_, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + >::DispersalSampler, + _, + _, + _, + _, + _, + >::ActiveLineageSampler<_, _>, + _, + _, + >>( &args, rng, scenario, @@ -322,7 +342,22 @@ where pause_before: Option, local_partition: &mut P, ) -> Result, ResumeError> { - launch::initialise_and_simulate( + launch::initialise_and_simulate::<_, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::DispersalSampler, + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, _>, + _, + _, + >>( &args, rng, scenario, @@ -351,13 +386,28 @@ where fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, ) -> Result, ResumeError> { - launch::initialise_and_simulate( + launch::initialise_and_simulate::<_, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::DispersalSampler, + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, ExpEventTimeSampler>, + _, + _, + >>( &args, rng, scenario, pre_sampler, - Some(PositiveF64::max_after(restart_at.into(), restart_at.into()).into()), - local_partition, + Some(PositiveF64::max_after(restart_at.into(), + restart_at.into()).into()), local_partition, FixUpInitialiser { lineages, restart_at, diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index 6162925f4..eccfb8aba 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -1,10 +1,10 @@ use std::{collections::VecDeque, convert::TryInto, num::NonZeroU64, sync::atomic::AtomicU64}; use rust_cuda::{ - lend::{RustToCuda, LendToCuda}, host::HostAndDeviceMutRef, - deps::rustacuda::function::{BlockSize, GridSize}, - utils::exchange::wrapper::ExchangeWrapperOnHost, kernel::Launcher, + kernel::Launcher, + lend::{LendToCuda, RustToCuda}, + utils::exchange::wrapper::ExchangeWrapperOnHost, }; use necsim_core::{ @@ -37,7 +37,6 @@ use necsim_partitioning_core::LocalPartition; use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use crate::error::CudaError; @@ -48,19 +47,18 @@ type Result = std::result::Result; pub fn simulate< 'l, 'p, - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler - + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, P: Reporter, L: LocalPartition<'p, P>, LI: IntoIterator, @@ -87,39 +85,12 @@ pub fn simulate< event_slice: EventSlice, pause_before: Option, local_partition: &'l mut L, -) -> Result<(Status, NonNegativeF64, u64, impl IntoIterator)> - where SimulationKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - <>::WaterLevelReporter as Reporter>::ReportSpeciation, - <>::WaterLevelReporter as Reporter>::ReportDispersal, - >: SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - <>::WaterLevelReporter as Reporter>::ReportSpeciation, - <>::WaterLevelReporter as Reporter>::ReportDispersal, - >, -{ +) -> Result<( + Status, + NonNegativeF64, + u64, + impl IntoIterator, +)> { let mut slow_lineages = lineages .into_iter() .map(|lineage| { @@ -143,7 +114,7 @@ pub fn simulate< L, >>::WaterLevelReporter::new(event_slice.get(), local_partition); - let (grid_size, block_size, dedup_cache, step_slice) = config; + let (dedup_cache, step_slice) = config; #[allow(clippy::or_fun_call)] let intial_max_time = slow_lineages @@ -153,10 +124,13 @@ pub fn simulate< .unwrap_or(NonNegativeF64::zero()); // Initialise the total_time_max and total_steps_sum atomics - let mut total_time_max = AtomicU64::new(intial_max_time.get().to_bits()).into(); - let mut total_steps_sum = AtomicU64::new(0_u64).into(); + let mut total_time_max = AtomicU64::new(intial_max_time.get().to_bits()); + let mut total_steps_sum = AtomicU64::new(0_u64); - let mut task_list = ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; + let mut task_list = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; let mut event_buffer: ExchangeWrapperOnHost< EventBuffer< <>::WaterLevelReporter as Reporter>::ReportDispersal, >, > = ExchangeWrapperOnHost::new(EventBuffer::new( - &block_size, - &grid_size, + &launcher.config.block, &launcher.config.grid, step_slice.get().try_into().unwrap_or(usize::MAX), )?)?; - let mut min_spec_sample_buffer = - ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; - let mut next_event_time_buffer = - ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; + let mut min_spec_sample_buffer = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; + let mut next_event_time_buffer = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; let mut min_spec_samples = dedup_cache.construct(slow_lineages.len()); @@ -196,7 +173,7 @@ pub fn simulate< HostAndDeviceMutRef::with_new(&mut total_time_max, |total_time_max| -> Result<()> { HostAndDeviceMutRef::with_new(&mut total_steps_sum, |total_steps_sum| -> Result<()> { // TODO: Pipeline async launches and callbacks of simulation/event analysis - simulation.lend_to_cuda_mut(|mut simulation_cuda_repr| -> Result<()> { + simulation.lend_to_cuda(|simulation_cuda_repr| -> Result<()> { while !slow_lineages.is_empty() && pause_before.map_or(true, |pause_before| level_time < pause_before) { @@ -242,8 +219,16 @@ pub fn simulate< proxy.advance_water_level(level_time); // Simulate all slow lineages until they have finished or exceeded the - // new water level + // new water level while !slow_lineages.is_empty() { + // Move the event buffer and min speciation sample buffer to CUDA + let mut event_buffer_cuda_async = + event_buffer.move_to_device_async(launcher.stream)?; + let mut min_spec_sample_buffer_cuda_async = + min_spec_sample_buffer.move_to_device_async(launcher.stream)?; + let mut next_event_time_buffer_cuda_async = + next_event_time_buffer.move_to_device_async(launcher.stream)?; + // Upload the new tasks from the front of the task queue for mut task in task_list.iter_mut() { let next_slow_lineage = loop { @@ -261,31 +246,36 @@ pub fn simulate< task.replace(next_slow_lineage); } - // Move the task list, event buffer and min speciation sample buffer - // to CUDA - let mut event_buffer_cuda = event_buffer.move_to_device()?; - let mut min_spec_sample_buffer_cuda = - min_spec_sample_buffer.move_to_device()?; - let mut next_event_time_buffer_cuda = - next_event_time_buffer.move_to_device()?; - let mut task_list_cuda = task_list.move_to_device()?; - - kernel.simulate_raw( - simulation_cuda_repr.as_mut(), - task_list_cuda.as_mut(), - event_buffer_cuda.as_mut(), - min_spec_sample_buffer_cuda.as_mut(), - next_event_time_buffer_cuda.as_mut(), - total_time_max.as_ref(), - total_steps_sum.as_ref(), - step_slice.get().into(), - level_time.into(), + // Move the task list to CUDA + let mut task_list_cuda_async = + task_list.move_to_device_async(launcher.stream)?; + + launcher.launch9_async( + simulation_cuda_repr.as_async(launcher.stream).as_ref(), + task_list_cuda_async.as_mut_async().proj_mut(), + event_buffer_cuda_async.as_mut_async().proj_mut(), + min_spec_sample_buffer_cuda_async.as_mut_async().proj_mut(), + next_event_time_buffer_cuda_async.as_mut_async().proj_mut(), + total_time_max.as_ref().as_async(launcher.stream).as_ref(), + total_steps_sum.as_ref().as_async(launcher.stream).as_ref(), + step_slice.get(), + level_time, )?; - min_spec_sample_buffer = min_spec_sample_buffer_cuda.move_to_host()?; - next_event_time_buffer = next_event_time_buffer_cuda.move_to_host()?; - task_list = task_list_cuda.move_to_host()?; - event_buffer = event_buffer_cuda.move_to_host()?; + let min_spec_sample_buffer_host_async = + min_spec_sample_buffer_cuda_async + .move_to_host_async(launcher.stream)?; + let next_event_time_buffer_host_async = + next_event_time_buffer_cuda_async + .move_to_host_async(launcher.stream)?; + let task_list_host_async = + task_list_cuda_async.move_to_host_async(launcher.stream)?; + let event_buffer_host_async = + event_buffer_cuda_async.move_to_host_async(launcher.stream)?; + + task_list = task_list_host_async.synchronize()?; + next_event_time_buffer = next_event_time_buffer_host_async.synchronize()?; + min_spec_sample_buffer = min_spec_sample_buffer_host_async.synchronize()?; // Fetch the completion of the tasks for ((mut spec_sample, mut next_event_time), mut task) in @@ -303,8 +293,7 @@ pub fn simulate< { if !duplicate_individual { // Reclassify lineages as either slow (still below - // water) or - // fast + // the metaphorical water level) or fast if next_event_time < level_time { slow_lineages.push_back((task, next_event_time.into())); } else { @@ -314,6 +303,7 @@ pub fn simulate< } } + event_buffer = event_buffer_host_async.synchronize()?; event_buffer.report_events_unordered(&mut proxy); proxy.local_partition().get_reporter().report_progress( @@ -336,10 +326,9 @@ pub fn simulate< })?; // Safety: Max of NonNegativeF64 values from the GPU - let total_time_max = unsafe { - NonNegativeF64::new_unchecked(f64::from_bits(total_time_max.into_inner().into_inner())) - }; - let total_steps_sum = total_steps_sum.into_inner().into_inner(); + let total_time_max = + unsafe { NonNegativeF64::new_unchecked(f64::from_bits(total_time_max.into_inner())) }; + let total_steps_sum = total_steps_sum.into_inner(); local_partition.report_progress_sync(slow_lineages.len() as u64); @@ -348,5 +337,9 @@ pub fn simulate< local_partition.reduce_global_time_steps(total_time_max, total_steps_sum); let lineages = slow_lineages.into_iter().map(|(lineage, _)| lineage); + // Note: The simulation requires no mutation, since all components are + // either immutable or have singular swap states, and the list + // of all lineages (which does change) is returned separately + Ok((status, global_time, global_steps, lineages)) } From 5cd2354fe2c73412a5b6b9d2f822cdb508eaddcb Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 10 Jan 2024 19:17:53 +0000 Subject: [PATCH 04/28] Some progress with dispatch coersion --- Cargo.lock | 2 + rustcoalescence/Cargo.toml | 9 ++++ .../algorithms/cuda/cpu-kernel/src/lib.rs | 1 + .../algorithms/cuda/cpu-kernel/src/patch.rs | 16 +++---- .../algorithms/cuda/gpu-kernel/src/lib.rs | 1 + rustcoalescence/algorithms/cuda/src/lib.rs | 2 +- .../dispatch/valid/algorithm_scenario.rs | 45 +++++++++++++++++-- 7 files changed, 64 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 39a949033..a3c19625c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1594,6 +1594,7 @@ dependencies = [ "log", "necsim-core", "necsim-core-bond", + "necsim-impls-cuda", "necsim-impls-no-std", "necsim-impls-std", "necsim-partitioning-core", @@ -1601,6 +1602,7 @@ dependencies = [ "necsim-partitioning-mpi", "necsim-plugins-core", "ron", + "rust-cuda", "rustcoalescence-algorithms", "rustcoalescence-algorithms-cuda", "rustcoalescence-algorithms-gillespie", diff --git a/rustcoalescence/Cargo.toml b/rustcoalescence/Cargo.toml index ebf41fc18..e0e9ff5d7 100644 --- a/rustcoalescence/Cargo.toml +++ b/rustcoalescence/Cargo.toml @@ -10,10 +10,17 @@ edition = "2021" [features] default = [] +necsim-partitioning-mpi = ["dep:necsim-partitioning-mpi"] + +rustcoalescence-algorithms-gillespie = ["dep:rustcoalescence-algorithms-gillespie"] +rustcoalescence-algorithms-independent = ["dep:rustcoalescence-algorithms-independent"] +rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda", "dep:necsim-impls-cuda", "dep:rust-cuda"] + [dependencies] necsim-core = { path = "../necsim/core" } necsim-core-bond = { path = "../necsim/core/bond" } necsim-impls-no-std = { path = "../necsim/impls/no-std" } +necsim-impls-cuda = { path = "../necsim/impls/cuda", optional = true } necsim-impls-std = { path = "../necsim/impls/std" } necsim-plugins-core = { path = "../necsim/plugins/core", features = ["import"] } necsim-partitioning-core = { path = "../necsim/partitioning/core" } @@ -28,6 +35,8 @@ rustcoalescence-algorithms-gillespie = { path = "algorithms/gillespie", optional rustcoalescence-algorithms-independent = { path = "algorithms/independent", optional = true } rustcoalescence-algorithms-cuda = { path = "algorithms/cuda", optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = [], optional = true } + clap = { version = "4.0", features = ["derive"] } anyhow = "1.0" serde = { version = "1.0", features = ["derive"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs index cb570a68c..3176cdb11 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs @@ -1,6 +1,7 @@ #![deny(clippy::pedantic)] #![feature(c_str_literals)] #![feature(min_specialization)] +#![allow(long_running_const_eval)] #![recursion_limit = "1024"] use necsim_core::{ diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index 828562714..6b0560c56 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -48,14 +48,14 @@ unsafe impl< simulate, > for SimulationKernelPtx where - SimulationKernelPtx: - CompiledKernelPtx>, - SimulationKernelPtx: - CompiledKernelPtx>, - SimulationKernelPtx: - CompiledKernelPtx>, - SimulationKernelPtx: - CompiledKernelPtx>, + // SimulationKernelPtx: + // CompiledKernelPtx>, + // SimulationKernelPtx: + // CompiledKernelPtx>, + // SimulationKernelPtx: + // CompiledKernelPtx>, + // SimulationKernelPtx: + // CompiledKernelPtx>, { default fn get_ptx() -> &'static CStr { unsafe { unreachable_cuda_simulation_linking_reporter() } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index 292aab1ec..806d664c2 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -4,6 +4,7 @@ #![feature(decl_macro)] #![feature(c_str_literals)] #![cfg_attr(target_os = "cuda", feature(abi_ptx))] +#![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] #![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] #![allow(long_running_const_eval)] #![recursion_limit = "1024"] diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index 6cea35163..f82b7600d 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -398,7 +398,7 @@ where _, _, _, - as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, ExpEventTimeSampler>, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, ConstEventTimeSampler>, _, _, >>( diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs index b8628421f..2817fb1fd 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs @@ -103,6 +103,7 @@ macro_rules! match_scenario_algorithm { } #[allow(clippy::too_many_arguments)] +#[allow(clippy::too_many_lines)] // FIXME pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( local_partition: P, @@ -150,9 +151,47 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( }, #[cfg(feature = "rustcoalescence-algorithms-cuda")] AlgorithmArgs::Cuda(algorithm_args) => { - rng::dispatch::< - ::MathsCore, - CudaAlgorithm, _, R, P, + fn coerce_cuda_dispatch< + 'p, + M: necsim_core::cogs::MathsCore + Sync, + O: Scenario, + >>, + R: Reporter, + P: LocalPartition<'p, R>, + >( + local_partition: P, + + sample: crate::args::config::sample::Sample, + algorithm_args: ::Arguments, + scenario: O, + pause_before: Option, + + ron_args: &str, + normalised_args: &mut BufferingSimulateArgsBuilder, + ) -> anyhow::Result where + O::Habitat: rust_cuda::lend::RustToCuda + Sync, + O::DispersalSampler< + necsim_impls_no_std::cogs::dispersal_sampler::in_memory::packed_alias::InMemoryPackedAliasDispersalSampler< + M, O::Habitat, necsim_impls_cuda::cogs::rng::CudaRng< + M, necsim_impls_no_std::cogs::rng::wyhash::WyHash, + > + > + >: rust_cuda::lend::RustToCuda + Sync, + O::TurnoverRate: rust_cuda::lend::RustToCuda + Sync, + O::SpeciationProbability: rust_cuda::lend::RustToCuda + Sync, + { + rng::dispatch::< + M, + CudaAlgorithm, _, R, P, + >( + local_partition, sample, algorithm_args, scenario, + pause_before, ron_args, normalised_args, + ) + } + + coerce_cuda_dispatch::< + ::MathsCore, _, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, From 65ed1c88d82e8013558e81de0a550851e957caea Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 10 Jan 2024 20:15:14 +0000 Subject: [PATCH 05/28] Small cleanup --- Cargo.lock | 1 - rustcoalescence/Cargo.toml | 3 +- .../algorithms/cuda/cpu-kernel/src/patch.rs | 23 ++-- rustcoalescence/algorithms/cuda/src/launch.rs | 23 ++-- rustcoalescence/algorithms/cuda/src/lib.rs | 126 +++++++----------- .../gillespie/src/event_skipping/mod.rs | 30 +++-- .../gillespie/src/gillespie/classical/mod.rs | 20 +-- .../algorithms/gillespie/src/gillespie/mod.rs | 3 + .../gillespie/src/gillespie/turnover/mod.rs | 26 ++-- .../algorithms/independent/src/lib.rs | 30 +++-- rustcoalescence/algorithms/src/lib.rs | 17 +-- .../dispatch/valid/algorithm_scenario.rs | 17 ++- .../src/cli/simulate/dispatch/valid/info.rs | 16 +-- .../src/cli/simulate/dispatch/valid/launch.rs | 14 +- .../src/cli/simulate/dispatch/valid/rng.rs | 14 +- 15 files changed, 183 insertions(+), 180 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3c19625c..5c4ca197c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1594,7 +1594,6 @@ dependencies = [ "log", "necsim-core", "necsim-core-bond", - "necsim-impls-cuda", "necsim-impls-no-std", "necsim-impls-std", "necsim-partitioning-core", diff --git a/rustcoalescence/Cargo.toml b/rustcoalescence/Cargo.toml index e0e9ff5d7..71c16c9b5 100644 --- a/rustcoalescence/Cargo.toml +++ b/rustcoalescence/Cargo.toml @@ -14,13 +14,12 @@ necsim-partitioning-mpi = ["dep:necsim-partitioning-mpi"] rustcoalescence-algorithms-gillespie = ["dep:rustcoalescence-algorithms-gillespie"] rustcoalescence-algorithms-independent = ["dep:rustcoalescence-algorithms-independent"] -rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda", "dep:necsim-impls-cuda", "dep:rust-cuda"] +rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda", "dep:rust-cuda"] [dependencies] necsim-core = { path = "../necsim/core" } necsim-core-bond = { path = "../necsim/core/bond" } necsim-impls-no-std = { path = "../necsim/impls/no-std" } -necsim-impls-cuda = { path = "../necsim/impls/cuda", optional = true } necsim-impls-std = { path = "../necsim/impls/std" } necsim-plugins-core = { path = "../necsim/plugins/core", features = ["import"] } necsim-partitioning-core = { path = "../necsim/partitioning/core" } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index 6b0560c56..99632da65 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -5,7 +5,7 @@ use necsim_core::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, }, - reporter::boolean::{Boolean, False, True}, + reporter::boolean::Boolean, // reporter::boolean::{Boolean, False, True}, }; use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, @@ -46,16 +46,17 @@ unsafe impl< > CompiledKernelPtx< simulate, - > for SimulationKernelPtx -where - // SimulationKernelPtx: - // CompiledKernelPtx>, - // SimulationKernelPtx: - // CompiledKernelPtx>, - // SimulationKernelPtx: - // CompiledKernelPtx>, - // SimulationKernelPtx: - // CompiledKernelPtx>, + > + for SimulationKernelPtx +//where +// SimulationKernelPtx: +// CompiledKernelPtx>, +// SimulationKernelPtx: +// CompiledKernelPtx>, +// SimulationKernelPtx: +// CompiledKernelPtx>, +// SimulationKernelPtx: +// CompiledKernelPtx>, { default fn get_ptx() -> &'static CStr { unsafe { unreachable_cuda_simulation_linking_reporter() } diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 409cb766b..3c8f255a6 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -3,7 +3,6 @@ use std::marker::PhantomData; use necsim_core::{cogs::MathsCore, reporter::Reporter, simulation::SimulationBuilder}; use necsim_core_bond::NonNegativeF64; -use necsim_impls_cuda::cogs::rng::CudaRng; use necsim_impls_no_std::{ cogs::{ active_lineage_sampler::independent::event_time_sampler::exp::ExpEventTimeSampler, @@ -16,7 +15,6 @@ use necsim_impls_no_std::{ origin_sampler::{ decomposition::DecompositionOriginSampler, pre_sampler::OriginPreSampler, }, - rng::wyhash::WyHash, }, parallelisation::Status, }; @@ -25,7 +23,7 @@ use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::result::SimulationOutcome; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; +use rustcoalescence_algorithms_cuda_gpu_kernel::{simulate, PrimeableRng}; use rust_cuda::{ deps::rustacuda::{ @@ -51,17 +49,18 @@ use crate::{ pub fn initialise_and_simulate< 'p, M: MathsCore + Sync, - O: Scenario>>, + G: PrimeableRng + RustToCuda + Sync, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, I: Iterator, - L: CudaLineageStoreSampleInitialiser>, O, Error>, + L: CudaLineageStoreSampleInitialiser, Error: From, Ptx: CompiledKernelPtx< simulate< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, NeverEmigrationExit, L::DispersalSampler, @@ -71,7 +70,7 @@ pub fn initialise_and_simulate< IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, L::DispersalSampler, O::TurnoverRate, @@ -85,17 +84,16 @@ pub fn initialise_and_simulate< >, >( args: &CudaArguments, - rng: CudaRng>, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, lineage_store_sampler_initialiser: L, -) -> Result>>, Error> +) -> Result, Error> where O::Habitat: RustToCuda + Sync, - O::DispersalSampler>>>: - RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, O::TurnoverRate: RustToCuda + Sync, O::SpeciationProbability: RustToCuda + Sync, { @@ -106,8 +104,7 @@ where speciation_probability, origin_sampler_auxiliary, decomposition_auxiliary, - ) = scenario - .build::>>>(); + ) = scenario.build::>(); let coalescence_sampler = IndependentCoalescenceSampler::default(); let event_sampler = IndependentEventSampler::default(); diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index f82b7600d..c977ae339 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -39,7 +39,7 @@ use rustcoalescence_algorithms::{ use rustcoalescence_scenarios::Scenario; use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; -use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; +use rustcoalescence_algorithms_cuda_gpu_kernel::{simulate, PrimeableRng}; use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda}; @@ -69,42 +69,39 @@ impl AlgorithmParamters for CudaAlgorithm { impl AlgorithmDefaults for CudaAlgorithm { type MathsCore = NvptxMathsCore; + type Rng = CudaRng>; } #[allow(clippy::trait_duplication_in_bounds)] impl< 'p, M: MathsCore + Sync, - O: Scenario>>, + G: PrimeableRng + RustToCuda + Sync, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, - > Algorithm<'p, M, O, R, P> for CudaAlgorithm + > Algorithm<'p, M, G, O, R, P> for CudaAlgorithm where O::Habitat: RustToCuda + Sync, - O::DispersalSampler>>>: - RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, O::TurnoverRate: RustToCuda + Sync, O::SpeciationProbability: RustToCuda + Sync, SimulationKernelPtx< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, O::TurnoverRate, O::SpeciationProbability, >, @@ -112,11 +109,9 @@ where IndependentActiveLineageSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, O::TurnoverRate, O::SpeciationProbability, ExpEventTimeSampler, @@ -127,23 +122,19 @@ where simulate< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, O::TurnoverRate, O::SpeciationProbability, >, @@ -151,11 +142,9 @@ where IndependentActiveLineageSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, O::TurnoverRate, O::SpeciationProbability, ExpEventTimeSampler, @@ -167,17 +156,15 @@ where SimulationKernelPtx< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, IndependentCoalescenceSampler, O::TurnoverRate, @@ -185,16 +172,14 @@ where IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, O::TurnoverRate, O::SpeciationProbability, @@ -203,16 +188,14 @@ where IndependentActiveLineageSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, O::TurnoverRate, O::SpeciationProbability, @@ -224,17 +207,15 @@ where simulate< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, IndependentCoalescenceSampler, O::TurnoverRate, @@ -242,16 +223,14 @@ where IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, O::TurnoverRate, O::SpeciationProbability, @@ -260,16 +239,14 @@ where IndependentActiveLineageSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, O::TurnoverRate, O::SpeciationProbability, @@ -281,7 +258,6 @@ where >, { type LineageStore = IndependentLineageStore; - type Rng = CudaRng>; fn get_logical_partition(args: &Self::Arguments, _local_partition: &P) -> Partition { match &args.parallelism_mode { @@ -295,13 +271,13 @@ where fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { - launch::initialise_and_simulate::<_, _, _, _, _, _, _, SimulationKernelPtx< + ) -> Result, Self::Error> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< _, _, _, @@ -334,15 +310,15 @@ where #[allow(clippy::too_many_lines)] fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { - launch::initialise_and_simulate::<_, _, _, _, _, _, _, SimulationKernelPtx< + ) -> Result, ResumeError> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< _, _, _, @@ -378,15 +354,15 @@ where #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { - launch::initialise_and_simulate::<_, _, _, _, _, _, _, SimulationKernelPtx< + ) -> Result, ResumeError> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< _, _, _, diff --git a/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs b/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs index 4dc2bfa4e..1c37f0db9 100644 --- a/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs @@ -1,5 +1,5 @@ use necsim_core::{ - cogs::{GloballyCoherentLineageStore, MathsCore, SeparableDispersalSampler}, + cogs::{GloballyCoherentLineageStore, MathsCore, SeparableDispersalSampler, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -39,18 +39,24 @@ impl AlgorithmParamters for EventSkippingAlgorithm { impl AlgorithmDefaults for EventSkippingAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = Pcg; } -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for EventSkippingAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for EventSkippingAlgorithm where O::LineageStore>: GloballyCoherentLineageStore, - O::DispersalSampler>>: - SeparableDispersalSampler>, + O::DispersalSampler>: + SeparableDispersalSampler, { type LineageStore = O::LineageStore>; - type Rng = Pcg; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { get_gillespie_logical_partition(args, local_partition) @@ -58,12 +64,12 @@ where fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -81,14 +87,14 @@ where /// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -109,14 +115,14 @@ where /// simulation (incl. running the algorithm) failed fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs index 06114bca7..fd8ffe02e 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs @@ -1,5 +1,5 @@ use necsim_core::{ - cogs::{LocallyCoherentLineageStore, MathsCore}, + cogs::{LocallyCoherentLineageStore, MathsCore, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -9,7 +9,6 @@ use necsim_impls_no_std::cogs::{ lineage_store::coherent::locally::classical::ClassicalLineageStore, origin_sampler::pre_sampler::OriginPreSampler, turnover_rate::uniform::UniformTurnoverRate, }; -use necsim_impls_std::cogs::rng::pcg::Pcg; use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::{ @@ -31,11 +30,12 @@ use initialiser::{ // Optimised 'Classical' implementation for the `UniformTurnoverSampler` impl< 'p, - O: Scenario, TurnoverRate = UniformTurnoverRate>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore, - > Algorithm<'p, M, O, R, P> for GillespieAlgorithm + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for GillespieAlgorithm where O::LineageStore>: LocallyCoherentLineageStore, @@ -43,12 +43,12 @@ where #[allow(clippy::too_many_lines)] fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -66,14 +66,14 @@ where /// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -95,14 +95,14 @@ where #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs index f485eb6a6..c1f775555 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs @@ -1,4 +1,6 @@ +use necsim_core::cogs::MathsCore; use necsim_impls_no_std::cogs::maths::intrinsics::IntrinsicsMathsCore; +use necsim_impls_std::cogs::rng::pcg::Pcg; use rustcoalescence_algorithms::{AlgorithmDefaults, AlgorithmParamters}; @@ -17,4 +19,5 @@ impl AlgorithmParamters for GillespieAlgorithm { impl AlgorithmDefaults for GillespieAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = Pcg; } diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs index 7f44e6280..b8273a461 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs @@ -1,5 +1,5 @@ use necsim_core::{ - cogs::{LocallyCoherentLineageStore, MathsCore}, + cogs::{LocallyCoherentLineageStore, MathsCore, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -9,7 +9,6 @@ use necsim_impls_no_std::cogs::{ lineage_store::coherent::locally::classical::ClassicalLineageStore, origin_sampler::pre_sampler::OriginPreSampler, }; -use necsim_impls_std::cogs::rng::pcg::Pcg; use necsim_partitioning_core::{partition::Partition, LocalPartition}; use rustcoalescence_algorithms::{ @@ -31,14 +30,19 @@ use initialiser::{ }; // Default 'Gillespie' implementation for any turnover sampler -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for GillespieAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for GillespieAlgorithm where O::LineageStore>: LocallyCoherentLineageStore, { type LineageStore = O::LineageStore>; - type Rng = Pcg; default fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { get_gillespie_logical_partition(args, local_partition) @@ -47,12 +51,12 @@ where #[allow(clippy::shadow_unrelated, clippy::too_many_lines)] default fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -74,14 +78,14 @@ where L: ExactSizeIterator, >( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -103,14 +107,14 @@ where #[allow(clippy::too_many_lines)] default fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/independent/src/lib.rs b/rustcoalescence/algorithms/independent/src/lib.rs index 8a7d0473d..11eeba8e7 100644 --- a/rustcoalescence/algorithms/independent/src/lib.rs +++ b/rustcoalescence/algorithms/independent/src/lib.rs @@ -4,7 +4,11 @@ #[macro_use] extern crate serde_derive_state; -use necsim_core::{cogs::MathsCore, lineage::Lineage, reporter::Reporter}; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + lineage::Lineage, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use necsim_impls_no_std::cogs::{ @@ -39,13 +43,19 @@ impl AlgorithmParamters for IndependentAlgorithm { impl AlgorithmDefaults for IndependentAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = WyHash; } -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for IndependentAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: PrimeableRng, + > Algorithm<'p, M, G, O, R, P> for IndependentAlgorithm { type LineageStore = IndependentLineageStore; - type Rng = WyHash; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { match &args.parallelism_mode { @@ -62,12 +72,12 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( &args, rng, @@ -85,14 +95,14 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma /// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( &args, rng, @@ -114,14 +124,14 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( &args, rng, diff --git a/rustcoalescence/algorithms/src/lib.rs b/rustcoalescence/algorithms/src/lib.rs index 7ed7ce88a..da7ada445 100644 --- a/rustcoalescence/algorithms/src/lib.rs +++ b/rustcoalescence/algorithms/src/lib.rs @@ -27,17 +27,18 @@ pub trait AlgorithmParamters { pub trait AlgorithmDefaults { type MathsCore: MathsCore; + type Rng: RngCore; } pub trait Algorithm< 'p, M: MathsCore, - O: Scenario, + G: RngCore, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >: Sized + AlgorithmParamters + AlgorithmDefaults { - type Rng: RngCore; type LineageStore: LineageStore; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition; @@ -48,12 +49,12 @@ pub trait Algorithm< /// the algorithm failed fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error>; + ) -> Result, Self::Error>; /// # Errors /// @@ -62,14 +63,14 @@ pub trait Algorithm< #[allow(clippy::type_complexity, clippy::too_many_arguments)] fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError>; + ) -> Result, ResumeError>; /// # Errors /// @@ -78,12 +79,12 @@ pub trait Algorithm< #[allow(clippy::type_complexity, clippy::too_many_arguments)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError>; + ) -> Result, ResumeError>; } diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs index 2817fb1fd..09651390c 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs @@ -123,6 +123,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::Gillespie(algorithm_args) => { rng::dispatch::< ::MathsCore, + ::Rng<_>, GillespieAlgorithm, _, R, P, >( local_partition, sample, algorithm_args, scenario, @@ -133,6 +134,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::EventSkipping(algorithm_args) => { rng::dispatch::< ::MathsCore, + ::Rng<_>, EventSkippingAlgorithm, _, R, P, >( local_partition, sample, algorithm_args, scenario, @@ -143,6 +145,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::Independent(algorithm_args) => { rng::dispatch::< ::MathsCore, + ::Rng<_>, IndependentAlgorithm, _, R, P, >( local_partition, sample, algorithm_args, scenario, @@ -154,9 +157,8 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( fn coerce_cuda_dispatch< 'p, M: necsim_core::cogs::MathsCore + Sync, - O: Scenario, - >>, + G: necsim_core::cogs::PrimeableRng + rust_cuda::lend::RustToCuda + Sync, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( @@ -173,9 +175,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( O::Habitat: rust_cuda::lend::RustToCuda + Sync, O::DispersalSampler< necsim_impls_no_std::cogs::dispersal_sampler::in_memory::packed_alias::InMemoryPackedAliasDispersalSampler< - M, O::Habitat, necsim_impls_cuda::cogs::rng::CudaRng< - M, necsim_impls_no_std::cogs::rng::wyhash::WyHash, - > + M, O::Habitat, G, > >: rust_cuda::lend::RustToCuda + Sync, O::TurnoverRate: rust_cuda::lend::RustToCuda + Sync, @@ -183,6 +183,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( { rng::dispatch::< M, + G, CudaAlgorithm, _, R, P, >( local_partition, sample, algorithm_args, scenario, @@ -191,7 +192,9 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( } coerce_cuda_dispatch::< - ::MathsCore, _, R, P, + ::MathsCore, + ::Rng<_>, + _, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs index aac4223c9..1b390136c 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs @@ -5,7 +5,7 @@ use anyhow::{Context, Result}; use rustcoalescence_algorithms::{result::SimulationOutcome, Algorithm}; use necsim_core::{ - cogs::MathsCore, + cogs::{MathsCore, RngCore}, reporter::{boolean::Boolean, Reporter}, }; use necsim_core_bond::NonNegativeF64; @@ -25,23 +25,23 @@ use super::{super::super::BufferingSimulateArgsBuilder, launch}; pub(super) fn dispatch< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( algorithm_args: A::Arguments, - rng: A::Rng, + rng: G, scenario: O, sample: Sample, pause_before: Option, mut local_partition: P, normalised_args: &BufferingSimulateArgsBuilder, -) -> anyhow::Result> +) -> anyhow::Result> where - Result, A::Error>: - anyhow::Context, A::Error>, + Result, A::Error>: anyhow::Context, A::Error>, { let config_str = normalised_args .build() @@ -118,7 +118,7 @@ where warn!("The simulation will report no events."); } - let result = launch::simulate::( + let result = launch::simulate::( algorithm_args, rng, scenario, diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs index e070202d6..ceb5ff4c8 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs @@ -2,7 +2,10 @@ use anyhow::Context; use rustcoalescence_algorithms::{result::SimulationOutcome, Algorithm}; -use necsim_core::{cogs::MathsCore, reporter::Reporter}; +use necsim_core::{ + cogs::{MathsCore, RngCore}, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use necsim_impls_no_std::cogs::origin_sampler::pre_sampler::OriginPreSampler; use necsim_partitioning_core::LocalPartition; @@ -14,18 +17,19 @@ use crate::args::config::sample::{Sample, SampleMode, SampleModeRestart, SampleO pub(super) fn simulate< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( algorithm_args: A::Arguments, - rng: A::Rng, + rng: G, scenario: O, sample: Sample, pause_before: Option, local_partition: &mut P, -) -> anyhow::Result> { +) -> anyhow::Result> { let lineages = match sample.origin { SampleOrigin::Habitat => { return A::initialise_and_simulate( diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs index 929556339..2907eddc8 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs @@ -27,8 +27,9 @@ use super::{ pub(super) fn dispatch< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( @@ -43,17 +44,16 @@ pub(super) fn dispatch< normalised_args: &mut BufferingSimulateArgsBuilder, ) -> anyhow::Result where - Result, A::Error>: - anyhow::Context, A::Error>, + Result, A::Error>: anyhow::Context, A::Error>, { - let rng: A::Rng = match parse::rng::parse_and_normalise( + let rng: G = match parse::rng::parse_and_normalise( ron_args, normalised_args, &mut A::get_logical_partition(&algorithm_args, &local_partition), )? { RngArgs::Seed(seed) => SeedableRng::seed_from_u64(seed), RngArgs::Sponge(bytes) => { - let mut seed = >::Seed::default(); + let mut seed = G::Seed::default(); let mut sponge = Keccak::v256(); sponge.update(&bytes); @@ -64,7 +64,7 @@ where RngArgs::State(state) => state.into(), }; - let result = info::dispatch::( + let result = info::dispatch::( algorithm_args, rng, scenario, From b6dd445498505fa77ac8c70c4d85343d9b609a77 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 11 Jan 2024 03:57:07 +0000 Subject: [PATCH 06/28] Cleanup cuda algorithm coersion --- .../alias/sampler/indexed/tests.rs | 8 +- .../alias/sampler/stack/tests.rs | 8 +- .../algorithms/cuda/cpu-kernel/src/lib.rs | 1 - .../algorithms/cuda/cpu-kernel/src/link.rs | 50 +++++- .../algorithms/cuda/cpu-kernel/src/patch.rs | 169 +++++++++++++++--- .../dispatch/valid/algorithm_scenario.rs | 83 +++------ 6 files changed, 229 insertions(+), 90 deletions(-) diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/indexed/tests.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/indexed/tests.rs index 00e8d33a5..04d554d8e 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/indexed/tests.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/indexed/tests.rs @@ -1037,7 +1037,7 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodIndexedSampler { exponents: [], total_weight: 0.0 }" + "DynamicAliasMethodIndexedSampler { exponents: [], total_weight: 0.0, .. }" ); for i in (1..=6_u8).rev() { @@ -1046,7 +1046,7 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodIndexedSampler { exponents: [2, 1, 0], total_weight: 21.0 }" + "DynamicAliasMethodIndexedSampler { exponents: [2, 1, 0], total_weight: 21.0, .. }" ); let mut sampler_clone = unsafe { sampler.backup_unchecked() }; @@ -1062,11 +1062,11 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodIndexedSampler { exponents: [2, 1, 0], total_weight: 18.0 }" + "DynamicAliasMethodIndexedSampler { exponents: [2, 1, 0], total_weight: 18.0, .. }" ); assert_eq!( &alloc::format!("{sampler_clone:?}"), - "DynamicAliasMethodIndexedSampler { exponents: [2, 1], total_weight: 20.0 }" + "DynamicAliasMethodIndexedSampler { exponents: [2, 1], total_weight: 20.0, .. }" ); } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/stack/tests.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/stack/tests.rs index 461fe6904..505bf295c 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/stack/tests.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/stack/tests.rs @@ -535,7 +535,7 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodStackSampler { exponents: [], total_weight: 0.0 }" + "DynamicAliasMethodStackSampler { exponents: [], total_weight: 0.0, .. }" ); for i in (1..=6_u8).rev() { @@ -544,7 +544,7 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodStackSampler { exponents: [2, 1, 0], total_weight: 21.0 }" + "DynamicAliasMethodStackSampler { exponents: [2, 1, 0], total_weight: 21.0, .. }" ); let mut sampler_clone = unsafe { sampler.backup_unchecked() }; @@ -560,11 +560,11 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodStackSampler { exponents: [2, 1, 0], total_weight: 18.0 }" + "DynamicAliasMethodStackSampler { exponents: [2, 1, 0], total_weight: 18.0, .. }" ); assert_eq!( &alloc::format!("{sampler_clone:?}"), - "DynamicAliasMethodStackSampler { exponents: [2, 1], total_weight: 20.0 }" + "DynamicAliasMethodStackSampler { exponents: [2, 1], total_weight: 20.0, .. }" ); } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs index 3176cdb11..66c190158 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs @@ -1,6 +1,5 @@ #![deny(clippy::pedantic)] #![feature(c_str_literals)] -#![feature(min_specialization)] #![allow(long_running_const_eval)] #![recursion_limit = "1024"] diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs index d519b533a..933cb48aa 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs @@ -1,4 +1,52 @@ -use crate::SimulationKernelPtx; +use necsim_core::{ + cogs::{ + CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, + LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, + }, + reporter::boolean::Boolean, +}; + +use necsim_impls_no_std::cogs::{ + active_lineage_sampler::singular::SingularActiveLineageSampler, + event_sampler::tracking::MinSpeciationTrackingEventSampler, +}; + +use rust_cuda::lend::RustToCuda; + +#[allow(clippy::type_complexity)] +pub struct SimulationKernelPtx< + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, + ReportSpeciation: Boolean, + ReportDispersal: Boolean, +>( + std::marker::PhantomData<( + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + ReportSpeciation, + ReportDispersal, + )>, +); macro_rules! link_kernel { ($habitat:ty, $dispersal:ty, $turnover:ty, $speciation:ty) => { diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index 99632da65..59e32f0b4 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -5,7 +5,7 @@ use necsim_core::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, }, - reporter::boolean::Boolean, // reporter::boolean::{Boolean, False, True}, + reporter::boolean::{Boolean, False, True}, }; use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, @@ -21,11 +21,7 @@ use crate::SimulationKernelPtx; // If `Kernel` is implemented for `ReportSpeciation` x `ReportDispersal`, i.e. // for {`False`, `True`} x {`False`, `True`} then it is implemented for all // `Boolean`s. However, Rust does not recognise that `Boolean` is closed over -// {`False`, `True`}. These default impls provide the necessary coersion. - -extern "C" { - fn unreachable_cuda_simulation_linking_reporter() -> !; -} +// {`False`, `True`}. This explicit impl provides the necessary coersion. #[allow(clippy::trait_duplication_in_bounds)] unsafe impl< @@ -46,23 +42,154 @@ unsafe impl< > CompiledKernelPtx< simulate, - > - for SimulationKernelPtx -//where -// SimulationKernelPtx: -// CompiledKernelPtx>, -// SimulationKernelPtx: -// CompiledKernelPtx>, -// SimulationKernelPtx: -// CompiledKernelPtx>, -// SimulationKernelPtx: -// CompiledKernelPtx>, + > for SimulationKernelPtx +where + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, { - default fn get_ptx() -> &'static CStr { - unsafe { unreachable_cuda_simulation_linking_reporter() } + #[inline] + fn get_ptx() -> &'static CStr { + match (ReportSpeciation::VALUE, ReportDispersal::VALUE) { + (false, false) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + False, + >::get_ptx(), + (false, true) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + True, + >::get_ptx(), + (true, false) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + False, + >::get_ptx(), + (true, true) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + True, + >::get_ptx(), + } } - default fn get_entry_point() -> &'static CStr { - unsafe { unreachable_cuda_simulation_linking_reporter() } + #[inline] + fn get_entry_point() -> &'static CStr { + match (ReportSpeciation::VALUE, ReportDispersal::VALUE) { + (false, false) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + False, + >::get_entry_point(), + (false, true) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + True, + >::get_entry_point(), + (true, false) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + False, + >::get_entry_point(), + (true, true) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + True, + >::get_entry_point(), + } } } diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs index 09651390c..6591e1d91 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs @@ -36,41 +36,42 @@ use super::{super::super::BufferingSimulateArgsBuilder, rng}; macro_rules! match_scenario_algorithm { ( - ($algorithm:expr, $scenario:expr => $algscen:ident) { + ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $($(#[$meta:meta])* $algpat:pat => $algcode:block),* <=> - $($scenpat:pat => $scencode:block),* + $($scenpat:pat => $scencode:block => $scenty:ident),* } ) => { match_scenario_algorithm! { - impl ($algorithm, $scenario => $algscen) { + impl ($algorithm, $scenario => $algscen : $algscenty) { $($(#[$meta])* $algpat => $algcode),* <=> - $($scenpat => $scencode),* + $($scenpat => $scencode => $scenty),* <=> } } }; ( - impl ($algorithm:expr, $scenario:expr => $algscen:ident) { + impl ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $(#[$meta:meta])* $algpat:pat => $algcode:block, $($(#[$metarem:meta])* $algpatrem:pat => $algcoderem:block),+ <=> - $($scenpat:pat => $scencode:block),* + $($scenpat:pat => $scencode:block => $scenty:ident),* <=> $($tail:tt)* } ) => { match_scenario_algorithm! { - impl ($algorithm, $scenario => $algscen) { + impl ($algorithm, $scenario => $algscen : $algscenty) { $($(#[$metarem])* $algpatrem => $algcoderem),+ <=> - $($scenpat => $scencode),* + $($scenpat => $scencode => $scenty),* <=> $($tail)* $(#[$meta])* $algpat => { match $scenario { $($scenpat => { + type $algscenty = $scenty; let $algscen = $scencode; $algcode }),* @@ -80,10 +81,10 @@ macro_rules! match_scenario_algorithm { } }; ( - impl ($algorithm:expr, $scenario:expr => $algscen:ident) { + impl ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $(#[$meta:meta])* $algpat:pat => $algcode:block <=> - $($scenpat:pat => $scencode:block),* + $($scenpat:pat => $scencode:block => $scenty:ident),* <=> $($tail:tt)* } @@ -93,6 +94,7 @@ macro_rules! match_scenario_algorithm { $(#[$meta])* $algpat => { match $scenario { $($scenpat => { + type $algscenty = $scenty; let $algscen = $scencode; $algcode }),* @@ -117,14 +119,14 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( normalised_args: &mut BufferingSimulateArgsBuilder, ) -> anyhow::Result { match_scenario_algorithm!( - (algorithm, scenario => scenario) + (algorithm, scenario => scenario: ScenarioTy) { #[cfg(feature = "rustcoalescence-algorithms-gillespie")] AlgorithmArgs::Gillespie(algorithm_args) => { rng::dispatch::< ::MathsCore, ::Rng<_>, - GillespieAlgorithm, _, R, P, + GillespieAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -135,7 +137,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( rng::dispatch::< ::MathsCore, ::Rng<_>, - EventSkippingAlgorithm, _, R, P, + EventSkippingAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -146,7 +148,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( rng::dispatch::< ::MathsCore, ::Rng<_>, - IndependentAlgorithm, _, R, P, + IndependentAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -154,47 +156,10 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( }, #[cfg(feature = "rustcoalescence-algorithms-cuda")] AlgorithmArgs::Cuda(algorithm_args) => { - fn coerce_cuda_dispatch< - 'p, - M: necsim_core::cogs::MathsCore + Sync, - G: necsim_core::cogs::PrimeableRng + rust_cuda::lend::RustToCuda + Sync, - O: Scenario, - R: Reporter, - P: LocalPartition<'p, R>, - >( - local_partition: P, - - sample: crate::args::config::sample::Sample, - algorithm_args: ::Arguments, - scenario: O, - pause_before: Option, - - ron_args: &str, - normalised_args: &mut BufferingSimulateArgsBuilder, - ) -> anyhow::Result where - O::Habitat: rust_cuda::lend::RustToCuda + Sync, - O::DispersalSampler< - necsim_impls_no_std::cogs::dispersal_sampler::in_memory::packed_alias::InMemoryPackedAliasDispersalSampler< - M, O::Habitat, G, - > - >: rust_cuda::lend::RustToCuda + Sync, - O::TurnoverRate: rust_cuda::lend::RustToCuda + Sync, - O::SpeciationProbability: rust_cuda::lend::RustToCuda + Sync, - { - rng::dispatch::< - M, - G, - CudaAlgorithm, _, R, P, - >( - local_partition, sample, algorithm_args, scenario, - pause_before, ron_args, normalised_args, - ) - } - - coerce_cuda_dispatch::< + rng::dispatch::< ::MathsCore, ::Rng<_>, - _, R, P, + CudaAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -206,40 +171,40 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( scenario_args, speciation_probability_per_generation, )? - }, + } => SpatiallyExplicitUniformTurnoverScenario, ScenarioArgs::SpatiallyExplicitTurnoverMap(scenario_args) => { SpatiallyExplicitTurnoverMapScenario::initialise( scenario_args, speciation_probability_per_generation, )? - }, + } => SpatiallyExplicitTurnoverMapScenario, ScenarioArgs::NonSpatial(scenario_args) => { NonSpatialScenario::initialise( scenario_args, speciation_probability_per_generation, ) .into_ok() - }, + } => NonSpatialScenario, ScenarioArgs::AlmostInfinite(scenario_args) => { AlmostInfiniteScenario::initialise( scenario_args, speciation_probability_per_generation, ) .into_ok() - }, + } => AlmostInfiniteScenario, ScenarioArgs::SpatiallyImplicit(scenario_args) => { SpatiallyImplicitScenario::initialise( scenario_args, speciation_probability_per_generation, ) .into_ok() - }, + } => SpatiallyImplicitScenario, ScenarioArgs::WrappingNoise(scenario_args) => { WrappingNoiseScenario::initialise( scenario_args, speciation_probability_per_generation, ) .into_ok() - } + } => WrappingNoiseScenario }) } From 775094a0a2ef77214db2b3995999300db9825468 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 11 Jan 2024 05:13:28 +0000 Subject: [PATCH 07/28] Some more cleanup --- Cargo.lock | 7 ++- necsim/core/Cargo.toml | 4 +- necsim/core/src/landscape/extent.rs | 22 ++++---- necsim/impls/cuda/Cargo.toml | 4 +- necsim/impls/cuda/src/cogs/rng.rs | 51 ++++++++++--------- necsim/impls/no-std/Cargo.toml | 4 +- .../src/cogs/event_sampler/independent.rs | 11 ++-- .../no-std/src/cogs/event_sampler/tracking.rs | 3 +- .../no-std/src/cogs/habitat/in_memory.rs | 3 +- .../no-std/src/cogs/habitat/non_spatial.rs | 3 +- necsim/impls/no-std/src/cogs/rng/seahash.rs | 2 - necsim/impls/no-std/src/cogs/rng/wyhash.rs | 2 - .../no-std/src/decomposition/equal/area.rs | 2 +- .../no-std/src/decomposition/equal/mod.rs | 2 +- .../no-std/src/decomposition/equal/weight.rs | 2 +- rustcoalescence/Cargo.toml | 4 +- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 +- .../cuda/src/parallelisation/monolithic.rs | 2 +- .../dispatch/valid/algorithm_scenario.rs | 1 - 21 files changed, 67 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c4ca197c..8fe5771de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" dependencies = [ "cargo_metadata", "colored", @@ -1601,7 +1601,6 @@ dependencies = [ "necsim-partitioning-mpi", "necsim-plugins-core", "ron", - "rust-cuda", "rustcoalescence-algorithms", "rustcoalescence-algorithms-cuda", "rustcoalescence-algorithms-gillespie", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index ef8b0dccc..f353fabd8 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "host"], optional = true } diff --git a/necsim/core/src/landscape/extent.rs b/necsim/core/src/landscape/extent.rs index 12d8a0219..40c1012e5 100644 --- a/necsim/core/src/landscape/extent.rs +++ b/necsim/core/src/landscape/extent.rs @@ -3,10 +3,12 @@ use necsim_core_bond::OffByOneU32; use super::Location; #[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] -#[derive(PartialEq, Eq, Copy, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] +#[derive(PartialEq, Eq, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] +#[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(rename = "Extent")] #[serde(deny_unknown_fields)] -#[repr(C)] pub struct LandscapeExtent { x: u32, y: u32, @@ -58,7 +60,7 @@ impl LandscapeExtent { LocationIterator { x: self.x, y: self.y, - extent: *self, + extent: self.clone(), first_y: true, } } @@ -186,7 +188,7 @@ mod tests { LocationIterator { x: 0, y: 0, - extent, + extent: extent.clone(), first_y: true, } ); @@ -200,7 +202,7 @@ mod tests { LocationIterator { x: 0, y: 0, - extent, + extent: extent.clone(), first_y: false, } ); @@ -230,7 +232,7 @@ mod tests { LocationIterator { x: 1386, y: 6812, - extent, + extent: extent.clone(), first_y: true, } ); @@ -242,7 +244,7 @@ mod tests { LocationIterator { x: 0, y: 6812, - extent, + extent: extent.clone(), first_y: true, } ); @@ -255,7 +257,7 @@ mod tests { LocationIterator { x: 1386, y: 6813, - extent, + extent: extent.clone(), first_y: false, } ); @@ -269,7 +271,7 @@ mod tests { LocationIterator { x: 1386, y: 0, - extent, + extent: extent.clone(), first_y: false, } ); @@ -283,7 +285,7 @@ mod tests { LocationIterator { x: 1386, y: 6812, - extent, + extent: extent.clone(), first_y: false, } ); diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 984ba4a50..1140bfe9f 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "host"] } diff --git a/necsim/impls/cuda/src/cogs/rng.rs b/necsim/impls/cuda/src/cogs/rng.rs index 671ebb79f..0bb7feb84 100644 --- a/necsim/impls/cuda/src/cogs/rng.rs +++ b/necsim/impls/cuda/src/cogs/rng.rs @@ -3,7 +3,10 @@ use core::marker::PhantomData; use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; use const_type_layout::TypeGraphLayout; -use rust_cuda::safety::{PortableBitSemantics, StackOnly}; +use rust_cuda::{ + safety::{PortableBitSemantics, StackOnly}, + utils::adapter::RustToCudaWithPortableBitCloneSemantics, +}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -12,38 +15,45 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[cuda(free = "M", free = "R")] pub struct CudaRng where - R: RngCore + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, + R: RngCore + StackOnly + PortableBitSemantics + TypeGraphLayout, { - inner: R, + #[cuda(embed)] + inner: RustToCudaWithPortableBitCloneSemantics, marker: PhantomData, } -impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> Clone +impl + StackOnly + PortableBitSemantics + TypeGraphLayout + Copy> Copy + for CudaRng +{ +} + +#[allow(clippy::expl_impl_clone_on_copy)] +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> Clone for CudaRng { fn clone(&self) -> Self { Self { - inner: self.inner, + inner: self.inner.clone(), marker: PhantomData::, } } } -impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> - From for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> From + for CudaRng { #[must_use] #[inline] fn from(rng: R) -> Self { Self { - inner: rng, + inner: rng.into(), marker: PhantomData::, } } } -impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> - RngCore for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> RngCore + for CudaRng { type Seed = >::Seed; @@ -51,7 +61,7 @@ impl + Copy + StackOnly + PortableBitSemantics + Typ #[inline] fn from_seed(seed: Self::Seed) -> Self { Self { - inner: R::from_seed(seed), + inner: R::from_seed(seed).into(), marker: PhantomData::, } } @@ -63,10 +73,8 @@ impl + Copy + StackOnly + PortableBitSemantics + Typ } } -impl< - M: MathsCore, - R: PrimeableRng + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, - > PrimeableRng for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> + PrimeableRng for CudaRng { #[inline] fn prime_with(&mut self, location_index: u64, time_index: u64) { @@ -74,22 +82,19 @@ impl< } } -impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> - Serialize for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> Serialize + for CudaRng { fn serialize(&self, serializer: S) -> Result { self.inner.serialize(serializer) } } -impl< - 'de, - M: MathsCore, - R: RngCore + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, - > Deserialize<'de> for CudaRng +impl<'de, M: MathsCore, R: RngCore + StackOnly + PortableBitSemantics + TypeGraphLayout> + Deserialize<'de> for CudaRng { fn deserialize>(deserializer: D) -> Result { - let inner = R::deserialize(deserializer)?; + let inner = R::deserialize(deserializer)?.into(); Ok(Self { inner, diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 07f88df49..4465c45ee 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "final", "host"], optional = true } diff --git a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs index e3d51b674..17ac313d0 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs @@ -43,12 +43,7 @@ pub struct IndependentEventSampler< T: TurnoverRate, N: SpeciationProbability, > { - #[cfg_attr( - feature = "cuda", - cuda( - embed = "Option>" - ) - )] + #[cfg_attr(feature = "cuda", cuda(embed))] min_spec_sample: Option, marker: PhantomData<(M, H, G, X, D, T, N)>, } @@ -84,7 +79,7 @@ impl< { unsafe fn backup_unchecked(&self) -> Self { Self { - min_spec_sample: self.min_spec_sample, + min_spec_sample: self.min_spec_sample.clone(), marker: PhantomData::<(M, H, G, X, D, T, N)>, } } @@ -254,7 +249,7 @@ impl< ) -> Option { // `core::mem::replace()` would be semantically better // - but `clone()` does not spill to local memory - let old_value = self.min_spec_sample; + let old_value = self.min_spec_sample.clone(); self.min_spec_sample = new; diff --git a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs index d541d21a4..5525256ad 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs @@ -26,7 +26,8 @@ pub trait MinSpeciationTrackingEventSampler< -> Option; } -#[derive(Clone, Copy, Debug, TypeLayout)] +#[derive(Clone, Debug, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] pub struct SpeciationSample { speciation_sample: ClosedOpenUnitF64, diff --git a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs index a1520550b..fb47d7a6d 100644 --- a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs @@ -21,6 +21,7 @@ pub struct InMemoryHabitat { habitat: Final>, #[cfg_attr(feature = "cuda", cuda(embed))] u64_injection: Final>, + #[cfg_attr(feature = "cuda", cuda(embed))] extent: LandscapeExtent, marker: PhantomData, } @@ -31,7 +32,7 @@ impl Backup for InMemoryHabitat { Self { habitat: Final::new(self.habitat.clone()), u64_injection: Final::new(self.u64_injection.clone()), - extent: self.extent, + extent: self.extent.clone(), marker: PhantomData::, } } diff --git a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs index edbdf23f1..6d83fe75b 100644 --- a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs @@ -14,6 +14,7 @@ use necsim_core_bond::{OffByOneU32, OffByOneU64}; #[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct NonSpatialHabitat { + #[cfg_attr(feature = "cuda", cuda(embed))] extent: LandscapeExtent, deme: NonZeroU32, marker: PhantomData, @@ -58,7 +59,7 @@ impl NonSpatialHabitat { impl Backup for NonSpatialHabitat { unsafe fn backup_unchecked(&self) -> Self { Self { - extent: self.extent, + extent: self.extent.clone(), deme: self.deme, marker: PhantomData::, } diff --git a/necsim/impls/no-std/src/cogs/rng/seahash.rs b/necsim/impls/no-std/src/cogs/rng/seahash.rs index 6487a3531..1da7d2e9a 100644 --- a/necsim/impls/no-std/src/cogs/rng/seahash.rs +++ b/necsim/impls/no-std/src/cogs/rng/seahash.rs @@ -17,8 +17,6 @@ pub struct SeaHash { marker: PhantomData, } -impl Copy for SeaHash {} - impl RngCore for SeaHash { type Seed = [u8; 8]; diff --git a/necsim/impls/no-std/src/cogs/rng/wyhash.rs b/necsim/impls/no-std/src/cogs/rng/wyhash.rs index df86272f6..eae21264c 100644 --- a/necsim/impls/no-std/src/cogs/rng/wyhash.rs +++ b/necsim/impls/no-std/src/cogs/rng/wyhash.rs @@ -23,8 +23,6 @@ pub struct WyHash { marker: PhantomData, } -impl Copy for WyHash {} - impl RngCore for WyHash { type Seed = [u8; 8]; diff --git a/necsim/impls/no-std/src/decomposition/equal/area.rs b/necsim/impls/no-std/src/decomposition/equal/area.rs index 8d61e05f2..1e418c019 100644 --- a/necsim/impls/no-std/src/decomposition/equal/area.rs +++ b/necsim/impls/no-std/src/decomposition/equal/area.rs @@ -12,7 +12,7 @@ impl> EqualDecomposition { /// Returns `Ok(Self)` iff the `habitat` can be partitioned into /// `subdomain.size()` by area, otherwise returns `Err(Self)`. pub fn area(habitat: &H, subdomain: Partition) -> Result { - let extent = *habitat.get_extent(); + let extent = habitat.get_extent().clone(); let mut indices = Vec::with_capacity(subdomain.size().get() as usize); diff --git a/necsim/impls/no-std/src/decomposition/equal/mod.rs b/necsim/impls/no-std/src/decomposition/equal/mod.rs index d1ed79319..885c88103 100644 --- a/necsim/impls/no-std/src/decomposition/equal/mod.rs +++ b/necsim/impls/no-std/src/decomposition/equal/mod.rs @@ -34,7 +34,7 @@ impl> Backup for EqualDecomposition { unsafe fn backup_unchecked(&self) -> Self { Self { subdomain: self.subdomain, - extent: self.extent, + extent: self.extent.clone(), morton: self.morton, indices: self.indices.clone(), _marker: PhantomData::<(M, H)>, diff --git a/necsim/impls/no-std/src/decomposition/equal/weight.rs b/necsim/impls/no-std/src/decomposition/equal/weight.rs index a28dbffe1..cc5ec1e86 100644 --- a/necsim/impls/no-std/src/decomposition/equal/weight.rs +++ b/necsim/impls/no-std/src/decomposition/equal/weight.rs @@ -12,7 +12,7 @@ impl> EqualDecomposition { /// Returns `Ok(Self)` iff the `habitat` can be partitioned into /// `subdomain.size()` by weight, otherwise returns `Err(Self)`. pub fn weight(habitat: &H, subdomain: Partition) -> Result { - let extent = *habitat.get_extent(); + let extent = habitat.get_extent().clone(); let mut total_habitat = 0; let mut indices = Vec::with_capacity(subdomain.size().get() as usize); diff --git a/rustcoalescence/Cargo.toml b/rustcoalescence/Cargo.toml index 71c16c9b5..2a367eb33 100644 --- a/rustcoalescence/Cargo.toml +++ b/rustcoalescence/Cargo.toml @@ -14,7 +14,7 @@ necsim-partitioning-mpi = ["dep:necsim-partitioning-mpi"] rustcoalescence-algorithms-gillespie = ["dep:rustcoalescence-algorithms-gillespie"] rustcoalescence-algorithms-independent = ["dep:rustcoalescence-algorithms-independent"] -rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda", "dep:rust-cuda"] +rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda"] [dependencies] necsim-core = { path = "../necsim/core" } @@ -34,8 +34,6 @@ rustcoalescence-algorithms-gillespie = { path = "algorithms/gillespie", optional rustcoalescence-algorithms-independent = { path = "algorithms/independent", optional = true } rustcoalescence-algorithms-cuda = { path = "algorithms/cuda", optional = true } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = [], optional = true } - clap = { version = "4.0", features = ["derive"] } anyhow = "1.0" serde = { version = "1.0", features = ["derive"] } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index b51090971..e174cb6df 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index ae5937ec8..88fe2ff9b 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index f0a8873c0..544fe2511 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index eccfb8aba..e28cd0ef0 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -172,7 +172,6 @@ pub fn simulate< HostAndDeviceMutRef::with_new(&mut total_time_max, |total_time_max| -> Result<()> { HostAndDeviceMutRef::with_new(&mut total_steps_sum, |total_steps_sum| -> Result<()> { - // TODO: Pipeline async launches and callbacks of simulation/event analysis simulation.lend_to_cuda(|simulation_cuda_repr| -> Result<()> { while !slow_lineages.is_empty() && pause_before.map_or(true, |pause_before| level_time < pause_before) @@ -304,6 +303,7 @@ pub fn simulate< } event_buffer = event_buffer_host_async.synchronize()?; + // TODO: explore partial sorting on the GPU event_buffer.report_events_unordered(&mut proxy); proxy.local_partition().get_reporter().report_progress( diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs index 6591e1d91..54715cf40 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs @@ -105,7 +105,6 @@ macro_rules! match_scenario_algorithm { } #[allow(clippy::too_many_arguments)] -#[allow(clippy::too_many_lines)] // FIXME pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( local_partition: P, From 352b4e720e02455c3a13f9a87a01959429a6be79 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 11 Jan 2024 05:24:20 +0000 Subject: [PATCH 08/28] Add back missing Backup for SeaHash and WyHash rngs --- necsim/impls/no-std/src/cogs/rng/seahash.rs | 9 ++++++++- necsim/impls/no-std/src/cogs/rng/wyhash.rs | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/necsim/impls/no-std/src/cogs/rng/seahash.rs b/necsim/impls/no-std/src/cogs/rng/seahash.rs index 1da7d2e9a..93cc87ecd 100644 --- a/necsim/impls/no-std/src/cogs/rng/seahash.rs +++ b/necsim/impls/no-std/src/cogs/rng/seahash.rs @@ -1,6 +1,6 @@ use core::marker::PhantomData; -use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; +use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; use serde::{Deserialize, Serialize}; @@ -17,6 +17,13 @@ pub struct SeaHash { marker: PhantomData, } +#[contract_trait] +impl Backup for SeaHash { + unsafe fn backup_unchecked(&self) -> Self { + self.clone() + } +} + impl RngCore for SeaHash { type Seed = [u8; 8]; diff --git a/necsim/impls/no-std/src/cogs/rng/wyhash.rs b/necsim/impls/no-std/src/cogs/rng/wyhash.rs index eae21264c..c4fdeed68 100644 --- a/necsim/impls/no-std/src/cogs/rng/wyhash.rs +++ b/necsim/impls/no-std/src/cogs/rng/wyhash.rs @@ -1,6 +1,6 @@ use core::marker::PhantomData; -use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; +use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; use serde::{Deserialize, Serialize}; @@ -23,6 +23,13 @@ pub struct WyHash { marker: PhantomData, } +#[contract_trait] +impl Backup for WyHash { + unsafe fn backup_unchecked(&self) -> Self { + self.clone() + } +} + impl RngCore for WyHash { type Seed = [u8; 8]; From b24a056f6a7ffa593121044468ec687fdd7102b8 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 12 Jan 2024 03:46:38 +0000 Subject: [PATCH 09/28] Fix CUDA kernel extraneous pub exports --- Cargo.lock | 6 +- necsim/core/Cargo.toml | 4 +- necsim/impls/cuda/Cargo.toml | 4 +- necsim/impls/no-std/Cargo.toml | 4 +- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 +- .../algorithms/cuda/gpu-kernel/src/lib.rs | 65 +++++++------------ rustcoalescence/algorithms/cuda/src/launch.rs | 8 ++- rustcoalescence/algorithms/cuda/src/lib.rs | 8 ++- 10 files changed, 47 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8fe5771de..2552e9173 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" +source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" +source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" +source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index f353fabd8..371db683b 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 1140bfe9f..5a09b89b0 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 4465c45ee..cd72e882e 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index e174cb6df..4773c0a7a 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 88fe2ff9b..9c347e4c7 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 544fe2511..03307a6ea 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index 806d664c2..b2c7e4294 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -13,24 +13,29 @@ extern crate alloc; #[cfg(target_os = "cuda")] use core::ops::ControlFlow; +use core::sync::atomic::AtomicU64; -// FIXME: why pub use? -pub use necsim_core::{ +use necsim_core::{ cogs::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, }, + lineage::Lineage, reporter::boolean::Boolean, + simulation::Simulation, }; +use necsim_core_bond::{NonNegativeF64, PositiveF64}; -// FIXME: why pub use? -pub use necsim_impls_no_std::cogs::{ +use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; +use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, }; -// FIXME: why pub use? -pub use rust_cuda::lend::RustToCuda; +use rust_cuda::{ + kernel::param::{DeepPerThreadBorrow, PerThreadShallowCopy, PtxJit, ShallowInteriorMutable}, + lend::RustToCuda, +}; #[rust_cuda::kernel::kernel(pub use link! for impl)] #[kernel( @@ -55,45 +60,19 @@ pub fn simulate< ReportSpeciation: Boolean, ReportDispersal: Boolean, >( - simulation: &rust_cuda::kernel::param::PtxJit< - rust_cuda::kernel::param::DeepPerThreadBorrow< - necsim_core::simulation::Simulation, - >, - >, - task_list: &mut rust_cuda::kernel::param::PtxJit< - rust_cuda::kernel::param::DeepPerThreadBorrow< - necsim_impls_cuda::value_buffer::ValueBuffer, - >, - >, - event_buffer_reporter: &mut rust_cuda::kernel::param::PtxJit< - rust_cuda::kernel::param::DeepPerThreadBorrow< - necsim_impls_cuda::event_buffer::EventBuffer, - >, - >, - min_spec_sample_buffer: &mut rust_cuda::kernel::param::PtxJit< - rust_cuda::kernel::param::DeepPerThreadBorrow< - necsim_impls_cuda::value_buffer::ValueBuffer, - >, - >, - next_event_time_buffer: &mut rust_cuda::kernel::param::PtxJit< - rust_cuda::kernel::param::DeepPerThreadBorrow< - necsim_impls_cuda::value_buffer::ValueBuffer< - necsim_core_bond::PositiveF64, - false, - true, - >, - >, - >, - total_time_max: &rust_cuda::kernel::param::ShallowInteriorMutable< - core::sync::atomic::AtomicU64, - >, - total_steps_sum: &rust_cuda::kernel::param::ShallowInteriorMutable< - core::sync::atomic::AtomicU64, + simulation: &PtxJit>>, + task_list: &mut PtxJit>>, + event_buffer_reporter: &mut PtxJit< + DeepPerThreadBorrow>, >, - max_steps: rust_cuda::kernel::param::PerThreadShallowCopy, - max_next_event_time: rust_cuda::kernel::param::PerThreadShallowCopy< - necsim_core_bond::NonNegativeF64, + min_spec_sample_buffer: &mut PtxJit< + DeepPerThreadBorrow>, >, + next_event_time_buffer: &mut PtxJit>>, + total_time_max: &ShallowInteriorMutable, + total_steps_sum: &ShallowInteriorMutable, + max_steps: PerThreadShallowCopy, + max_next_event_time: PerThreadShallowCopy, ) { // TODO: use simulation with non-allocating clone let mut simulation = unsafe { core::mem::ManuallyDrop::new(core::ptr::read(simulation)) }; diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 3c8f255a6..5a94abdc9 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -1,6 +1,10 @@ use std::marker::PhantomData; -use necsim_core::{cogs::MathsCore, reporter::Reporter, simulation::SimulationBuilder}; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + reporter::Reporter, + simulation::SimulationBuilder, +}; use necsim_core_bond::NonNegativeF64; use necsim_impls_no_std::{ @@ -23,7 +27,7 @@ use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::result::SimulationOutcome; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_gpu_kernel::{simulate, PrimeableRng}; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use rust_cuda::{ deps::rustacuda::{ diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index c977ae339..8d1625bfe 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -6,7 +6,11 @@ extern crate serde_derive_state; use initialiser::CudaLineageStoreSampleInitialiser; -use necsim_core::{cogs::MathsCore, lineage::Lineage, reporter::Reporter}; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + lineage::Lineage, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use necsim_impls_cuda::cogs::{maths::NvptxMathsCore, rng::CudaRng}; @@ -39,7 +43,7 @@ use rustcoalescence_algorithms::{ use rustcoalescence_scenarios::Scenario; use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; -use rustcoalescence_algorithms_cuda_gpu_kernel::{simulate, PrimeableRng}; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda}; From a632584dfbba439c1dc5bf18cd24374b2f2d5102 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 12 Jan 2024 03:59:47 +0000 Subject: [PATCH 10/28] Minor improvement of the event buffer hack --- necsim/impls/cuda/src/event_buffer.rs | 47 +++++++++++++++++++-------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/necsim/impls/cuda/src/event_buffer.rs b/necsim/impls/cuda/src/event_buffer.rs index 2527ddef0..2fe7cb12a 100644 --- a/necsim/impls/cuda/src/event_buffer.rs +++ b/necsim/impls/cuda/src/event_buffer.rs @@ -1,4 +1,7 @@ -use core::fmt; +use core::{ + fmt, + ops::{Deref, DerefMut}, +}; use const_type_layout::TypeGraphLayout; #[cfg(not(target_os = "cuda"))] @@ -198,14 +201,14 @@ impl event: impl Into< as EventType>::Event>, ) { if let ([mask, mask_rest @ ..], [buffer, buffer_rest @ ..]) = ( - core::mem::take(&mut self.event_mask.0), - core::mem::take(&mut self.event_buffer.0), + core::mem::take(&mut *self.event_mask), + core::mem::take(&mut *self.event_buffer), ) { mask.write(true); buffer.write(MaybeSome::Some(event.into())); - self.event_mask.0 = mask_rest; - self.event_buffer.0 = buffer_rest; + *self.event_mask = mask_rest; + *self.event_buffer = buffer_rest; } } } @@ -225,7 +228,7 @@ impl Reporter impl Reporter for EventBuffer { impl_report!( #[debug_requires( - !self.event_buffer.0.is_empty(), + !self.event_buffer.is_empty(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { @@ -238,14 +241,14 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - !self.event_buffer.0.is_empty(), + !self.event_buffer.is_empty(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { self.report_event(event.clone()); - self.event_mask.0 = &mut []; - self.event_buffer.0 = &mut []; + *self.event_mask = &mut []; + *self.event_buffer = &mut []; } ); } @@ -254,20 +257,20 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - !self.event_buffer.0.is_empty(), + !self.event_buffer.is_empty(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { self.report_event(event.clone()); - self.event_mask.0 = &mut []; - self.event_buffer.0 = &mut []; + *self.event_mask = &mut []; + *self.event_buffer = &mut []; } ); impl_report!( #[debug_requires( - !self.event_buffer.0.is_empty(), + !self.event_buffer.is_empty(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { @@ -281,6 +284,24 @@ struct CudaExchangeSlice Deref + for CudaExchangeSlice +{ + type Target = &'static mut [T]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut + for CudaExchangeSlice +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + impl< T: 'static + StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, From 8c2cc90bc426b1d7514cb36159efd2f022a7dc66 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 12 Jan 2024 04:09:56 +0000 Subject: [PATCH 11/28] Remove unused control_flow_enum feature --- necsim/core/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/necsim/core/src/lib.rs b/necsim/core/src/lib.rs index 0888987ba..adef409b9 100644 --- a/necsim/core/src/lib.rs +++ b/necsim/core/src/lib.rs @@ -2,7 +2,6 @@ #![no_std] #![feature(const_type_name)] #![feature(offset_of)] -#![feature(control_flow_enum)] #![feature(min_specialization)] #[doc(hidden)] From 7fbf443fc035e4a1e6fc093a10dc61ca9974d2d7 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 12 Jan 2024 09:38:59 +0000 Subject: [PATCH 12/28] Revert Copy for [Indexed]Location --- necsim/core/src/cogs/emigration_exit.rs | 4 ++-- necsim/core/src/cogs/lineage_store.rs | 8 ++++---- necsim/core/src/event.rs | 2 +- necsim/core/src/landscape/extent.rs | 1 - necsim/core/src/landscape/location.rs | 18 +++++++++++------- necsim/core/src/lineage.rs | 7 ++++--- .../core/src/simulation/process/immigration.rs | 2 +- necsim/impls/cuda/src/utils.rs | 2 +- .../alias/location/mod.rs | 2 +- .../alias/location/sampler.rs | 2 +- .../in_memory/separable_alias/dispersal.rs | 4 ++-- .../cogs/dispersal_sampler/wrapping_noise.rs | 2 +- .../event_sampler/gillespie/conditional/mod.rs | 2 +- .../no-std/src/cogs/event_sampler/tracking.rs | 3 ++- .../src/cogs/habitat/wrapping_noise/mod.rs | 6 +++--- .../coherent/globally/gillespie/mod.rs | 2 +- .../coherent/globally/gillespie/store.rs | 6 +++--- .../coherent/globally/singleton_demes/mod.rs | 2 +- .../coherent/globally/singleton_demes/store.rs | 4 ++-- .../coherent/locally/classical/mod.rs | 2 +- .../coherent/locally/classical/store.rs | 2 +- .../src/cogs/origin_sampler/in_memory.rs | 2 +- .../src/cogs/origin_sampler/non_spatial.rs | 2 +- .../parallelisation/independent/landscape.rs | 2 +- .../src/individual/feather/dataframe.rs | 2 +- .../species/src/individual/sqlite/database.rs | 2 +- .../species/src/location/feather/dataframe.rs | 6 +++--- .../species/src/location/feather/reporter.rs | 4 ++-- necsim/plugins/tskit/src/tree/table.rs | 2 +- .../src/event_skipping/initialiser/fixup.rs | 2 +- .../gillespie/classical/initialiser/fixup.rs | 2 +- .../gillespie/turnover/initialiser/fixup.rs | 2 +- 32 files changed, 58 insertions(+), 53 deletions(-) diff --git a/necsim/core/src/cogs/emigration_exit.rs b/necsim/core/src/cogs/emigration_exit.rs index f594da68c..45c6d37c3 100644 --- a/necsim/core/src/cogs/emigration_exit.rs +++ b/necsim/core/src/cogs/emigration_exit.rs @@ -27,8 +27,8 @@ pub trait EmigrationExit, G: RngCore, S: LineageS ret_event_time, )) => { ret_lineage_reference == &old(global_reference.clone()) && - ret_dispersal_origin == &old(dispersal_origin) && - ret_dispersal_target == &old(dispersal_target) && + ret_dispersal_origin == &old(dispersal_origin.clone()) && + ret_dispersal_target == &old(dispersal_target.clone()) && ret_prior_time == &old(prior_time) && ret_event_time == &old(event_time) }, diff --git a/necsim/core/src/cogs/lineage_store.rs b/necsim/core/src/cogs/lineage_store.rs index c31899788..afc0d319d 100644 --- a/necsim/core/src/cogs/lineage_store.rs +++ b/necsim/core/src/cogs/lineage_store.rs @@ -49,12 +49,12 @@ pub trait LocallyCoherentLineageStore>: &ret ).is_some(), "lineage was activated")] #[debug_ensures( - self[&ret].indexed_location == old(lineage.indexed_location), + self[&ret].indexed_location == old(lineage.indexed_location.clone()), "lineage was added to indexed_location" )] #[debug_ensures( self.get_global_lineage_reference_at_indexed_location( - &old(lineage.indexed_location), old(habitat) + &old(lineage.indexed_location.clone()), old(habitat) ) == Some(&self[&ret].global_reference), "lineage is now indexed at indexed_location" )] @@ -117,7 +117,7 @@ pub trait GloballyCoherentLineageStore>: #[debug_ensures( self.get_local_lineage_references_at_location_unordered( - &old(*lineage.indexed_location.location()), old(habitat) + &old(lineage.indexed_location.location().clone()), old(habitat) ).last() == Some(&ret), "lineage is now indexed unordered at indexed_location.location()" )] @@ -125,7 +125,7 @@ pub trait GloballyCoherentLineageStore>: old(self.get_local_lineage_references_at_location_unordered( lineage.indexed_location.location(), old(habitat) ).len() + 1) == self.get_local_lineage_references_at_location_unordered( - &old(*lineage.indexed_location.location()), old(habitat) + &old(lineage.indexed_location.location().clone()), old(habitat) ).len(), "unordered active lineage index at given location has grown by 1" )] diff --git a/necsim/core/src/event.rs b/necsim/core/src/event.rs index 6fd2fd3c3..40108ae85 100644 --- a/necsim/core/src/event.rs +++ b/necsim/core/src/event.rs @@ -108,7 +108,7 @@ impl From for PackedEvent { global_lineage_reference: event.global_lineage_reference.clone(), prior_time: event.prior_time.get().make_negative(), event_time: event.event_time.get(), - origin: event.origin, + origin: event.origin.clone(), target: event.origin, coalescence: event.global_lineage_reference, } diff --git a/necsim/core/src/landscape/extent.rs b/necsim/core/src/landscape/extent.rs index 40c1012e5..b3250cbca 100644 --- a/necsim/core/src/landscape/extent.rs +++ b/necsim/core/src/landscape/extent.rs @@ -5,7 +5,6 @@ use super::Location; #[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] #[derive(PartialEq, Eq, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] #[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] -#[repr(C)] #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(rename = "Extent")] #[serde(deny_unknown_fields)] diff --git a/necsim/core/src/landscape/location.rs b/necsim/core/src/landscape/location.rs index 7854775a9..7d8b2cf26 100644 --- a/necsim/core/src/landscape/location.rs +++ b/necsim/core/src/landscape/location.rs @@ -1,11 +1,13 @@ use serde::{Deserialize, Serialize}; -#[allow(clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive( - Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash, Debug, Serialize, Deserialize, TypeLayout, + Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, )] -#[serde(deny_unknown_fields)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] +#[serde(deny_unknown_fields)] pub struct Location { x: u32, y: u32, @@ -35,12 +37,15 @@ impl From for Location { } #[derive( - Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash, Debug, Serialize, Deserialize, TypeLayout, + Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, )] -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] -#[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")] +#[allow(clippy::module_name_repetitions)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] +#[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")] pub struct IndexedLocation { + #[cfg_attr(feature = "cuda", cuda(embed))] location: Location, index: u32, } @@ -65,7 +70,6 @@ impl IndexedLocation { #[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] #[serde(rename = "IndexedLocation")] -#[repr(C)] struct IndexedLocationRaw { x: u32, y: u32, diff --git a/necsim/core/src/lineage.rs b/necsim/core/src/lineage.rs index 24e72049f..ee9687375 100644 --- a/necsim/core/src/lineage.rs +++ b/necsim/core/src/lineage.rs @@ -109,6 +109,7 @@ pub struct Lineage { #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "time")] pub last_event_time: NonNegativeF64, + #[cfg_attr(feature = "cuda", cuda(embed))] #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "loc")] pub indexed_location: IndexedLocation, @@ -118,7 +119,7 @@ impl Lineage { #[must_use] #[allow(clippy::no_effect_underscore_binding)] #[debug_ensures( - ret.indexed_location == old(indexed_location), + ret.indexed_location == old(indexed_location.clone()), "stores the indexed_location" )] #[debug_ensures(ret.last_event_time == 0.0_f64, "starts at t_0 = 0.0")] @@ -186,8 +187,8 @@ impl Backup for MigratingLineage { unsafe fn backup_unchecked(&self) -> Self { Self { global_reference: self.global_reference.backup_unchecked(), - dispersal_origin: self.dispersal_origin, - dispersal_target: self.dispersal_target, + dispersal_origin: self.dispersal_origin.clone(), + dispersal_target: self.dispersal_target.clone(), prior_time: self.prior_time, event_time: self.event_time, coalescence_rng_sample: self.coalescence_rng_sample.backup_unchecked(), diff --git a/necsim/core/src/simulation/process/immigration.rs b/necsim/core/src/simulation/process/immigration.rs index 93c20f2f5..aa1c49b3c 100644 --- a/necsim/core/src/simulation/process/immigration.rs +++ b/necsim/core/src/simulation/process/immigration.rs @@ -68,7 +68,7 @@ impl< active_lineage_sampler.push_active_lineage( Lineage { global_reference: migrating_lineage.global_reference.clone(), - indexed_location: dispersal_target, + indexed_location: dispersal_target.clone(), last_event_time: migrating_lineage.event_time.into(), }, simulation, diff --git a/necsim/impls/cuda/src/utils.rs b/necsim/impls/cuda/src/utils.rs index 8ff8033a5..39c1c8285 100644 --- a/necsim/impls/cuda/src/utils.rs +++ b/necsim/impls/cuda/src/utils.rs @@ -3,7 +3,7 @@ use core::mem::MaybeUninit; use rust_cuda::safety::StackOnly; #[derive(TypeLayout)] -#[repr(C)] +#[repr(transparent)] #[doc(hidden)] pub struct MaybeSome(MaybeUninit); diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/mod.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/mod.rs index b9b31d60b..cecc4cd34 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/mod.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/mod.rs @@ -145,7 +145,7 @@ impl< match ordered_active_locations.last() { Some(location) if location == lineage.indexed_location.location() => (), - _ => ordered_active_locations.push(*lineage.indexed_location.location()), + _ => ordered_active_locations.push(lineage.indexed_location.location().clone()), }; let _local_reference = lineage_store diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/sampler.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/sampler.rs index eaa763420..0ccbe5a34 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/sampler.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/location/sampler.rs @@ -153,7 +153,7 @@ impl< ) { self.last_event_time = lineage.last_event_time; - let location = *lineage.indexed_location.location(); + let location = lineage.indexed_location.location().clone(); let _lineage_reference = simulation .lineage_store diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/separable_alias/dispersal.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/separable_alias/dispersal.rs index 9d04d0636..84bf3f84c 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/separable_alias/dispersal.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/separable_alias/dispersal.rs @@ -23,11 +23,11 @@ impl, G: RngCore> DispersalSampler self.get_self_dispersal_probability_at_location(location, habitat); if self_dispersal_at_location >= 1.0_f64 { - return *location; + return location.clone(); } if self_dispersal_at_location > 0.0_f64 && rng.sample_event(self_dispersal_at_location) { - return *location; + return location.clone(); } self.sample_non_self_dispersal_from_location(location, habitat, rng) diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs index 632156d19..6f3075bf4 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs @@ -57,7 +57,7 @@ impl> DispersalSampler, G // If seperable dispersal is not required, this can be implemented as a // direct rejection sampling loop instead. if rng.sample_event(self.get_self_dispersal_probability_at_location(location, habitat)) { - *location + location.clone() } else { self.sample_non_self_dispersal_from_location(location, habitat, rng) } diff --git a/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/mod.rs b/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/mod.rs index 4e32a09c0..2775ad348 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/mod.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/mod.rs @@ -207,7 +207,7 @@ impl< let (dispersal_target, coalescence) = simulation .coalescence_sampler .sample_coalescence_at_location( - *dispersal_origin.location(), + dispersal_origin.location().clone(), &simulation.habitat, &simulation.lineage_store, CoalescenceRngSample::new(rng), diff --git a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs index 5525256ad..6804e70ea 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs @@ -32,6 +32,7 @@ pub trait MinSpeciationTrackingEventSampler< pub struct SpeciationSample { speciation_sample: ClosedOpenUnitF64, sample_time: PositiveF64, + #[cfg_attr(feature = "cuda", cuda(embed))] sample_location: IndexedLocation, } @@ -48,7 +49,7 @@ impl SpeciationSample { *min_spec_sample = Some(Self { speciation_sample, sample_time, - sample_location: *sample_location, + sample_location: sample_location.clone(), }); }, }; diff --git a/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs b/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs index f8a847121..892c02f11 100644 --- a/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs +++ b/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs @@ -67,7 +67,7 @@ impl WrappingNoiseHabitat { samples.push(sum_noise_octaves::( &noise, - Location::new( + &Location::new( (location & 0x0000_0000_FFFF_FFFF) as u32, ((location >> 32) & 0x0000_0000_FFFF_FFFF) as u32, ), @@ -165,7 +165,7 @@ impl Habitat for WrappingNoiseHabitat { let noise = sum_noise_octaves::( &self.noise, - *location, + location, self.persistence, self.scale, self.octaves, @@ -217,7 +217,7 @@ impl SingletonDemesHabitat for WrappingNoiseHabitat {} // Published at https://cmaher.github.io/posts/working-with-simplex-noise/ fn sum_noise_octaves( noise: &OpenSimplexNoise, - location: Location, + location: &Location, persistence: PositiveUnitF64, scale: PositiveUnitF64, octaves: NonZeroUsize, diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs index aa52e0783..95dee3b23 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs @@ -50,7 +50,7 @@ impl> Backup for GillespieLineageStore { location_to_lineage_references: self .location_to_lineage_references .iter() - .map(|(k, v)| (*k, v.iter().map(|x| x.backup_unchecked()).collect())) + .map(|(k, v)| (k.clone(), v.iter().map(|x| x.backup_unchecked()).collect())) .collect(), indexed_location_to_lineage_reference: self .indexed_location_to_lineage_reference diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/store.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/store.rs index 350db3741..35d94f6a9 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/store.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/store.rs @@ -64,11 +64,11 @@ impl> LocallyCoherentLineageStore ) -> InMemoryLineageReference { let lineages_at_location = self .location_to_lineage_references - .entry(*lineage.indexed_location.location()) + .entry(lineage.indexed_location.location().clone()) .or_default(); self.indexed_location_to_lineage_reference.insert( - lineage.indexed_location, + lineage.indexed_location.clone(), (lineage.global_reference.clone(), lineages_at_location.len()), ); @@ -136,7 +136,7 @@ impl> GloballyCoherentLineageStore if references.is_empty() { None } else { - Some(*location) + Some(location.clone()) } }) } diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/mod.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/mod.rs index f28d514c4..64fb4bfdb 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/mod.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/mod.rs @@ -58,7 +58,7 @@ impl> Backup for SingletonDemesLineage location_to_lineage_reference: self .location_to_lineage_reference .iter() - .map(|(k, v)| (*k, v.backup_unchecked())) + .map(|(k, v)| (k.clone(), v.backup_unchecked())) .collect(), _marker: PhantomData::<(M, H)>, } diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/store.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/store.rs index 60340dcd6..4f62382fb 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/store.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/singleton_demes/store.rs @@ -64,7 +64,7 @@ impl> LocallyCoherentLineageStore InMemoryLineageReference { - let location = *lineage.indexed_location.location(); + let location = lineage.indexed_location.location().clone(); // Safety: a new unique reference is issued here, no cloning occurs let local_lineage_reference = @@ -109,7 +109,7 @@ impl> GloballyCoherentLineageStore> Backup for ClassicalLineageStore { indexed_location_to_lineage_reference: self .indexed_location_to_lineage_reference .iter() - .map(|(k, v)| (*k, v.backup_unchecked())) + .map(|(k, v)| (k.clone(), v.backup_unchecked())) .collect(), _marker: PhantomData::<(M, H)>, } diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/store.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/store.rs index c8bbd7ea7..978928377 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/store.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/store.rs @@ -58,7 +58,7 @@ impl> LocallyCoherentLineageStore lineage: Lineage, _habitat: &H, ) -> InMemoryLineageReference { - let indexed_location = lineage.indexed_location; + let indexed_location = lineage.indexed_location.clone(); // Safety: a new unique reference is issued here, no cloning occurs let local_lineage_reference = diff --git a/necsim/impls/no-std/src/cogs/origin_sampler/in_memory.rs b/necsim/impls/no-std/src/cogs/origin_sampler/in_memory.rs index dac8b6c22..4ea417586 100644 --- a/necsim/impls/no-std/src/cogs/origin_sampler/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/origin_sampler/in_memory.rs @@ -113,7 +113,7 @@ impl<'h, M: MathsCore, I: Iterator> Iterator for InMemoryOriginSampl self.next_location_index += u32::try_from(index_difference).unwrap(); Some(Lineage::new( - IndexedLocation::new(*next_location, self.next_location_index), + IndexedLocation::new(next_location.clone(), self.next_location_index), self.habitat, )) } diff --git a/necsim/impls/no-std/src/cogs/origin_sampler/non_spatial.rs b/necsim/impls/no-std/src/cogs/origin_sampler/non_spatial.rs index d940a32c7..8cebb7036 100644 --- a/necsim/impls/no-std/src/cogs/origin_sampler/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/origin_sampler/non_spatial.rs @@ -106,7 +106,7 @@ impl<'h, M: MathsCore, I: Iterator> Iterator for NonSpatialOriginSam self.next_location_index += u32::try_from(index_difference).unwrap(); Some(Lineage::new( - IndexedLocation::new(*next_location, self.next_location_index), + IndexedLocation::new(next_location.clone(), self.next_location_index), self.habitat, )) } diff --git a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs index d553ee6ec..75c83085d 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs @@ -197,7 +197,7 @@ pub fn simulate< prior_time, event_time, global_lineage_reference: global_reference.clone(), - target: dispersal_target, + target: dispersal_target.clone(), interaction: LineageInteraction::Maybe, }); diff --git a/necsim/plugins/species/src/individual/feather/dataframe.rs b/necsim/plugins/species/src/individual/feather/dataframe.rs index ff2085649..e817b9fb3 100644 --- a/necsim/plugins/species/src/individual/feather/dataframe.rs +++ b/necsim/plugins/species/src/individual/feather/dataframe.rs @@ -21,7 +21,7 @@ impl IndividualSpeciesFeatherReporter { lineage: &GlobalLineageReference, origin: &IndexedLocation, ) { - self.origins.insert(lineage.clone(), *origin); + self.origins.insert(lineage.clone(), origin.clone()); } pub(super) fn store_individual_speciation( diff --git a/necsim/plugins/species/src/individual/sqlite/database.rs b/necsim/plugins/species/src/individual/sqlite/database.rs index d932b240c..d472111eb 100644 --- a/necsim/plugins/species/src/individual/sqlite/database.rs +++ b/necsim/plugins/species/src/individual/sqlite/database.rs @@ -18,7 +18,7 @@ impl IndividualSpeciesSQLiteReporter { lineage: &GlobalLineageReference, origin: &IndexedLocation, ) { - self.origins.insert(lineage.clone(), *origin); + self.origins.insert(lineage.clone(), origin.clone()); } pub(super) fn store_individual_speciation( diff --git a/necsim/plugins/species/src/location/feather/dataframe.rs b/necsim/plugins/species/src/location/feather/dataframe.rs index f376fb83a..8e101276a 100644 --- a/necsim/plugins/species/src/location/feather/dataframe.rs +++ b/necsim/plugins/species/src/location/feather/dataframe.rs @@ -126,7 +126,7 @@ impl LocationSpeciesFeatherReporter { HashMap::default(); for (origin, identity, count) in std::mem::take(&mut self.speciated) { - species_index.insert((origin, identity.clone()), counts.len()); + species_index.insert((origin.clone(), identity.clone()), counts.len()); xs.push(origin.x()); ys.push(origin.y()); @@ -158,7 +158,7 @@ impl LocationSpeciesFeatherReporter { let count = self.counts.get(&lineage).copied().unwrap_or(1_u64); if let Some(identity) = self.species.get(&ancestor) { - match species_index.entry((origin, identity.clone())) { + match species_index.entry((origin.clone(), identity.clone())) { // Update the existing per-location-species record Entry::Occupied(occupied) => counts[*occupied.get()] += count, // Create a new per-location-species record @@ -219,7 +219,7 @@ impl LocationSpeciesFeatherReporter { // No-longer activate lineages and the anchor may share // location-species records with each other - match species_index.entry((origin, anchor_identity.clone())) { + match species_index.entry((origin.clone(), anchor_identity.clone())) { // Update the existing per-location-species record Entry::Occupied(occupied) => counts[*occupied.get()] += count, // Create a new per-location-species record diff --git a/necsim/plugins/species/src/location/feather/reporter.rs b/necsim/plugins/species/src/location/feather/reporter.rs index 718a3a7f7..7f7fc5ad3 100644 --- a/necsim/plugins/species/src/location/feather/reporter.rs +++ b/necsim/plugins/species/src/location/feather/reporter.rs @@ -7,7 +7,7 @@ impl Reporter for LocationSpeciesFeatherReporter { self.init = true; if speciation.prior_time == 0.0_f64 { - self.store_individual_origin(&speciation.global_lineage_reference, *speciation.origin.location()); + self.store_individual_origin(&speciation.global_lineage_reference, speciation.origin.location().clone()); } if Some(speciation) == self.last_speciation_event.as_ref() { @@ -31,7 +31,7 @@ impl Reporter for LocationSpeciesFeatherReporter { self.init = true; if dispersal.prior_time == 0.0_f64 { - self.store_individual_origin(&dispersal.global_lineage_reference, *dispersal.origin.location()); + self.store_individual_origin(&dispersal.global_lineage_reference, dispersal.origin.location().clone()); } // Only update the active frontier with `deduplication_probability` diff --git a/necsim/plugins/tskit/src/tree/table.rs b/necsim/plugins/tskit/src/tree/table.rs index be7f7423f..ff717e346 100644 --- a/necsim/plugins/tskit/src/tree/table.rs +++ b/necsim/plugins/tskit/src/tree/table.rs @@ -17,7 +17,7 @@ impl TskitTreeReporter { reference: &GlobalLineageReference, location: &IndexedLocation, ) { - self.origins.insert(reference.clone(), *location); + self.origins.insert(reference.clone(), location.clone()); } pub(super) fn store_individual_speciation( diff --git a/rustcoalescence/algorithms/gillespie/src/event_skipping/initialiser/fixup.rs b/rustcoalescence/algorithms/gillespie/src/event_skipping/initialiser/fixup.rs index e47854154..661b792d9 100644 --- a/rustcoalescence/algorithms/gillespie/src/event_skipping/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/gillespie/src/event_skipping/initialiser/fixup.rs @@ -193,7 +193,7 @@ where global_lineage_reference: coalescing_lineage.global_reference, prior_time: coalescing_lineage.last_event_time, event_time: self.restart_at, - origin: coalescing_lineage.indexed_location, + origin: coalescing_lineage.indexed_location.clone(), target: coalescing_lineage.indexed_location, interaction: LineageInteraction::Coalescence(parent), }) diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/initialiser/fixup.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/initialiser/fixup.rs index e70408ff3..1011efbb5 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/initialiser/fixup.rs @@ -151,7 +151,7 @@ impl, M: MathsCore, G: RngCore, O: Scena global_lineage_reference: coalescing_lineage.global_reference, prior_time: coalescing_lineage.last_event_time, event_time: self.restart_at, - origin: coalescing_lineage.indexed_location, + origin: coalescing_lineage.indexed_location.clone(), target: coalescing_lineage.indexed_location, interaction: LineageInteraction::Coalescence(parent), }) diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/initialiser/fixup.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/initialiser/fixup.rs index 1dd66092c..7fc69036f 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/initialiser/fixup.rs @@ -170,7 +170,7 @@ impl, M: MathsCore, G: RngCore, O: Scena global_lineage_reference: coalescing_lineage.global_reference, prior_time: coalescing_lineage.last_event_time, event_time: self.restart_at, - origin: coalescing_lineage.indexed_location, + origin: coalescing_lineage.indexed_location.clone(), target: coalescing_lineage.indexed_location, interaction: LineageInteraction::Coalescence(parent), }) From be20a8de6a6a9497561a819224d1b1181d1b29c8 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 12 Jan 2024 09:48:27 +0000 Subject: [PATCH 13/28] Revert new clone --- necsim/plugins/species/src/location/feather/dataframe.rs | 4 ++-- necsim/plugins/species/src/location/feather/reporter.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/necsim/plugins/species/src/location/feather/dataframe.rs b/necsim/plugins/species/src/location/feather/dataframe.rs index 8e101276a..653472f6d 100644 --- a/necsim/plugins/species/src/location/feather/dataframe.rs +++ b/necsim/plugins/species/src/location/feather/dataframe.rs @@ -26,9 +26,9 @@ impl LocationSpeciesFeatherReporter { pub(super) fn store_individual_origin( &mut self, lineage: &GlobalLineageReference, - origin: Location, + origin: &Location, ) { - self.origins.insert(lineage.clone(), origin); + self.origins.insert(lineage.clone(), origin.clone()); } pub(super) fn store_individual_speciation( diff --git a/necsim/plugins/species/src/location/feather/reporter.rs b/necsim/plugins/species/src/location/feather/reporter.rs index 7f7fc5ad3..e92ae4726 100644 --- a/necsim/plugins/species/src/location/feather/reporter.rs +++ b/necsim/plugins/species/src/location/feather/reporter.rs @@ -7,7 +7,7 @@ impl Reporter for LocationSpeciesFeatherReporter { self.init = true; if speciation.prior_time == 0.0_f64 { - self.store_individual_origin(&speciation.global_lineage_reference, speciation.origin.location().clone()); + self.store_individual_origin(&speciation.global_lineage_reference, speciation.origin.location()); } if Some(speciation) == self.last_speciation_event.as_ref() { @@ -31,7 +31,7 @@ impl Reporter for LocationSpeciesFeatherReporter { self.init = true; if dispersal.prior_time == 0.0_f64 { - self.store_individual_origin(&dispersal.global_lineage_reference, dispersal.origin.location().clone()); + self.store_individual_origin(&dispersal.global_lineage_reference, dispersal.origin.location()); } // Only update the active frontier with `deduplication_probability` From 7d2633ca335658d26d3829aa00f8569f64e0397b Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 13 Jan 2024 09:24:31 +0000 Subject: [PATCH 14/28] Update to rust-cuda with async kernel launch async return --- Cargo.lock | 6 ++-- necsim/core/Cargo.toml | 4 +-- necsim/impls/cuda/Cargo.toml | 4 +-- necsim/impls/no-std/Cargo.toml | 4 +-- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 +-- rustcoalescence/algorithms/cuda/src/launch.rs | 34 ++++++++++--------- .../cuda/src/parallelisation/monolithic.rs | 18 +++++----- 9 files changed, 41 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2552e9173..711fe0465 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" +source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" +source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" +source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index 371db683b..3d23d4987 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 5a09b89b0..07a1cb4e2 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index cd72e882e..3835f58d1 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index 4773c0a7a..66acfc0e3 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 9c347e4c7..416af0ef3 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 03307a6ea..5a1ffb5f6 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 5a94abdc9..44e0e66f6 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -177,7 +177,7 @@ where }; let (mut status, time, steps, lineages) = with_initialised_cuda(args.device, || { - let stream = CudaDropWrapper::from(Stream::new(StreamFlags::NON_BLOCKING, None)?); + let mut stream = CudaDropWrapper::from(Stream::new(StreamFlags::NON_BLOCKING, None)?); let mut kernel = TypedPtxKernel::new::(Some(Box::new(|kernel| { crate::info::print_kernel_function_attributes("simulate", kernel); @@ -190,21 +190,23 @@ where ptx_jit: args.ptx_jit, }; - let launcher = Launcher { - stream: &stream, - kernel: &mut kernel, - config, - }; - - parallelisation::monolithic::simulate( - &mut simulation, - launcher, - (args.dedup_cache, args.step_slice), - lineages, - event_slice, - pause_before, - local_partition, - ) + rust_cuda::host::Stream::with(&mut stream, |stream| { + let launcher = Launcher { + stream, + kernel: &mut kernel, + config, + }; + + parallelisation::monolithic::simulate( + &mut simulation, + launcher, + (args.dedup_cache, args.step_slice), + lineages, + event_slice, + pause_before, + local_partition, + ) + }) }) .map_err(CudaError::from)?; diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index e28cd0ef0..ccf21262b 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -249,14 +249,14 @@ pub fn simulate< let mut task_list_cuda_async = task_list.move_to_device_async(launcher.stream)?; - launcher.launch9_async( - simulation_cuda_repr.as_async(launcher.stream).as_ref(), - task_list_cuda_async.as_mut_async().proj_mut(), - event_buffer_cuda_async.as_mut_async().proj_mut(), - min_spec_sample_buffer_cuda_async.as_mut_async().proj_mut(), - next_event_time_buffer_cuda_async.as_mut_async().proj_mut(), - total_time_max.as_ref().as_async(launcher.stream).as_ref(), - total_steps_sum.as_ref().as_async(launcher.stream).as_ref(), + let launch = launcher.launch9_async( + simulation_cuda_repr.as_async(launcher.stream).extract_ref(), + task_list_cuda_async.as_mut_async(), + event_buffer_cuda_async.as_mut_async(), + min_spec_sample_buffer_cuda_async.as_mut_async(), + next_event_time_buffer_cuda_async.as_mut_async(), + total_time_max.as_ref().as_async(launcher.stream).extract_ref(), + total_steps_sum.as_ref().as_async(launcher.stream).extract_ref(), step_slice.get(), level_time, )?; @@ -276,6 +276,8 @@ pub fn simulate< next_event_time_buffer = next_event_time_buffer_host_async.synchronize()?; min_spec_sample_buffer = min_spec_sample_buffer_host_async.synchronize()?; + launch.synchronize()?; + // Fetch the completion of the tasks for ((mut spec_sample, mut next_event_time), mut task) in min_spec_sample_buffer From 10acf51b3a7e0e1aa844ad324dba80c9c7944351 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 14 Jan 2024 04:37:38 +0000 Subject: [PATCH 15/28] Update to latest rust-cuda --- Cargo.lock | 6 +++--- necsim/core/Cargo.toml | 4 ++-- necsim/impls/cuda/Cargo.toml | 4 ++-- necsim/impls/no-std/Cargo.toml | 4 ++-- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 711fe0465..963df95f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" +source = "git+https://github.com/juntyr/rust-cuda?rev=d1f141e#d1f141e9044ffa24bd286c3b8dd1213ca74436cf" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" +source = "git+https://github.com/juntyr/rust-cuda?rev=d1f141e#d1f141e9044ffa24bd286c3b8dd1213ca74436cf" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" +source = "git+https://github.com/juntyr/rust-cuda?rev=d1f141e#d1f141e9044ffa24bd286c3b8dd1213ca74436cf" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index 3d23d4987..b67471036 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 07a1cb4e2..8313bc544 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 3835f58d1..faf24a8e9 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index 66acfc0e3..91fa77f76 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 416af0ef3..f1849c38f 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 5a1ffb5f6..a29baa472 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "kernel"] } From a809f619c214495da05b8cd55085a9f3640f407a Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 14 Jan 2024 04:38:12 +0000 Subject: [PATCH 16/28] Fix rustfmt --- .../algorithms/cuda/src/parallelisation/monolithic.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index ccf21262b..213f6aa11 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -255,8 +255,14 @@ pub fn simulate< event_buffer_cuda_async.as_mut_async(), min_spec_sample_buffer_cuda_async.as_mut_async(), next_event_time_buffer_cuda_async.as_mut_async(), - total_time_max.as_ref().as_async(launcher.stream).extract_ref(), - total_steps_sum.as_ref().as_async(launcher.stream).extract_ref(), + total_time_max + .as_ref() + .as_async(launcher.stream) + .extract_ref(), + total_steps_sum + .as_ref() + .as_async(launcher.stream) + .extract_ref(), step_slice.get(), level_time, )?; From 4e79d0cf2560a296e060fdd5e98b23b07b81a46e Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 16 Jan 2024 04:22:18 +0000 Subject: [PATCH 17/28] Temporary fix to allow CUDA algorithm linking --- necsim/impls/cuda/src/cogs/maths.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/necsim/impls/cuda/src/cogs/maths.rs b/necsim/impls/cuda/src/cogs/maths.rs index 6326ffa2a..f68e69135 100644 --- a/necsim/impls/cuda/src/cogs/maths.rs +++ b/necsim/impls/cuda/src/cogs/maths.rs @@ -36,11 +36,14 @@ impl MathsCore for NvptxMathsCore { } #[cfg(not(target_os = "cuda"))] { - extern "C" { - fn nvptx_maths_core_ln_on_cpu(_x: f64) -> !; - } + // extern "C" { + // fn nvptx_maths_core_ln_on_cpu(_x: f64) -> !; + // } + + // unsafe { nvptx_maths_core_ln_on_cpu(x) } - unsafe { nvptx_maths_core_ln_on_cpu(x) } + // TODO: disallow using NvptxMathsCore::ln on CPU + unsafe { core::intrinsics::logf64(x) } } } From 4673ccf77fd53e49f6234853264b14d1a25a2ba3 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 18 Jan 2024 09:59:01 +0000 Subject: [PATCH 18/28] Small cleanup, mostly of unused clippy allows --- Cargo.lock | 82 +++++++++---------- necsim/core/src/cogs/coalescence_sampler.rs | 1 - necsim/core/src/event.rs | 1 - necsim/core/src/landscape/extent.rs | 2 +- necsim/core/src/landscape/location.rs | 9 ++ necsim/core/src/landscape/mod.rs | 2 +- necsim/core/src/lineage.rs | 2 +- necsim/core/src/reporter/mod.rs | 6 +- necsim/core/src/simulation/mod.rs | 2 +- necsim/impls/cuda/src/cogs/rng.rs | 19 +---- necsim/impls/no-std/src/alias/mod.rs | 11 +-- .../gillespie/conditional/probability.rs | 1 - .../impls/no-std/src/cogs/maths/intrinsics.rs | 2 - necsim/impls/no-std/src/cogs/rng/seahash.rs | 2 +- necsim/impls/no-std/src/cogs/rng/wyhash.rs | 2 +- necsim/partitioning/mpi/src/partition/mod.rs | 4 +- .../algorithms/cuda/cpu-kernel/src/patch.rs | 1 - rustcoalescence/algorithms/cuda/src/lib.rs | 3 - .../gillespie/src/gillespie/classical/mod.rs | 2 - .../gillespie/src/gillespie/turnover/mod.rs | 3 - .../algorithms/independent/src/lib.rs | 1 - .../scenarios/src/spatially_explicit/mod.rs | 4 +- 22 files changed, 70 insertions(+), 92 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 963df95f3..b712fc8e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,9 +38,9 @@ checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" [[package]] name = "anstream" -version = "0.6.5" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6" +checksum = "3fde6067df7359f2d6335ec1a50c1f8f825801687d10da0cc4c6b08e3f6afd15" dependencies = [ "anstyle", "anstyle-parse", @@ -141,9 +141,9 @@ checksum = "23ce669cd6c8588f79e15cf450314f9638f967fc5770ff1c7c1deb0925ea7cfa" [[package]] name = "base64" -version = "0.21.5" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bincode" @@ -207,9 +207,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" dependencies = [ "serde", ] @@ -345,9 +345,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.13" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52bdc885e4cacc7f7c9eedc1ef6da641603180c783c41a15c264944deeaab642" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" dependencies = [ "clap_builder", "clap_derive", @@ -355,9 +355,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.12" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb7fb5e4e979aec3be7791562fcba452f94ad85e954da024396433e0e25a79e9" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" dependencies = [ "anstream", "anstyle", @@ -680,9 +680,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "js-sys", @@ -811,15 +811,15 @@ dependencies = [ [[package]] name = "jpeg-decoder" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc0000e42512c92e31c2252315bda326620a4e034105e900c98ec492fa077b3e" +checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" [[package]] name = "js-sys" -version = "0.3.66" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" dependencies = [ "wasm-bindgen", ] @@ -838,9 +838,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.151" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "libloading" @@ -870,9 +870,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "log" @@ -1283,9 +1283,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "planus" @@ -1460,7 +1460,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64", - "bitflags 2.4.1", + "bitflags 2.4.2", "serde", "serde_derive", ] @@ -1471,7 +1471,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "fallible-iterator", "fallible-streaming-iterator", "hashlink", @@ -1722,11 +1722,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.28" +version = "0.38.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", @@ -1892,9 +1892,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.2" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" +checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" [[package]] name = "stable_deref_trait" @@ -1977,9 +1977,9 @@ dependencies = [ [[package]] name = "tiff" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d172b0f4d3fba17ba89811858b9d3d97f928aece846475bbda076ca46736211" +checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e" dependencies = [ "flate2", "jpeg-decoder", @@ -2156,9 +2156,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2166,9 +2166,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" dependencies = [ "bumpalo", "log", @@ -2181,9 +2181,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2191,9 +2191,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" dependencies = [ "proc-macro2", "quote", @@ -2204,9 +2204,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" +checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" [[package]] name = "weezl" diff --git a/necsim/core/src/cogs/coalescence_sampler.rs b/necsim/core/src/cogs/coalescence_sampler.rs index 93af7bc92..f4d0aa4da 100644 --- a/necsim/core/src/cogs/coalescence_sampler.rs +++ b/necsim/core/src/cogs/coalescence_sampler.rs @@ -28,7 +28,6 @@ pub trait CoalescenceSampler, S: LineageStore> ) -> (IndexedLocation, LineageInteraction); } -#[allow(clippy::unsafe_derive_deserialize)] #[derive(Debug, PartialEq, Serialize, Deserialize, TypeLayout)] #[repr(transparent)] pub struct CoalescenceRngSample(ClosedOpenUnitF64); diff --git a/necsim/core/src/event.rs b/necsim/core/src/event.rs index 40108ae85..af42ac633 100644 --- a/necsim/core/src/event.rs +++ b/necsim/core/src/event.rs @@ -55,7 +55,6 @@ pub struct Dispersal { } #[allow(clippy::module_name_repetitions)] -#[allow(clippy::unsafe_derive_deserialize)] #[derive(Debug, Clone, Serialize, Deserialize, TypeLayout)] #[repr(C)] pub struct SpeciationEvent { diff --git a/necsim/core/src/landscape/extent.rs b/necsim/core/src/landscape/extent.rs index b3250cbca..7de809e69 100644 --- a/necsim/core/src/landscape/extent.rs +++ b/necsim/core/src/landscape/extent.rs @@ -2,7 +2,7 @@ use necsim_core_bond::OffByOneU32; use super::Location; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(PartialEq, Eq, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] #[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(ignore))] diff --git a/necsim/core/src/landscape/location.rs b/necsim/core/src/landscape/location.rs index 7d8b2cf26..6bcc520a6 100644 --- a/necsim/core/src/landscape/location.rs +++ b/necsim/core/src/landscape/location.rs @@ -1,5 +1,7 @@ use serde::{Deserialize, Serialize}; +use crate::cogs::Backup; + #[allow(clippy::module_name_repetitions)] #[derive( Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, @@ -13,6 +15,13 @@ pub struct Location { y: u32, } +#[contract_trait] +impl Backup for Location { + unsafe fn backup_unchecked(&self) -> Self { + self.clone() + } +} + impl Location { #[must_use] pub const fn new(x: u32, y: u32) -> Self { diff --git a/necsim/core/src/landscape/mod.rs b/necsim/core/src/landscape/mod.rs index 6c05344ca..41a00b87f 100644 --- a/necsim/core/src/landscape/mod.rs +++ b/necsim/core/src/landscape/mod.rs @@ -1,6 +1,6 @@ mod extent; mod location; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use extent::{LandscapeExtent, LocationIterator}; pub use location::{IndexedLocation, Location}; diff --git a/necsim/core/src/lineage.rs b/necsim/core/src/lineage.rs index ee9687375..398973fd0 100644 --- a/necsim/core/src/lineage.rs +++ b/necsim/core/src/lineage.rs @@ -95,7 +95,7 @@ impl From> for LineageInteraction { } } -#[allow(clippy::unsafe_derive_deserialize, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, TypeLayout)] #[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] diff --git a/necsim/core/src/reporter/mod.rs b/necsim/core/src/reporter/mod.rs index 821ae269f..a934f58b1 100644 --- a/necsim/core/src/reporter/mod.rs +++ b/necsim/core/src/reporter/mod.rs @@ -12,11 +12,11 @@ use used::MaybeUsed; pub mod boolean; pub mod used; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use combinator::ReporterCombinator; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use filter::FilteredReporter; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use null::NullReporter; pub trait Reporter: core::fmt::Debug { diff --git a/necsim/core/src/simulation/mod.rs b/necsim/core/src/simulation/mod.rs index c5356f1a2..61575ed5b 100644 --- a/necsim/core/src/simulation/mod.rs +++ b/necsim/core/src/simulation/mod.rs @@ -18,7 +18,7 @@ use crate::{ reporter::Reporter, }; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use builder::{Simulation, SimulationBuilder}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; diff --git a/necsim/impls/cuda/src/cogs/rng.rs b/necsim/impls/cuda/src/cogs/rng.rs index 0bb7feb84..8237ed1cf 100644 --- a/necsim/impls/cuda/src/cogs/rng.rs +++ b/necsim/impls/cuda/src/cogs/rng.rs @@ -11,7 +11,7 @@ use rust_cuda::{ use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[allow(clippy::module_name_repetitions)] -#[derive(Debug, rust_cuda::lend::LendRustToCuda)] +#[derive(Debug, Clone, rust_cuda::lend::LendRustToCuda)] #[cuda(free = "M", free = "R")] pub struct CudaRng where @@ -22,23 +22,6 @@ where marker: PhantomData, } -impl + StackOnly + PortableBitSemantics + TypeGraphLayout + Copy> Copy - for CudaRng -{ -} - -#[allow(clippy::expl_impl_clone_on_copy)] -impl + StackOnly + PortableBitSemantics + TypeGraphLayout> Clone - for CudaRng -{ - fn clone(&self) -> Self { - Self { - inner: self.inner.clone(), - marker: PhantomData::, - } - } -} - impl + StackOnly + PortableBitSemantics + TypeGraphLayout> From for CudaRng { diff --git a/necsim/impls/no-std/src/alias/mod.rs b/necsim/impls/no-std/src/alias/mod.rs index 2eec4ef53..c6a22d8ea 100644 --- a/necsim/impls/no-std/src/alias/mod.rs +++ b/necsim/impls/no-std/src/alias/mod.rs @@ -1,3 +1,5 @@ +use core::cmp::Ordering; + use alloc::vec::Vec; use necsim_core::cogs::{MathsCore, RngCore}; @@ -62,11 +64,10 @@ impl AliasMethodSampler { }; Ks[underfull_index] = Es[overfull_index]; - #[allow(clippy::comparison_chain)] - if Us[overfull_index] < 1.0_f64 { - underfull_indices.push(overfull_index); - } else if Us[overfull_index] > 1.0_f64 { - overfull_indices.push(overfull_index); + match Us[overfull_index].cmp(&NonNegativeF64::one()) { + Ordering::Less => underfull_indices.push(overfull_index), + Ordering::Equal => (), + Ordering::Greater => overfull_indices.push(overfull_index), } } diff --git a/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/probability.rs b/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/probability.rs index 38a97fb42..8d7ccaba2 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/probability.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/gillespie/conditional/probability.rs @@ -17,7 +17,6 @@ pub struct ProbabilityAtLocation { } impl ProbabilityAtLocation { - #[allow(clippy::trivially_copy_pass_by_ref)] pub fn new< M: MathsCore, H: Habitat, diff --git a/necsim/impls/no-std/src/cogs/maths/intrinsics.rs b/necsim/impls/no-std/src/cogs/maths/intrinsics.rs index 7375c9fc8..46801aac8 100644 --- a/necsim/impls/no-std/src/cogs/maths/intrinsics.rs +++ b/necsim/impls/no-std/src/cogs/maths/intrinsics.rs @@ -1,4 +1,2 @@ -#![allow(clippy::useless_attribute)] - #[allow(clippy::module_name_repetitions)] pub use necsim_core_maths::IntrinsicsMathsCore; diff --git a/necsim/impls/no-std/src/cogs/rng/seahash.rs b/necsim/impls/no-std/src/cogs/rng/seahash.rs index 93cc87ecd..bbfc0df7b 100644 --- a/necsim/impls/no-std/src/cogs/rng/seahash.rs +++ b/necsim/impls/no-std/src/cogs/rng/seahash.rs @@ -4,7 +4,7 @@ use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; use serde::{Deserialize, Serialize}; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Serialize, Deserialize, TypeLayout)] #[serde(deny_unknown_fields)] #[layout(free = "M")] diff --git a/necsim/impls/no-std/src/cogs/rng/wyhash.rs b/necsim/impls/no-std/src/cogs/rng/wyhash.rs index c4fdeed68..dfa2d4d3e 100644 --- a/necsim/impls/no-std/src/cogs/rng/wyhash.rs +++ b/necsim/impls/no-std/src/cogs/rng/wyhash.rs @@ -11,7 +11,7 @@ const P1: u64 = 0xe703_7ed1_a0b4_28db; const P2: u64 = 0x8ebc_6af0_9c88_c6e3; const P5: u64 = 0xeb44_acca_b455_d165; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Serialize, Deserialize, TypeLayout)] #[layout(free = "M")] #[serde(deny_unknown_fields)] diff --git a/necsim/partitioning/mpi/src/partition/mod.rs b/necsim/partitioning/mpi/src/partition/mod.rs index 90055f711..d05940d3d 100644 --- a/necsim/partitioning/mpi/src/partition/mod.rs +++ b/necsim/partitioning/mpi/src/partition/mod.rs @@ -13,9 +13,9 @@ mod parallel; mod root; mod utils; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use parallel::MpiParallelPartition; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use root::MpiRootPartition; #[allow(clippy::module_name_repetitions)] diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index 59e32f0b4..04404ad9b 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -23,7 +23,6 @@ use crate::SimulationKernelPtx; // `Boolean`s. However, Rust does not recognise that `Boolean` is closed over // {`False`, `True`}. This explicit impl provides the necessary coersion. -#[allow(clippy::trait_duplication_in_bounds)] unsafe impl< M: MathsCore + Sync, H: Habitat + RustToCuda + Sync, diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index 8d1625bfe..8aa09353f 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -76,7 +76,6 @@ impl AlgorithmDefaults for CudaAlgorithm { type Rng = CudaRng>; } -#[allow(clippy::trait_duplication_in_bounds)] impl< 'p, M: MathsCore + Sync, @@ -311,7 +310,6 @@ where /// /// Returns a `ContinueError::Sample` if initialising the resuming /// simulation failed - #[allow(clippy::too_many_lines)] fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, rng: G, @@ -355,7 +353,6 @@ where /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, rng: G, diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs index fd8ffe02e..892b7e285 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs @@ -40,7 +40,6 @@ where O::LineageStore>: LocallyCoherentLineageStore, { - #[allow(clippy::too_many_lines)] fn initialise_and_simulate>( args: Self::Arguments, rng: G, @@ -92,7 +91,6 @@ where /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, rng: G, diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs index b8273a461..a08985da7 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs @@ -48,7 +48,6 @@ where get_gillespie_logical_partition(args, local_partition) } - #[allow(clippy::shadow_unrelated, clippy::too_many_lines)] default fn initialise_and_simulate>( args: Self::Arguments, rng: G, @@ -72,7 +71,6 @@ where /// /// Returns a `ContinueError::Sample` if initialising the resuming /// simulation failed - #[allow(clippy::too_many_lines)] default fn resume_and_simulate< I: Iterator, L: ExactSizeIterator, @@ -104,7 +102,6 @@ where /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] default fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, rng: G, diff --git a/rustcoalescence/algorithms/independent/src/lib.rs b/rustcoalescence/algorithms/independent/src/lib.rs index 11eeba8e7..7550642c9 100644 --- a/rustcoalescence/algorithms/independent/src/lib.rs +++ b/rustcoalescence/algorithms/independent/src/lib.rs @@ -121,7 +121,6 @@ impl< /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, rng: G, diff --git a/rustcoalescence/scenarios/src/spatially_explicit/mod.rs b/rustcoalescence/scenarios/src/spatially_explicit/mod.rs index c4bc85206..d40d5c984 100644 --- a/rustcoalescence/scenarios/src/spatially_explicit/mod.rs +++ b/rustcoalescence/scenarios/src/spatially_explicit/mod.rs @@ -1,11 +1,11 @@ mod maps; mod turnover; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use turnover::map::{ SpatiallyExplicitTurnoverMapArguments, SpatiallyExplicitTurnoverMapScenario, }; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use turnover::uniform::{ SpatiallyExplicitUniformTurnoverArguments, SpatiallyExplicitUniformTurnoverScenario, }; From 7a25f7025591983da5a2957ed0c6c84b1ecbe349 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 18 Jan 2024 10:15:47 +0000 Subject: [PATCH 19/28] Small improvement to CUDA EventBuffer --- necsim/core/src/simulation/mod.rs | 10 +++++++--- necsim/impls/cuda/src/event_buffer.rs | 15 ++++++++++----- .../parallelisation/independent/individuals.rs | 2 +- .../src/parallelisation/independent/landscape.rs | 2 +- .../parallelisation/independent/monolithic/mod.rs | 2 +- .../src/parallelisation/monolithic/averaging.rs | 2 +- .../src/parallelisation/monolithic/lockstep.rs | 4 ++-- .../src/parallelisation/monolithic/monolithic.rs | 2 +- .../src/parallelisation/monolithic/optimistic.rs | 2 +- .../monolithic/optimistic_lockstep.rs | 6 +++--- .../algorithms/cuda/gpu-kernel/src/lib.rs | 11 +++++++---- 11 files changed, 35 insertions(+), 23 deletions(-) diff --git a/necsim/core/src/simulation/mod.rs b/necsim/core/src/simulation/mod.rs index 61575ed5b..29368e5a6 100644 --- a/necsim/core/src/simulation/mod.rs +++ b/necsim/core/src/simulation/mod.rs @@ -51,7 +51,7 @@ impl< #[inline] pub fn simulate_incremental_early_stop< - F: FnMut(&Self, u64, PositiveF64) -> ControlFlow<(), ()>, + F: FnMut(&Self, u64, PositiveF64, &P) -> ControlFlow<(), ()>, P: Reporter, >( &mut self, @@ -69,13 +69,17 @@ impl< .map(|lineage| (lineage.event_time, lineage.tie_breaker)); let self_ptr = self as *const Self; + let reporter_ptr = reporter as *const P; let old_rng = unsafe { self.rng.backup_unchecked() }; let mut early_stop_flow = ControlFlow::Continue(()); let early_peek_stop = |next_event_time| { // Safety: We are only passing in an immutable reference - early_stop_flow = early_stop(unsafe { &*self_ptr }, steps, next_event_time); + early_stop_flow = + early_stop(unsafe { &*self_ptr }, steps, next_event_time, unsafe { + &*reporter_ptr + }); if early_stop_flow.is_break() { return ControlFlow::Break(()); @@ -131,6 +135,6 @@ impl< #[inline] pub fn simulate(mut self, reporter: &mut P) -> (NonNegativeF64, u64) { - self.simulate_incremental_early_stop(|_, _, _| ControlFlow::Continue(()), reporter) + self.simulate_incremental_early_stop(|_, _, _, _| ControlFlow::Continue(()), reporter) } } diff --git a/necsim/impls/cuda/src/event_buffer.rs b/necsim/impls/cuda/src/event_buffer.rs index 2fe7cb12a..1a08d85ca 100644 --- a/necsim/impls/cuda/src/event_buffer.rs +++ b/necsim/impls/cuda/src/event_buffer.rs @@ -196,6 +196,11 @@ impl impl EventBuffer { + #[must_use] + pub fn can_buffer_next_event(&self) -> bool { + !self.event_buffer.is_empty() + } + fn report_event( &mut self, event: impl Into< as EventType>::Event>, @@ -228,7 +233,7 @@ impl Reporter impl Reporter for EventBuffer { impl_report!( #[debug_requires( - !self.event_buffer.is_empty(), + self.can_buffer_next_event(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { @@ -241,7 +246,7 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - !self.event_buffer.is_empty(), + self.can_buffer_next_event(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { @@ -257,7 +262,7 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - !self.event_buffer.is_empty(), + self.can_buffer_next_event(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { @@ -270,7 +275,7 @@ impl Reporter for EventBuffer { impl_report!( #[debug_requires( - !self.event_buffer.is_empty(), + self.can_buffer_next_event(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { @@ -279,7 +284,7 @@ impl Reporter for EventBuffer { ); } -// FIXME: find a less hacky hack +// TODO: find a prettier workaround struct CudaExchangeSlice( &'static mut [T], ); diff --git a/necsim/impls/no-std/src/parallelisation/independent/individuals.rs b/necsim/impls/no-std/src/parallelisation/independent/individuals.rs index 93fbe37f0..c6355adf4 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/individuals.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/individuals.rs @@ -125,7 +125,7 @@ pub fn simulate< // detected at the next shared duplicate event let (new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, _| { + |_, steps, _, _| { if steps >= step_slice.get() { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs index 75c83085d..0177ec941 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs @@ -137,7 +137,7 @@ pub fn simulate< // detected at the next shared duplicate event let (new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, _| { + |_, steps, _, _| { if steps >= step_slice.get() { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs b/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs index b3ac9a64c..faecd44c9 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs @@ -226,7 +226,7 @@ pub fn simulate< previous_next_event_time = None; let (new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, next_event_time| { + |_, steps, next_event_time, _| { previous_next_event_time = Some(next_event_time); if steps >= step_slice.get() || next_event_time >= level_time { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs b/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs index 187ee4038..143b61156 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs @@ -78,7 +78,7 @@ pub fn simulate< let next_safe_time = global_safe_time + independent_time_slice; let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_safe_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs b/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs index 174c2c358..2b29631aa 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs @@ -77,7 +77,7 @@ pub fn simulate< // Simulate for zero-steps (immediate early stop) without side effects // to peek the next local event time simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { next_local_time = Some(next_event_time); ControlFlow::Break(()) @@ -102,7 +102,7 @@ pub fn simulate< // The partition with the next event gets to simulate just the next step if let Ok(next_global_time) = local_partition.reduce_vote_min_time(next_local_time) { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time > next_global_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs b/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs index 895344836..246e582df 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs @@ -69,7 +69,7 @@ pub fn simulate< // ically later time let (time, steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { pause_before.map_or(ControlFlow::Continue(()), |pause_before| { if next_event_time >= pause_before { ControlFlow::Break(()) diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs index b12afb7d7..349c74164 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs @@ -109,7 +109,7 @@ pub fn simulate< // e.g. (1->2)|(2->3)|(3->1) => (1->2)|(3->1) let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_safe_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs index 767e5cbb9..def28db5e 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs @@ -78,7 +78,7 @@ pub fn simulate< // (we already know at least one partition has some next event time) let next_local_emigration_time = { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |simulation, _, _| { + |simulation, _, _, _| { if simulation.emigration_exit().is_empty() { ControlFlow::Continue(()) } else { @@ -115,7 +115,7 @@ pub fn simulate< // that event Ok(next_global_time) => { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time > next_global_time { ControlFlow::Break(()) } else { @@ -139,7 +139,7 @@ pub fn simulate< // All other partitions get to simulate until just before this next migration event Err(next_global_time) => { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_global_time { ControlFlow::Break(()) } else { diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index b2c7e4294..04fbadf46 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -91,13 +91,16 @@ pub fn simulate< let mut final_next_event_time = None; let (time, steps) = simulation.simulate_incremental_early_stop( - |_, steps, next_event_time| { + |_, steps, next_event_time, reporter| { final_next_event_time = Some(next_event_time); - if steps >= max_steps || next_event_time >= max_next_event_time { - ControlFlow::Break(()) - } else { + if steps < max_steps + && next_event_time < max_next_event_time + && reporter.can_buffer_next_event() + { ControlFlow::Continue(()) + } else { + ControlFlow::Break(()) } }, event_buffer_reporter, From 387e4b604fcd1b1f3f7f8143c89a243a30a266ce Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 20 Jan 2024 17:07:44 +0000 Subject: [PATCH 20/28] Try trait-based kernel signature check --- Cargo.lock | 6 +++--- necsim/core/Cargo.toml | 4 ++-- necsim/impls/cuda/Cargo.toml | 4 ++-- necsim/impls/no-std/Cargo.toml | 4 ++-- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b712fc8e4..c6a0d125c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=d1f141e#d1f141e9044ffa24bd286c3b8dd1213ca74436cf" +source = "git+https://github.com/juntyr/rust-cuda?rev=521419c#521419c50b734a55d8ccccdffc45674d681d20d1" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=d1f141e#d1f141e9044ffa24bd286c3b8dd1213ca74436cf" +source = "git+https://github.com/juntyr/rust-cuda?rev=521419c#521419c50b734a55d8ccccdffc45674d681d20d1" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=d1f141e#d1f141e9044ffa24bd286c3b8dd1213ca74436cf" +source = "git+https://github.com/juntyr/rust-cuda?rev=521419c#521419c50b734a55d8ccccdffc45674d681d20d1" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index b67471036..8296b8bf6 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 8313bc544..e2504f9ba 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index faf24a8e9..e4e5ac8c4 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index 91fa77f76..b4a430df4 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index f1849c38f..2088af073 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index a29baa472..695dec658 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "d1f141e", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "kernel"] } From f5f490c63e37de68bcddc5fd509fb58aef5273bd Mon Sep 17 00:00:00 2001 From: Juniper Tyree <50025784+juntyr@users.noreply.github.com> Date: Sat, 20 Jan 2024 19:14:29 +0200 Subject: [PATCH 21/28] Update rust-toolchain --- rust-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain b/rust-toolchain index 3ab928278..19bb9762a 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2023-11-10" +channel = "nightly" components = [ "cargo", "rustfmt", "clippy", "rust-src" ] targets = [ "x86_64-unknown-linux-gnu", "nvptx64-nvidia-cuda" ] From 9db2ef56d9561e803497a1f1d41eecfd02c5114c Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 20 Jan 2024 17:47:08 +0000 Subject: [PATCH 22/28] Fix clippy lints --- Cargo.lock | 8 ++++---- necsim/core/bond/src/closed_open_unit_f64.rs | 1 + necsim/core/bond/src/closed_unit_f64.rs | 1 + necsim/core/bond/src/non_negative_f64.rs | 1 + necsim/core/bond/src/non_positive_f64.rs | 1 + necsim/core/bond/src/open_closed_unit_f64.rs | 1 + necsim/core/bond/src/positive_f64.rs | 1 + necsim/core/maths/src/lib.rs | 1 + necsim/impls/cuda/src/lib.rs | 3 ++- .../std/src/event_log/replay/sorted_segments.rs | 1 + necsim/plugins/core/src/import/combinator.rs | 4 ++-- .../algorithms/cuda/cpu-kernel/src/lib.rs | 1 - .../algorithms/cuda/gpu-kernel/src/lib.rs | 13 +++++++------ 13 files changed, 23 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6a0d125c..2b215660a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,9 +38,9 @@ checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" [[package]] name = "anstream" -version = "0.6.9" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fde6067df7359f2d6335ec1a50c1f8f825801687d10da0cc4c6b08e3f6afd15" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -1892,9 +1892,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "stable_deref_trait" diff --git a/necsim/core/bond/src/closed_open_unit_f64.rs b/necsim/core/bond/src/closed_open_unit_f64.rs index 0d2155c13..e6424106a 100644 --- a/necsim/core/bond/src/closed_open_unit_f64.rs +++ b/necsim/core/bond/src/closed_open_unit_f64.rs @@ -88,6 +88,7 @@ impl ClosedOpenUnitF64 { } impl PartialEq for ClosedOpenUnitF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/closed_unit_f64.rs b/necsim/core/bond/src/closed_unit_f64.rs index 664c9f20e..d5c0bdc02 100644 --- a/necsim/core/bond/src/closed_unit_f64.rs +++ b/necsim/core/bond/src/closed_unit_f64.rs @@ -122,6 +122,7 @@ impl From for ClosedUnitF64 { } impl PartialEq for ClosedUnitF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/non_negative_f64.rs b/necsim/core/bond/src/non_negative_f64.rs index cf60da503..89e3ea295 100644 --- a/necsim/core/bond/src/non_negative_f64.rs +++ b/necsim/core/bond/src/non_negative_f64.rs @@ -161,6 +161,7 @@ impl From for NonNegativeF64 { } impl PartialEq for NonNegativeF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/non_positive_f64.rs b/necsim/core/bond/src/non_positive_f64.rs index 62807c4bf..2e7cce0e8 100644 --- a/necsim/core/bond/src/non_positive_f64.rs +++ b/necsim/core/bond/src/non_positive_f64.rs @@ -94,6 +94,7 @@ impl NonPositiveF64 { } impl PartialEq for NonPositiveF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/open_closed_unit_f64.rs b/necsim/core/bond/src/open_closed_unit_f64.rs index a82fdfc37..b4b3441dc 100644 --- a/necsim/core/bond/src/open_closed_unit_f64.rs +++ b/necsim/core/bond/src/open_closed_unit_f64.rs @@ -94,6 +94,7 @@ impl OpenClosedUnitF64 { } impl PartialEq for OpenClosedUnitF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/positive_f64.rs b/necsim/core/bond/src/positive_f64.rs index ff710fceb..65561dfb8 100644 --- a/necsim/core/bond/src/positive_f64.rs +++ b/necsim/core/bond/src/positive_f64.rs @@ -122,6 +122,7 @@ impl From for PositiveF64 { } impl PartialEq for PositiveF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/maths/src/lib.rs b/necsim/core/maths/src/lib.rs index 7102424da..3a73a5f33 100644 --- a/necsim/core/maths/src/lib.rs +++ b/necsim/core/maths/src/lib.rs @@ -1,5 +1,6 @@ #![deny(clippy::pedantic)] #![no_std] +#![allow(internal_features)] #![feature(core_intrinsics)] pub trait MathsCore: 'static + Clone + core::fmt::Debug { diff --git a/necsim/impls/cuda/src/lib.rs b/necsim/impls/cuda/src/lib.rs index e7d657c28..ff7361e05 100644 --- a/necsim/impls/cuda/src/lib.rs +++ b/necsim/impls/cuda/src/lib.rs @@ -1,6 +1,5 @@ #![deny(clippy::pedantic)] #![no_std] -#![feature(core_intrinsics)] #![feature(const_type_name)] #![feature(offset_of)] #![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] @@ -8,6 +7,8 @@ #![cfg_attr(target_os = "cuda", feature(const_float_bits_conv))] #![allow(incomplete_features)] #![feature(specialization)] +#![allow(internal_features)] +#![feature(core_intrinsics)] extern crate alloc; diff --git a/necsim/impls/std/src/event_log/replay/sorted_segments.rs b/necsim/impls/std/src/event_log/replay/sorted_segments.rs index 2c209cd95..57c18b6e9 100644 --- a/necsim/impls/std/src/event_log/replay/sorted_segments.rs +++ b/necsim/impls/std/src/event_log/replay/sorted_segments.rs @@ -101,6 +101,7 @@ impl PartialOrd for SortedSortedSegments { } impl PartialEq for SortedSortedSegments { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.next.eq(&other.next) } diff --git a/necsim/plugins/core/src/import/combinator.rs b/necsim/plugins/core/src/import/combinator.rs index d948c5e3e..a99fb5784 100644 --- a/necsim/plugins/core/src/import/combinator.rs +++ b/necsim/plugins/core/src/import/combinator.rs @@ -3,7 +3,6 @@ use std::{ iter::{FromIterator, IntoIterator}, marker::PhantomData, path::Path, - rc::Rc, }; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -68,7 +67,8 @@ impl>(); let result = inner(self); diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs index 66c190158..8f206ab43 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs @@ -1,5 +1,4 @@ #![deny(clippy::pedantic)] -#![feature(c_str_literals)] #![allow(long_running_const_eval)] #![recursion_limit = "1024"] diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index 04fbadf46..94938d34d 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -2,7 +2,6 @@ #![no_std] #![feature(type_alias_impl_trait)] #![feature(decl_macro)] -#![feature(c_str_literals)] #![cfg_attr(target_os = "cuda", feature(abi_ptx))] #![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] #![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] @@ -40,7 +39,7 @@ use rust_cuda::{ #[rust_cuda::kernel::kernel(pub use link! for impl)] #[kernel( allow(ptx::double_precision_use), - forbid(ptx::local_memory_usage, ptx::register_spills) + forbid(ptx::local_memory_use, ptx::register_spills) )] #[allow(clippy::too_many_arguments)] #[allow(clippy::type_complexity)] @@ -132,24 +131,26 @@ mod cuda_prelude { #[cfg(not(debug_assertions))] #[panic_handler] fn panic(_panic_info: &::core::panic::PanicInfo) -> ! { - rust_cuda::device::utils::exit() + rust_cuda::device::utils::abort() } #[cfg(debug_assertions)] #[panic_handler] fn panic(info: &::core::panic::PanicInfo) -> ! { - rust_cuda::device::utils::pretty_panic_handler(info, true, true) + rust_cuda::device::utils::pretty_print_panic_info(info, true, true); + rust_cuda::device::utils::abort() } #[cfg(not(debug_assertions))] #[alloc_error_handler] fn alloc_error_handler(_: core::alloc::Layout) -> ! { - rust_cuda::device::utils::exit() + rust_cuda::device::utils::abort() } #[cfg(debug_assertions)] #[alloc_error_handler] fn alloc_error_handler(layout: core::alloc::Layout) -> ! { - rust_cuda::device::utils::pretty_alloc_error_handler(layout) + rust_cuda::device::utils::pretty_print_alloc_error(layout); + rust_cuda::device::utils::abort() } } From 064d7984280c5d9c32b61da4a6ea9973ef553716 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 21 Jan 2024 07:43:35 +0000 Subject: [PATCH 23/28] Try with const match instead --- Cargo.lock | 6 +++--- necsim/core/Cargo.toml | 4 ++-- necsim/impls/cuda/Cargo.toml | 4 ++-- necsim/impls/no-std/Cargo.toml | 4 ++-- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2b215660a..62568f74b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=521419c#521419c50b734a55d8ccccdffc45674d681d20d1" +source = "git+https://github.com/juntyr/rust-cuda?rev=1c8115c#1c8115c219c4d48dfcaafd9dd6889001a0742277" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=521419c#521419c50b734a55d8ccccdffc45674d681d20d1" +source = "git+https://github.com/juntyr/rust-cuda?rev=1c8115c#1c8115c219c4d48dfcaafd9dd6889001a0742277" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=521419c#521419c50b734a55d8ccccdffc45674d681d20d1" +source = "git+https://github.com/juntyr/rust-cuda?rev=1c8115c#1c8115c219c4d48dfcaafd9dd6889001a0742277" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index 8296b8bf6..f0277b705 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index e2504f9ba..63adc3a4e 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index e4e5ac8c4..7ae0b9a5c 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index b4a430df4..915c6a963 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 2088af073..9d3f393d9 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 695dec658..b5c08a30b 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "521419c", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "kernel"] } From 9705e28a7b7cbbb33c4eb6db36f9e44b55259b0f Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 21 Jan 2024 08:01:15 +0000 Subject: [PATCH 24/28] Try with memcmp intrinsic --- Cargo.lock | 6 +++--- necsim/core/Cargo.toml | 4 ++-- necsim/impls/cuda/Cargo.toml | 4 ++-- necsim/impls/no-std/Cargo.toml | 4 ++-- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62568f74b..43863d74d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=1c8115c#1c8115c219c4d48dfcaafd9dd6889001a0742277" +source = "git+https://github.com/juntyr/rust-cuda?rev=b040cac#b040cac08c51ffe3903d0e900eb1ed7ba59a38d0" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=1c8115c#1c8115c219c4d48dfcaafd9dd6889001a0742277" +source = "git+https://github.com/juntyr/rust-cuda?rev=b040cac#b040cac08c51ffe3903d0e900eb1ed7ba59a38d0" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=1c8115c#1c8115c219c4d48dfcaafd9dd6889001a0742277" +source = "git+https://github.com/juntyr/rust-cuda?rev=b040cac#b040cac08c51ffe3903d0e900eb1ed7ba59a38d0" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index f0277b705..bcaea5c0c 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 63adc3a4e..f0ebe2aef 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 7ae0b9a5c..3bba2c480 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index 915c6a963..f5bf24843 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 9d3f393d9..24d3c037f 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index b5c08a30b..a92586ae5 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "1c8115c", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "kernel"] } From 8306317e712cb8e7d5c661137f9117382d15690e Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 1 Feb 2024 08:58:57 +0000 Subject: [PATCH 25/28] Try out experimental const-type-layout with compression --- Cargo.lock | 62 +++++++++---------- necsim/core/Cargo.toml | 6 +- necsim/core/bond/Cargo.toml | 2 +- necsim/core/bond/src/lib.rs | 1 - necsim/core/src/lib.rs | 1 - necsim/impls/cuda/Cargo.toml | 6 +- necsim/impls/cuda/src/lib.rs | 1 - necsim/impls/no-std/Cargo.toml | 6 +- necsim/impls/no-std/src/lib.rs | 1 - rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 +- rustcoalescence/src/args/config/rng/mod.rs | 6 +- 13 files changed, 45 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43863d74d..3161e62b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -52,9 +52,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "2faccea4cc4ab4a667ce676a30e8ec13922a692c99bb8f5b11f1502c72e04220" [[package]] name = "anstyle-parse" @@ -241,9 +241,9 @@ checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" dependencies = [ "bytemuck_derive", ] @@ -325,9 +325,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" dependencies = [ "num-traits", ] @@ -402,8 +402,7 @@ dependencies = [ [[package]] name = "const-type-layout" version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a8a1418a7c3cfdf6db57795ced0855a24249ddd38f1a3373d648cc3ef390d9" +source = "git+https://github.com/juntyr/const-type-layout?branch=compress#f6790bab9cb5b10d9283d1afb576cc62fca2091b" dependencies = [ "const-type-layout-derive", ] @@ -411,8 +410,7 @@ dependencies = [ [[package]] name = "const-type-layout-derive" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf506f23bd8d2a7b9758a9abe0f4cddb87d6fd9206c836e65a48ecbdec74d4e" +source = "git+https://github.com/juntyr/const-type-layout?branch=compress#f6790bab9cb5b10d9283d1afb576cc62fca2091b" dependencies = [ "proc-macro-error", "proc-macro2", @@ -838,9 +836,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.152" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libloading" @@ -1338,9 +1336,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.76" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -1411,13 +1409,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.3", + "regex-automata 0.4.5", "regex-syntax 0.8.2", ] @@ -1432,9 +1430,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -1482,7 +1480,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=b040cac#b040cac08c51ffe3903d0e900eb1ed7ba59a38d0" +source = "git+https://github.com/juntyr/rust-cuda?rev=44a974bd#44a974bd2b0191193b635c4254995ed7b12ea9f5" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1497,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=b040cac#b040cac08c51ffe3903d0e900eb1ed7ba59a38d0" +source = "git+https://github.com/juntyr/rust-cuda?rev=44a974bd#44a974bd2b0191193b635c4254995ed7b12ea9f5" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1508,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=b040cac#b040cac08c51ffe3903d0e900eb1ed7ba59a38d0" +source = "git+https://github.com/juntyr/rust-cuda?rev=44a974bd#44a974bd2b0191193b635c4254995ed7b12ea9f5" dependencies = [ "cargo_metadata", "colored", @@ -1795,18 +1793,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.195" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.195" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", @@ -1826,9 +1824,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.111" +version = "1.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" dependencies = [ "itoa", "ryu", @@ -1871,9 +1869,9 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "shlex" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simdutf8" @@ -2210,9 +2208,9 @@ checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" [[package]] name = "weezl" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb" +checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" [[package]] name = "which" diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index bcaea5c0c..f51adbd8b 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -15,12 +15,12 @@ cuda = ["rust-cuda"] necsim-core-maths = { path = "maths" } necsim-core-bond = { path = "bond" } -const-type-layout = { version = "0.2.0", features = ["derive"] } +const-type-layout = { git = "https://github.com/juntyr/const-type-layout", branch = "compress", features = ["derive"] } contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "host"], optional = true } diff --git a/necsim/core/bond/Cargo.toml b/necsim/core/bond/Cargo.toml index c9c8651f6..d2729e917 100644 --- a/necsim/core/bond/Cargo.toml +++ b/necsim/core/bond/Cargo.toml @@ -13,5 +13,5 @@ default = [] [dependencies] necsim-core-maths = { path = "../maths" } -const-type-layout = { version = "0.2.0", features = ["derive"] } +const-type-layout = { git = "https://github.com/juntyr/const-type-layout", branch = "compress", features = ["derive"] } serde = { version = "1.0", default-features = false, features = ["derive"] } diff --git a/necsim/core/bond/src/lib.rs b/necsim/core/bond/src/lib.rs index 67c6b9e81..ff3007150 100644 --- a/necsim/core/bond/src/lib.rs +++ b/necsim/core/bond/src/lib.rs @@ -4,7 +4,6 @@ #![feature(const_float_bits_conv)] #![feature(const_float_classify)] #![feature(const_type_name)] -#![feature(offset_of)] #[macro_use] extern crate const_type_layout; diff --git a/necsim/core/src/lib.rs b/necsim/core/src/lib.rs index adef409b9..a8da66266 100644 --- a/necsim/core/src/lib.rs +++ b/necsim/core/src/lib.rs @@ -1,7 +1,6 @@ #![deny(clippy::pedantic)] #![no_std] #![feature(const_type_name)] -#![feature(offset_of)] #![feature(min_specialization)] #[doc(hidden)] diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index f0ebe2aef..5956c6a30 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -10,12 +10,12 @@ edition = "2021" [dependencies] necsim-core = { path = "../../core", features = ["cuda"] } -const-type-layout = { version = "0.2.0", features = ["derive"] } +const-type-layout = { git = "https://github.com/juntyr/const-type-layout", branch = "compress", features = ["derive"] } contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "host"] } diff --git a/necsim/impls/cuda/src/lib.rs b/necsim/impls/cuda/src/lib.rs index ff7361e05..abb2b2fcd 100644 --- a/necsim/impls/cuda/src/lib.rs +++ b/necsim/impls/cuda/src/lib.rs @@ -1,7 +1,6 @@ #![deny(clippy::pedantic)] #![no_std] #![feature(const_type_name)] -#![feature(offset_of)] #![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] #![cfg_attr(target_os = "cuda", feature(asm_const))] #![cfg_attr(target_os = "cuda", feature(const_float_bits_conv))] diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 3bba2c480..ce88df33a 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -17,7 +17,7 @@ necsim-core-maths = { path = "../../core/maths" } necsim-core-bond = { path = "../../core/bond" } necsim-partitioning-core = { path = "../../partitioning/core" } -const-type-layout = { version = "0.2.0", features = ["derive"] } +const-type-layout = { git = "https://github.com/juntyr/const-type-layout", branch = "compress", features = ["derive"] } contracts = "0.6.3" libm = "0.2" hashbrown = "0.13" @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "final", "host"], optional = true } diff --git a/necsim/impls/no-std/src/lib.rs b/necsim/impls/no-std/src/lib.rs index f26467e88..aa63583a1 100644 --- a/necsim/impls/no-std/src/lib.rs +++ b/necsim/impls/no-std/src/lib.rs @@ -3,7 +3,6 @@ #![feature(iter_advance_by)] #![feature(extract_if)] #![feature(const_type_name)] -#![feature(offset_of)] #![feature(negative_impls)] #![feature(impl_trait_in_assoc_type)] #![allow(incomplete_features)] diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index f5bf24843..26e7c9e9a 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 24d3c037f..9de40d95c 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index a92586ae5..4ae3ac202 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "b040cac", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "kernel"] } diff --git a/rustcoalescence/src/args/config/rng/mod.rs b/rustcoalescence/src/args/config/rng/mod.rs index 6899a848a..5e536a5c3 100644 --- a/rustcoalescence/src/args/config/rng/mod.rs +++ b/rustcoalescence/src/args/config/rng/mod.rs @@ -188,11 +188,7 @@ impl<'a> ProtectedState<'a> { } fn from_bytes(bytes: &'a [u8]) -> Option { - if bytes.len() < 4 { - return None; - } - - let (state, checksum) = bytes.rsplit_array_ref(); + let (state, checksum) = bytes.split_last_chunk()?; let checksum = u32::from_le_bytes(*checksum); if adler::adler32_slice(state) != checksum { From c650a4e8cb83e98a01ee80cc9c1eb8f5c826a228 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 2 Feb 2024 08:30:04 +0000 Subject: [PATCH 26/28] Try interning all const layout strings --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3161e62b5..d5444907e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -402,7 +402,7 @@ dependencies = [ [[package]] name = "const-type-layout" version = "0.2.1" -source = "git+https://github.com/juntyr/const-type-layout?branch=compress#f6790bab9cb5b10d9283d1afb576cc62fca2091b" +source = "git+https://github.com/juntyr/const-type-layout?branch=compress#5635167dd776304fc75fa7936364e4621d9c2ade" dependencies = [ "const-type-layout-derive", ] @@ -410,7 +410,7 @@ dependencies = [ [[package]] name = "const-type-layout-derive" version = "0.2.0" -source = "git+https://github.com/juntyr/const-type-layout?branch=compress#f6790bab9cb5b10d9283d1afb576cc62fca2091b" +source = "git+https://github.com/juntyr/const-type-layout?branch=compress#5635167dd776304fc75fa7936364e4621d9c2ade" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1720,9 +1720,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.30" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ "bitflags 2.4.2", "errno", From ccb2b8adbd951f5d49609fff47159e189fcbc5b5 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 5 Feb 2024 07:09:08 +0000 Subject: [PATCH 27/28] Try check --- Cargo.lock | 14 +++++++------- necsim/core/Cargo.toml | 4 ++-- necsim/impls/cuda/Cargo.toml | 4 ++-- necsim/impls/no-std/Cargo.toml | 4 ++-- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d5444907e..c62829a52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -402,7 +402,7 @@ dependencies = [ [[package]] name = "const-type-layout" version = "0.2.1" -source = "git+https://github.com/juntyr/const-type-layout?branch=compress#5635167dd776304fc75fa7936364e4621d9c2ade" +source = "git+https://github.com/juntyr/const-type-layout?branch=compress#51836b1b05b7ac31e74f7c4b981ea7a0fb795be2" dependencies = [ "const-type-layout-derive", ] @@ -410,7 +410,7 @@ dependencies = [ [[package]] name = "const-type-layout-derive" version = "0.2.0" -source = "git+https://github.com/juntyr/const-type-layout?branch=compress#5635167dd776304fc75fa7936364e4621d9c2ade" +source = "git+https://github.com/juntyr/const-type-layout?branch=compress#51836b1b05b7ac31e74f7c4b981ea7a0fb795be2" dependencies = [ "proc-macro-error", "proc-macro2", @@ -935,9 +935,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -1480,7 +1480,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=44a974bd#44a974bd2b0191193b635c4254995ed7b12ea9f5" +source = "git+https://github.com/juntyr/rust-cuda?rev=3ec81181#3ec8118114eabbb1b3048af248d0439e4d250a37" dependencies = [ "const-type-layout", "final", @@ -1497,7 +1497,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=44a974bd#44a974bd2b0191193b635c4254995ed7b12ea9f5" +source = "git+https://github.com/juntyr/rust-cuda?rev=3ec81181#3ec8118114eabbb1b3048af248d0439e4d250a37" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1508,7 +1508,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=44a974bd#44a974bd2b0191193b635c4254995ed7b12ea9f5" +source = "git+https://github.com/juntyr/rust-cuda?rev=3ec81181#3ec8118114eabbb1b3048af248d0439e4d250a37" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index f51adbd8b..522b02b57 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 5956c6a30..a10857534 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index ce88df33a..d35c81045 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index 26e7c9e9a..77e7beac9 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 9de40d95c..f603c622f 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 4ae3ac202..19a3aa407 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "44a974bd", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "kernel"] } From d494d5cc7427b9c2945d6235554a5d4471d5f45b Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 5 Feb 2024 09:13:51 +0000 Subject: [PATCH 28/28] Try check again --- necsim/core/Cargo.toml | 4 ++-- necsim/impls/cuda/Cargo.toml | 4 ++-- necsim/impls/no-std/Cargo.toml | 4 ++-- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index 522b02b57..e2475c4d1 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index a10857534..22f5ad1ba 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index d35c81045..f2265ab8e 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index 77e7beac9..2ea0383d4 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index f603c622f..c9eefc131 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 19a3aa407..5fec391e1 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "3ec81181", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "kernel"] }