From 0184db3d36d26bf1219eaf2a756486862e8f3081 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 20 Mar 2026 13:59:47 +0100 Subject: [PATCH 1/6] Invert meaning of operator bool for CUDA stream and event --- include/ghex/device/cuda/event.hpp | 10 +--------- include/ghex/device/cuda/event_pool.hpp | 2 +- include/ghex/device/cuda/stream.hpp | 10 +--------- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/include/ghex/device/cuda/event.hpp b/include/ghex/device/cuda/event.hpp index 4e0305df..f39c91ca 100644 --- a/include/ghex/device/cuda/event.hpp +++ b/include/ghex/device/cuda/event.hpp @@ -40,15 +40,7 @@ struct cuda_event if (!m_moved) { GHEX_CHECK_CUDA_RESULT_NO_THROW(cudaEventDestroy(m_event)) } } - /** - * @brief Returns `true` if `*this` has been moved, i.e. can no longer be used. - * - * @todo The semantic of this function is a bit confusing as a valid object returns - * `false`. It should be changed such that a valid object returns `true` and an - * invalid one returns `false`. This is the behaviour for `GHEX_C_STRUCT` and - * `GHEX_C_MANAGED_STRUCT` but not for `stream` and `cuda_event`. - */ - operator bool() const noexcept { return m_moved; } + operator bool() const noexcept { return !m_moved; } cudaEvent_t& get() noexcept { diff --git a/include/ghex/device/cuda/event_pool.hpp b/include/ghex/device/cuda/event_pool.hpp index f65a2b67..ce1b77bf 100644 --- a/include/ghex/device/cuda/event_pool.hpp +++ b/include/ghex/device/cuda/event_pool.hpp @@ -70,7 +70,7 @@ struct event_pool while (!(m_next_event < m_events.size())) { m_events.emplace_back(cuda_event()); } const std::size_t event_to_use = m_next_event; - assert(!bool(m_events[event_to_use])); + assert(bool(m_events[event_to_use])); m_next_event += 1; return m_events[event_to_use]; } diff --git a/include/ghex/device/cuda/stream.hpp b/include/ghex/device/cuda/stream.hpp index 0c93ed4b..7ade4771 100644 --- a/include/ghex/device/cuda/stream.hpp +++ b/include/ghex/device/cuda/stream.hpp @@ -39,15 +39,7 @@ struct stream if (!m_moved) { GHEX_CHECK_CUDA_RESULT_NO_THROW(cudaStreamDestroy(m_stream)) } } - /** - * @brief Returns `true` if `*this` has been moved, i.e. can no longer be used. - * - * @todo The semantic of this function is a bit confusing as a valid object returns - * `false`. It should be changed such that a valid object returns `true` and an - * invalid one returns `false`. This is the behaviour for `GHEX_C_STRUCT` and - * `GHEX_C_MANAGED_STRUCT` but not for `stream` and `cuda_event`. - */ - operator bool() const noexcept { return m_moved; } + operator bool() const noexcept { return !m_moved; } operator cudaStream_t() const noexcept { From 43e66be83598bdcbd14d637c93d21d434752b550 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 23 Mar 2026 11:35:55 +0100 Subject: [PATCH 2/6] Unify CUDA event implementations Use explicit cuda_event implementation everywhere. --- include/ghex/device/cuda/event.hpp | 7 +++++-- include/ghex/device/cuda/future.hpp | 16 +++------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/include/ghex/device/cuda/event.hpp b/include/ghex/device/cuda/event.hpp index f39c91ca..9fb30a87 100644 --- a/include/ghex/device/cuda/event.hpp +++ b/include/ghex/device/cuda/event.hpp @@ -27,8 +27,8 @@ struct cuda_event cudaEvent_t m_event; ghex::util::moved_bit m_moved; - cuda_event() { - GHEX_CHECK_CUDA_RESULT(cudaEventCreateWithFlags(&m_event, cudaEventDisableTiming)) + cuda_event(unsigned int flags = cudaEventDisableTiming) { + GHEX_CHECK_CUDA_RESULT(cudaEventCreateWithFlags(&m_event, flags)) }; cuda_event(const cuda_event&) = delete; cuda_event& operator=(const cuda_event&) = delete; @@ -52,6 +52,9 @@ struct cuda_event assert(!m_moved); return m_event; } + + operator cudaEvent_t&() noexcept { return get(); } + operator const cudaEvent_t&() const noexcept { return get(); } }; } // namespace device } // namespace ghex diff --git a/include/ghex/device/cuda/future.hpp b/include/ghex/device/cuda/future.hpp index bdb0965f..085a710c 100644 --- a/include/ghex/device/cuda/future.hpp +++ b/include/ghex/device/cuda/future.hpp @@ -10,7 +10,7 @@ #pragma once #include -#include +#include #include #ifdef GHEX_CUDACC #include @@ -28,12 +28,7 @@ namespace device template struct future { - GHEX_C_MANAGED_STRUCT( - event_type, cudaEvent_t, [](auto&&... args) - { GHEX_CHECK_CUDA_RESULT(cudaEventCreateWithFlags(std::forward(args)...)) }, - [](auto& e) { GHEX_CHECK_CUDA_RESULT_NO_THROW(cudaEventDestroy(e)) }) - - event_type m_event; + cuda_event m_event; T m_data; future(T&& data, stream& stream) @@ -65,12 +60,7 @@ struct future template<> struct future { - GHEX_C_MANAGED_STRUCT( - event_type, cudaEvent_t, [](auto&&... args) - { GHEX_CHECK_CUDA_RESULT(cudaEventCreateWithFlags(std::forward(args)...)) }, - [](auto& e) { GHEX_CHECK_CUDA_RESULT_NO_THROW(cudaEventDestroy(e)) }) - - event_type m_event; + cuda_event m_event; future(stream& stream) : m_event{cudaEventDisableTiming} From 4142369a92597b2caa581d2b7b90c222d28e3245 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 23 Mar 2026 11:37:51 +0100 Subject: [PATCH 3/6] Default-construct cuda_event since it already sets cudaEventDisableTiming --- include/ghex/device/cuda/future.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/ghex/device/cuda/future.hpp b/include/ghex/device/cuda/future.hpp index 085a710c..35b2b7f1 100644 --- a/include/ghex/device/cuda/future.hpp +++ b/include/ghex/device/cuda/future.hpp @@ -32,7 +32,7 @@ struct future T m_data; future(T&& data, stream& stream) - : m_event{cudaEventDisableTiming} //: m_event{cudaEventDisableTiming | cudaEventBlockingSync} + : m_event{} , m_data{std::move(data)} { GHEX_CHECK_CUDA_RESULT(cudaEventRecord(m_event, stream)); @@ -63,8 +63,7 @@ struct future cuda_event m_event; future(stream& stream) - : m_event{cudaEventDisableTiming} - //: m_event{cudaEventDisableTiming | cudaEventBlockingSync} + : m_event{} { GHEX_CHECK_CUDA_RESULT(cudaEventRecord(m_event, stream)); } From d92c9e4722657e35fc578e5864d7a13d0ec4316b Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 16:04:33 +0100 Subject: [PATCH 4/6] Split up cuda event constructors and remove implicit conversion to cudaEvent_t --- include/ghex/device/cuda/event.hpp | 6 ++---- include/ghex/device/cuda/future.hpp | 12 ++++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/ghex/device/cuda/event.hpp b/include/ghex/device/cuda/event.hpp index 9fb30a87..c7ad5712 100644 --- a/include/ghex/device/cuda/event.hpp +++ b/include/ghex/device/cuda/event.hpp @@ -27,7 +27,8 @@ struct cuda_event cudaEvent_t m_event; ghex::util::moved_bit m_moved; - cuda_event(unsigned int flags = cudaEventDisableTiming) { + cuda_event() : cuda_event(cudaEventDisableTiming) {} + explicit cuda_event(unsigned int flags) { GHEX_CHECK_CUDA_RESULT(cudaEventCreateWithFlags(&m_event, flags)) }; cuda_event(const cuda_event&) = delete; @@ -52,9 +53,6 @@ struct cuda_event assert(!m_moved); return m_event; } - - operator cudaEvent_t&() noexcept { return get(); } - operator const cudaEvent_t&() const noexcept { return get(); } }; } // namespace device } // namespace ghex diff --git a/include/ghex/device/cuda/future.hpp b/include/ghex/device/cuda/future.hpp index 35b2b7f1..6c5e83e9 100644 --- a/include/ghex/device/cuda/future.hpp +++ b/include/ghex/device/cuda/future.hpp @@ -35,7 +35,7 @@ struct future : m_event{} , m_data{std::move(data)} { - GHEX_CHECK_CUDA_RESULT(cudaEventRecord(m_event, stream)); + GHEX_CHECK_CUDA_RESULT(cudaEventRecord(m_event.get(), stream)); } future(const future&) = delete; @@ -43,11 +43,11 @@ struct future future(future&& other) = default; future& operator=(future&&) = default; - bool test() noexcept { return (m_event ? (cudaSuccess == cudaEventQuery(m_event)) : true); } + bool test() noexcept { return (m_event ? (cudaSuccess == cudaEventQuery(m_event.get())) : true); } void wait() { - if (m_event) GHEX_CHECK_CUDA_RESULT(cudaEventSynchronize(m_event)); + if (m_event) GHEX_CHECK_CUDA_RESULT(cudaEventSynchronize(m_event.get())); } [[nodiscard]] T get() @@ -65,7 +65,7 @@ struct future future(stream& stream) : m_event{} { - GHEX_CHECK_CUDA_RESULT(cudaEventRecord(m_event, stream)); + GHEX_CHECK_CUDA_RESULT(cudaEventRecord(m_event.get(), stream)); } future(const future&) = delete; @@ -73,11 +73,11 @@ struct future future(future&& other) = default; future& operator=(future&&) = default; - bool test() noexcept { return (m_event ? (cudaSuccess == cudaEventQuery(m_event)) : true); } + bool test() noexcept { return (m_event ? (cudaSuccess == cudaEventQuery(m_event.get())) : true); } void wait() { - if (m_event) GHEX_CHECK_CUDA_RESULT(cudaEventSynchronize(m_event)); + if (m_event) GHEX_CHECK_CUDA_RESULT(cudaEventSynchronize(m_event.get())); } void get() { wait(); } From 3b27d76427f71d8be9c680f77e7c7b73d917f817 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 16:07:10 +0100 Subject: [PATCH 5/6] Format files --- include/ghex/device/cuda/event.hpp | 5 ++++- include/ghex/device/cuda/future.hpp | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/ghex/device/cuda/event.hpp b/include/ghex/device/cuda/event.hpp index c7ad5712..b35c8ee8 100644 --- a/include/ghex/device/cuda/event.hpp +++ b/include/ghex/device/cuda/event.hpp @@ -27,7 +27,10 @@ struct cuda_event cudaEvent_t m_event; ghex::util::moved_bit m_moved; - cuda_event() : cuda_event(cudaEventDisableTiming) {} + cuda_event() + : cuda_event(cudaEventDisableTiming) + { + } explicit cuda_event(unsigned int flags) { GHEX_CHECK_CUDA_RESULT(cudaEventCreateWithFlags(&m_event, flags)) }; diff --git a/include/ghex/device/cuda/future.hpp b/include/ghex/device/cuda/future.hpp index 6c5e83e9..44e36f58 100644 --- a/include/ghex/device/cuda/future.hpp +++ b/include/ghex/device/cuda/future.hpp @@ -43,7 +43,10 @@ struct future future(future&& other) = default; future& operator=(future&&) = default; - bool test() noexcept { return (m_event ? (cudaSuccess == cudaEventQuery(m_event.get())) : true); } + bool test() noexcept + { + return (m_event ? (cudaSuccess == cudaEventQuery(m_event.get())) : true); + } void wait() { @@ -73,7 +76,10 @@ struct future future(future&& other) = default; future& operator=(future&&) = default; - bool test() noexcept { return (m_event ? (cudaSuccess == cudaEventQuery(m_event.get())) : true); } + bool test() noexcept + { + return (m_event ? (cudaSuccess == cudaEventQuery(m_event.get())) : true); + } void wait() { From fb8162f78a4b27d8aeaf0f9d630845102337f75f Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 16:09:07 +0100 Subject: [PATCH 6/6] Fix includes --- include/ghex/device/cuda/future.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ghex/device/cuda/future.hpp b/include/ghex/device/cuda/future.hpp index 44e36f58..c06b290b 100644 --- a/include/ghex/device/cuda/future.hpp +++ b/include/ghex/device/cuda/future.hpp @@ -10,9 +10,9 @@ #pragma once #include +#ifdef GHEX_CUDACC #include #include -#ifdef GHEX_CUDACC #include #endif #include