diff --git a/hist/histv7/inc/ROOT/RBinWithError.hxx b/hist/histv7/inc/ROOT/RBinWithError.hxx index ce665497b0d69..d25836447e69a 100644 --- a/hist/histv7/inc/ROOT/RBinWithError.hxx +++ b/hist/histv7/inc/ROOT/RBinWithError.hxx @@ -59,6 +59,15 @@ struct RBinWithError final { Internal::AtomicAdd(&fSum, w); Internal::AtomicAdd(&fSum2, w * w); } + + /// Add another bin content using atomic instructions. + /// + /// \param[in] rhs another bin content that must not be modified during the operation + void AtomicAdd(const RBinWithError &rhs) + { + Internal::AtomicAdd(&fSum, rhs.fSum); + Internal::AtomicAdd(&fSum2, rhs.fSum2); + } }; } // namespace Experimental diff --git a/hist/histv7/inc/ROOT/RHist.hxx b/hist/histv7/inc/ROOT/RHist.hxx index d7740d594f57b..f46b753b6a067 100644 --- a/hist/histv7/inc/ROOT/RHist.hxx +++ b/hist/histv7/inc/ROOT/RHist.hxx @@ -174,6 +174,17 @@ public: fStats.Add(other.fStats); } + /// Add all bin contents and statistics of another histogram using atomic instructions. + /// + /// Throws an exception if the axes configurations are not identical. + /// + /// \param[in] other another histogram that must not be modified during the operation + void AddAtomic(const RHist &other) + { + fEngine.AddAtomic(other.fEngine); + fStats.AddAtomic(other.fStats); + } + /// Clear all bin contents and statistics. void Clear() { diff --git a/hist/histv7/inc/ROOT/RHistEngine.hxx b/hist/histv7/inc/ROOT/RHistEngine.hxx index e56ed6877862e..584f1f92cea3f 100644 --- a/hist/histv7/inc/ROOT/RHistEngine.hxx +++ b/hist/histv7/inc/ROOT/RHistEngine.hxx @@ -196,6 +196,21 @@ public: } } + /// Add all bin contents of another histogram using atomic instructions. + /// + /// Throws an exception if the axes configurations are not identical. 
+ /// + /// \param[in] other another histogram that must not be modified during the operation + void AddAtomic(const RHistEngine &other) + { + if (fAxes != other.fAxes) { + throw std::invalid_argument("axes configurations not identical in AddAtomic"); + } + for (std::size_t i = 0; i < fBinContents.size(); i++) { + Internal::AtomicAdd(&fBinContents[i], other.fBinContents[i]); + } + } + /// Clear all bin contents. void Clear() { diff --git a/hist/histv7/inc/ROOT/RHistStats.hxx b/hist/histv7/inc/ROOT/RHistStats.hxx index 4bde3b85fcaec..eb16d4300fe07 100644 --- a/hist/histv7/inc/ROOT/RHistStats.hxx +++ b/hist/histv7/inc/ROOT/RHistStats.hxx @@ -68,6 +68,17 @@ public: fSumWX4 += other.fSumWX4; } + /// Add another statistics object using atomic instructions. + /// + /// \param[in] other another statistics object that must not be modified during the operation + void AddAtomic(const RDimensionStats &other) + { + Internal::AtomicAdd(&fSumWX, other.fSumWX); + Internal::AtomicAdd(&fSumWX2, other.fSumWX2); + Internal::AtomicAdd(&fSumWX3, other.fSumWX3); + Internal::AtomicAdd(&fSumWX4, other.fSumWX4); + } + void Clear() { fSumWX = 0.0; @@ -125,6 +136,24 @@ public: } } + /// Add all entries from another statistics object using atomic instructions. + /// + /// Throws an exception if the number of dimensions is not identical. + /// + /// \param[in] other another statistics object that must not be modified during the operation + void AddAtomic(const RHistStats &other) + { + if (fDimensionStats.size() != other.fDimensionStats.size()) { + throw std::invalid_argument("number of dimensions not identical in AddAtomic"); + } + Internal::AtomicAdd(&fNEntries, other.fNEntries); + Internal::AtomicAdd(&fSumW, other.fSumW); + Internal::AtomicAdd(&fSumW2, other.fSumW2); + for (std::size_t i = 0; i < fDimensionStats.size(); i++) { + fDimensionStats[i].AddAtomic(other.fDimensionStats[i]); + } + } + /// Clear this statistics object. 
void Clear() { diff --git a/hist/histv7/inc/ROOT/RHistUtils.hxx b/hist/histv7/inc/ROOT/RHistUtils.hxx index 8a8fccdc2cade..c79bbe1e2a771 100644 --- a/hist/histv7/inc/ROOT/RHistUtils.hxx +++ b/hist/histv7/inc/ROOT/RHistUtils.hxx @@ -197,15 +197,15 @@ std::enable_if_t> AtomicInc(T *ptr) } template -std::enable_if_t> AtomicAdd(T *ptr, const U &add) +auto AtomicAdd(T *ptr, const U &add) -> decltype(ptr->AtomicAdd(add)) { - ptr->AtomicAdd(add); + return ptr->AtomicAdd(add); } template -std::enable_if_t> AtomicInc(T *ptr) +auto AtomicInc(T *ptr) -> decltype(ptr->AtomicInc()) { - ptr->AtomicInc(); + return ptr->AtomicInc(); } } // namespace Internal diff --git a/hist/histv7/test/hist_engine_atomic.cxx b/hist/histv7/test/hist_engine_atomic.cxx index af81163c328b9..3d141ae7c1107 100644 --- a/hist/histv7/test/hist_engine_atomic.cxx +++ b/hist/histv7/test/hist_engine_atomic.cxx @@ -1,5 +1,63 @@ #include "hist_test.hxx" +#include +#include + +TEST(RHistEngine, AddAtomic) +{ + static constexpr std::size_t Bins = 20; + const RRegularAxis axis(Bins, {0, Bins}); + RHistEngine engineA({axis}); + RHistEngine engineB({axis}); + + engineA.Fill(-100); + for (std::size_t i = 0; i < Bins; i++) { + engineA.Fill(i); + engineA.Fill(i); + engineB.Fill(i); + } + engineB.Fill(100); + + engineA.AddAtomic(engineB); + + EXPECT_EQ(engineA.GetBinContent(RBinIndex::Underflow()), 1); + for (auto index : axis.GetNormalRange()) { + EXPECT_EQ(engineA.GetBinContent(index), 3); + } + EXPECT_EQ(engineA.GetBinContent(RBinIndex::Overflow()), 1); +} + +TEST(RHistEngine, StressAddAtomic) +{ + static constexpr std::size_t NThreads = 4; + static constexpr std::size_t NAddsPerThread = 10000; + static constexpr std::size_t NAdds = NThreads * NAddsPerThread; + + // Fill a single bin, to maximize contention. 
+ const RRegularAxis axis(1, {0, 1}); + RHistEngine engineA({axis}); + RHistEngine engineB({axis}); + engineB.Fill(0.5); + + StressInParallel(NThreads, [&] { + for (std::size_t i = 0; i < NAddsPerThread; i++) { + engineA.AddAtomic(engineB); + } + }); + + EXPECT_EQ(engineA.GetBinContent(0), NAdds); +} + +TEST(RHistEngine, AddAtomicDifferent) +{ + // The equality operators of RAxes and the axis objects are already unit-tested separately, so here we only check one + // case with different the number of bins. + RHistEngine engineA(10, {0, 1}); + RHistEngine engineB(20, {0, 1}); + + EXPECT_THROW(engineA.AddAtomic(engineB), std::invalid_argument); +} + TEST(RHistEngine, FillAtomic) { static constexpr std::size_t Bins = 20; @@ -179,6 +237,81 @@ TEST(RHistEngine, FillAtomicTupleWeightInvalidNumberOfArguments) EXPECT_THROW(engine2.FillAtomic(std::make_tuple(1, 2, 3), RWeight(1)), std::invalid_argument); } +TEST(RHistEngine, StressFillAddAtomicWeight) +{ + static constexpr std::size_t NThreads = 4; + static constexpr std::size_t NOpsPerThread = 10000; + static constexpr std::size_t NOps = NThreads * NOpsPerThread; + static constexpr double Weight = 0.5; + + // Fill a single bin, to maximize contention. 
+ const RRegularAxis axis(1, {0, 1}); + RHistEngine engineA({axis}); + RHistEngine engineB({axis}); + engineB.Fill(0.5, RWeight(Weight)); + + std::atomic op = 0; + StressInParallel(NThreads, [&] { + int chosenOp = op.fetch_add(1) % 2; + if (chosenOp == 0) { + for (std::size_t i = 0; i < NOpsPerThread; i++) { + engineA.FillAtomic(0.5, RWeight(Weight)); + } + } else { + for (std::size_t i = 0; i < NOpsPerThread; i++) { + engineA.AddAtomic(engineB); + } + } + }); + + EXPECT_EQ(engineA.GetBinContent(0), NOps * Weight); +} + +TEST(RHistEngine_RBinWithError, AddAtomic) +{ + static constexpr std::size_t Bins = 20; + const RRegularAxis axis(Bins, {0, Bins}); + RHistEngine engineA({axis}); + RHistEngine engineB({axis}); + + for (std::size_t i = 0; i < Bins; i++) { + engineA.Fill(i, RWeight(0.2 + i * 0.03)); + engineB.Fill(i, RWeight(0.1 + i * 0.05)); + } + + engineA.AddAtomic(engineB); + + for (auto index : axis.GetNormalRange()) { + auto &bin = engineA.GetBinContent(index); + double weightA = 0.2 + index.GetIndex() * 0.03; + double weightB = 0.1 + index.GetIndex() * 0.05; + EXPECT_FLOAT_EQ(bin.fSum, weightA + weightB); + EXPECT_FLOAT_EQ(bin.fSum2, weightA * weightA + weightB * weightB); + } +} + +TEST(RHistEngine_RBinWithError, StressAddAtomic) +{ + static constexpr std::size_t NThreads = 4; + static constexpr std::size_t NAddsPerThread = 10000; + static constexpr std::size_t NAdds = NThreads * NAddsPerThread; + + // Fill a single bin, to maximize contention. 
+ const RRegularAxis axis(1, {0, 1}); + RHistEngine engineA({axis}); + RHistEngine engineB({axis}); + engineB.Fill(0.5); + + StressInParallel(NThreads, [&] { + for (std::size_t i = 0; i < NAddsPerThread; i++) { + engineA.AddAtomic(engineB); + } + }); + + EXPECT_EQ(engineA.GetBinContent(0).fSum, NAdds); + EXPECT_EQ(engineA.GetBinContent(0).fSum2, NAdds); +} + TEST(RHistEngine_RBinWithError, FillAtomic) { static constexpr std::size_t Bins = 20; diff --git a/hist/histv7/test/hist_hist.cxx b/hist/histv7/test/hist_hist.cxx index a163a3e1ae904..1c4ce5557fcc3 100644 --- a/hist/histv7/test/hist_hist.cxx +++ b/hist/histv7/test/hist_hist.cxx @@ -52,6 +52,45 @@ TEST(RHist, Add) EXPECT_EQ(histA.GetBinContent(RBinIndex(9)), 1); } +TEST(RHist, AddAtomic) +{ + static constexpr std::size_t Bins = 20; + const RRegularAxis axis(Bins, {0, Bins}); + RHist histA({axis}); + RHist histB({axis}); + + histA.Fill(8.5); + histB.Fill(9.5); + + histA.AddAtomic(histB); + + EXPECT_EQ(histA.GetNEntries(), 2); + EXPECT_EQ(histA.GetBinContent(RBinIndex(8)), 1); + EXPECT_EQ(histA.GetBinContent(RBinIndex(9)), 1); +} + +TEST(RHist, StressAddAtomic) +{ + static constexpr std::size_t NThreads = 4; + static constexpr std::size_t NAddsPerThread = 10000; + static constexpr std::size_t NAdds = NThreads * NAddsPerThread; + + // Fill a single bin, to maximize contention. 
+ const RRegularAxis axis(1, {0, 1}); + RHist histA({axis}); + RHist histB({axis}); + histB.Fill(0.5); + + StressInParallel(NThreads, [&] { + for (std::size_t i = 0; i < NAddsPerThread; i++) { + histA.AddAtomic(histB); + } + }); + + EXPECT_EQ(histA.GetNEntries(), NAdds); + EXPECT_EQ(histA.GetBinContent(0), NAdds); +} + TEST(RHist, Clear) { static constexpr std::size_t Bins = 20; diff --git a/hist/histv7/test/hist_stats.cxx b/hist/histv7/test/hist_stats.cxx index fbcf8902e2e3f..bd60d71eadb43 100644 --- a/hist/histv7/test/hist_stats.cxx +++ b/hist/histv7/test/hist_stats.cxx @@ -123,6 +123,77 @@ TEST(RHistStats, AddDifferent) EXPECT_THROW(statsA.Add(statsB), std::invalid_argument); } +TEST(RHistStats, AddAtomic) +{ + RHistStats statsA(2); + RHistStats statsB(2); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + statsA.Fill(i, 2 * i); + statsB.Fill(2 * i, 3 * i, RWeight(0.1 + 0.03 * i)); + } + + statsA.AddAtomic(statsB); + + ASSERT_EQ(statsA.GetNEntries(), 2 * Entries); + EXPECT_DOUBLE_EQ(statsA.GetSumW(), 27.7); + EXPECT_DOUBLE_EQ(statsA.GetSumW2(), 23.563); + + { + const auto &dimensionStats = statsA.GetDimensionStats(/*=0*/); + EXPECT_FLOAT_EQ(dimensionStats.fSumWX, 376.2); + EXPECT_FLOAT_EQ(dimensionStats.fSumWX2, 7790); + EXPECT_FLOAT_EQ(dimensionStats.fSumWX3, 200019.84); + EXPECT_FLOAT_EQ(dimensionStats.fSumWX4, 5846915.6); + } + { + const auto &dimensionStats = statsA.GetDimensionStats(1); + EXPECT_FLOAT_EQ(dimensionStats.fSumWX, 659.3); + EXPECT_FLOAT_EQ(dimensionStats.fSumWX2, 21850); + EXPECT_FLOAT_EQ(dimensionStats.fSumWX3, 842029.46); + EXPECT_FLOAT_EQ(dimensionStats.fSumWX4, 35754169.6); + } +} + +TEST(RHistStats, StressAddAtomic) +{ + static constexpr std::size_t NThreads = 4; + static constexpr std::size_t NAddsPerThread = 10000; + static constexpr std::size_t NAdds = NThreads * NAddsPerThread; + static constexpr double X = 1.5; + static constexpr double Weight = 0.5; + + // Use a single dimension, to 
maximize contention. + RHistStats statsA(1); + RHistStats statsB(1); + statsB.Fill(X, RWeight(Weight)); + + StressInParallel(NThreads, [&] { + for (std::size_t i = 0; i < NAddsPerThread; i++) { + statsA.AddAtomic(statsB); + } + }); + + EXPECT_EQ(statsA.GetNEntries(), NAdds); + EXPECT_DOUBLE_EQ(statsA.GetSumW(), NAdds * Weight); + EXPECT_DOUBLE_EQ(statsA.GetSumW2(), NAdds * Weight * Weight); + + const auto &dimensionStats = statsA.GetDimensionStats(); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX, NAdds * Weight * X); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX2, NAdds * Weight * X * X); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX3, NAdds * Weight * X * X * X); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX4, NAdds * Weight * X * X * X * X); +} + +TEST(RHistStats, AddAtomicDifferent) +{ + RHistStats statsA(2); + RHistStats statsB(3); + + EXPECT_THROW(statsA.AddAtomic(statsB), std::invalid_argument); +} + TEST(RHistStats, Clear) { RHistStats stats(2); diff --git a/hist/histv7/test/hist_user.cxx b/hist/histv7/test/hist_user.cxx index 3d7a37119be07..e109a74a3e47a 100644 --- a/hist/histv7/test/hist_user.cxx +++ b/hist/histv7/test/hist_user.cxx @@ -34,6 +34,8 @@ struct User { void AtomicInc() { ROOT::Experimental::Internal::AtomicInc(&fValue); } void AtomicAdd(double w) { ROOT::Experimental::Internal::AtomicAdd(&fValue, w); } + + void AtomicAdd(const User &rhs) { ROOT::Experimental::Internal::AtomicAdd(&fValue, rhs.fValue); } }; static_assert(std::is_nothrow_move_constructible_v>); @@ -59,6 +61,23 @@ TEST(RHistEngineUser, Add) EXPECT_EQ(engineA.GetBinContent(RBinIndex(9)).fValue, 1); } +TEST(RHistEngineUser, AddAtomic) +{ + // Addition with atomic instructions uses AtomicAdd(const User &) + static constexpr std::size_t Bins = 20; + const RRegularAxis axis(Bins, {0, Bins}); + RHistEngine engineA({axis}); + RHistEngine engineB({axis}); + + engineA.Fill(8.5); + engineB.Fill(9.5); + + engineA.AddAtomic(engineB); + + EXPECT_EQ(engineA.GetBinContent(RBinIndex(8)).fValue, 1); + 
EXPECT_EQ(engineA.GetBinContent(RBinIndex(9)).fValue, 1); +} + TEST(RHistEngineUser, Clear) { // Clearing assigns default-constructed objects.