From bb5f7b223351c82ef5cf25375b949bd3f4e398e2 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Wed, 27 Aug 2025 17:55:32 +0200 Subject: [PATCH] Introduce `TMath::KNNDensity` and deprecate `TH1K` The `TH1K` class is deprecated and will be removed in 6.40. It did not implement the `TH1` interface consistently, and limited the usability of the k-neighbors method it implemented by closely coupling the algorithm with the histogram class. Instead, one should use the new `TMath::KNNDensity` function that implements the same mathematical logic. --- README/ReleaseNotes/v638/index.md | 1 + .../python/ROOT/_pythonization/_th1.py | 1 - .../python/ROOT/_pythonization/_uhi.py | 1 - bindings/pyroot/pythonizations/test/memory.py | 1 - .../pythonizations/test/uhi_indexing.py | 2 +- gui/webgui6/src/TWebCanvas.cxx | 22 ----- hist/hist/inc/TH1K.h | 2 +- hist/hist/src/TH1K.cxx | 3 + math/mathcore/inc/TMath.h | 5 ++ math/mathcore/src/TMath.cxx | 81 +++++++++++++++++++ math/mathcore/test/CMakeLists.txt | 1 + math/mathcore/test/testKNNDensity.cxx | 64 +++++++++++++++ roottest/graphics/CMakeLists.txt | 1 - roottest/graphics/macros/hist/hksimple.C | 55 ------------- tutorials/hist/hist012_TH1_hksimple.C | 56 ------------- tutorials/hist/index.md | 1 - 16 files changed, 157 insertions(+), 140 deletions(-) create mode 100644 math/mathcore/test/testKNNDensity.cxx delete mode 100644 roottest/graphics/macros/hist/hksimple.C delete mode 100644 tutorials/hist/hist012_TH1_hksimple.C diff --git a/README/ReleaseNotes/v638/index.md b/README/ReleaseNotes/v638/index.md index 31fce2df63c6a..34e07537f9e87 100644 --- a/README/ReleaseNotes/v638/index.md +++ b/README/ReleaseNotes/v638/index.md @@ -10,6 +10,7 @@ * The `rpath` build option is deprecated. It is now without effect. Relative RPATHs to the main ROOT libraries are unconditionally appended to all ROOT executables and libraries if the operating system supports it. If you want a ROOT build without RPATHs, use the canonical CMake variable `CMAKE_SKIP_INSTALL_RPATH=TRUE`. +* The `TH1K` class is deprecated and will be removed in 6.40. It did not implement the `TH1` interface consistently, and limited the usability of the k-neighbors method it implemented by closely coupling the algorithm with the histogram class. Please use the new `TMath::KNNDensity` function that implements the same mathematical logic. ## Core Libraries * Behavior change: when selecting a template instantiation for a dictionary, all the template arguments have to be fully defined - the forward declarations are not enough any more. The error prompted by the dictionary generator will be `Warning: Unused class rule: MyTemplate`. diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_th1.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_th1.py index 2d44751445008..0e9a819102c6d 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_th1.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_th1.py @@ -233,7 +233,6 @@ def _FillWithNumpyArray(self, *args): "TH1L", "TH1F", "TH1D", - "TH1K", "TProfile", ] diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_uhi.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_uhi.py index 5faebd583396a..301c8e3a91805 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_uhi.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_uhi.py @@ -566,7 +566,6 @@ def _get_sum_of_weights_squared(self) -> np.typing.NDArray[Any]: # noqa: F821 "TH1C": _values_by_copy, "TH2C": _values_by_copy, "TH3C": _values_by_copy, - "TH1K": _values_by_copy, "TH2K": _values_by_copy, "TH3K": _values_by_copy, "TProfile": _values_by_copy, diff --git a/bindings/pyroot/pythonizations/test/memory.py b/bindings/pyroot/pythonizations/test/memory.py index 4d8e767a9ff7f..71c43302718d8 100644 --- a/bindings/pyroot/pythonizations/test/memory.py +++ b/bindings/pyroot/pythonizations/test/memory.py @@ -89,7 +89,6 @@ def test_objects_ownership_with_subdir(self): "TH1L": ("h", "h", 10, 0, 10), "TH1F": ("h", "h", 10, 0, 10), "TH1D": ("h", "h", 10, 0, 10), - "TH1K": ("h", "h", 10, 0, 10), "TProfile": ("h", "h", 10, 0, 10), "TH2C": ("h", "h", 10, 0, 10, 10, 0, 10), "TH2S": ("h", "h", 10, 0, 10, 10, 0, 10), diff --git a/bindings/pyroot/pythonizations/test/uhi_indexing.py b/bindings/pyroot/pythonizations/test/uhi_indexing.py index c462ff8ff8e32..31d30e15f5f9a 100644 --- a/bindings/pyroot/pythonizations/test/uhi_indexing.py +++ b/bindings/pyroot/pythonizations/test/uhi_indexing.py @@ -12,7 +12,7 @@ def _special_setting(hist): # For these classes, SetBinContent works differently than for other classes, # as in it does not set the bin content to the specified value, but does some other calculations # for that, these classes will be tested differently - return isinstance(hist, (ROOT.TProfile, ROOT.TProfile2D, ROOT.TProfile2Poly, ROOT.TProfile3D, ROOT.TH1K)) + return isinstance(hist, (ROOT.TProfile, ROOT.TProfile2D, ROOT.TProfile2Poly, ROOT.TProfile3D)) def _get_index_for_dimension(hist, index): diff --git a/gui/webgui6/src/TWebCanvas.cxx b/gui/webgui6/src/TWebCanvas.cxx index 4e6b39473ad10..fcdfa7cdd9021 100644 --- a/gui/webgui6/src/TWebCanvas.cxx +++ b/gui/webgui6/src/TWebCanvas.cxx @@ -36,7 +36,6 @@ #include "TF2.h" #include "TH1.h" #include "TH2.h" -#include "TH1K.h" #include "THStack.h" #include "TMultiGraph.h" #include "TEnv.h" @@ -935,27 +934,6 @@ void TWebCanvas::CreatePadSnapshot(TPadWebSnapshot &paddata, TPad *pad, Long64_t CreatePadSnapshot(paddata.NewSubPad(), (TPad *)obj, version, nullptr); } else if (!process_primitives) { continue; - } else if (obj->InheritsFrom(TH1K::Class())) { - flush_master(); - TH1K *hist = static_cast(obj); - - Int_t nbins = hist->GetXaxis()->GetNbins(); - - TH1D *h1 = new TH1D("__dummy_name__", hist->GetTitle(), nbins, hist->GetXaxis()->GetXmin(), hist->GetXaxis()->GetXmax()); - h1->SetDirectory(nullptr); - h1->SetName(hist->GetName()); - hist->TAttLine::Copy(*h1); - hist->TAttFill::Copy(*h1); - hist->TAttMarker::Copy(*h1); - for (Int_t n = 1; n <= nbins; ++n) - h1->SetBinContent(n, hist->GetBinContent(n)); - - TIter fiter(hist->GetListOfFunctions()); - while (auto fobj = fiter()) - h1->GetListOfFunctions()->Add(fobj->Clone()); - - paddata.NewPrimitive(obj, iter.GetOption()).SetSnapshot(TWebSnapshot::kObject, h1, kTRUE); - } else if (obj->InheritsFrom(TH1::Class())) { flush_master(); diff --git a/hist/hist/inc/TH1K.h b/hist/hist/inc/TH1K.h index 4d1d99b7cd4cf..57666039e280e 100644 --- a/hist/hist/inc/TH1K.h +++ b/hist/hist/inc/TH1K.h @@ -68,6 +68,6 @@ class TH1K : public TH1, public TArrayF { void SetKOrd(Int_t k){fKOrd=k;} ClassDefOverride(TH1K,2) //1-Dim Nearest Kth neighbour method -}; +} R__DEPRECATED(6, 40, "Use TMath::KNNDensity"); #endif diff --git a/hist/hist/src/TH1K.cxx b/hist/hist/src/TH1K.cxx index 2f3bd8281c694..80127a3a45add 100644 --- a/hist/hist/src/TH1K.cxx +++ b/hist/hist/src/TH1K.cxx @@ -28,6 +28,9 @@ In this method : that DistanceToNearestKthNeighbour > BinWidth and K >=3 This class has been implemented by Victor Perevoztchikov + + +\deprecated The `TH1K` class is deprecated and will be removed in 6.40. It did not implement the `TH1` interface consistently, and limited the usability of the k-neighbors method it implemented by closely coupling the algorithm with the histogram class. Please use the new `TMath::KNNDensity` function that implements the same mathematical logic. */ diff --git a/math/mathcore/inc/TMath.h b/math/mathcore/inc/TMath.h index d102d649f3420..0ab9ba20e7af0 100644 --- a/math/mathcore/inc/TMath.h +++ b/math/mathcore/inc/TMath.h @@ -15,6 +15,8 @@ #include "TMathBase.h" #include "TError.h" +#include "ROOT/RSpan.hxx" + #include #include #include @@ -574,6 +576,9 @@ struct Limits { Double_t Gamma(Double_t a,Double_t x); Double_t GammaDist(Double_t x, Double_t gamma, Double_t mu=0, Double_t beta=1); Double_t LnGamma(Double_t z); + + void KNNDensity(std::span observations, std::span queries, std::span result, + int k, double dmin = 0.0); } //////////////////////////////////////////////////////////////////////////////// diff --git a/math/mathcore/src/TMath.cxx b/math/mathcore/src/TMath.cxx index 48b7d0dbcfd62..d25758bcd05e4 100644 --- a/math/mathcore/src/TMath.cxx +++ b/math/mathcore/src/TMath.cxx @@ -3195,6 +3195,87 @@ Double_t TMath::VavilovDenEval(Double_t rlam, Double_t *AC, Double_t *HC, Int_t return v; } +/// \brief Computes a 1D k-nearest neighbor density estimate for a set of query points. +/// +/// For each query point, this function estimates the local density based on +/// the distance to the k-th nearest neighbor. A minimum distance threshold +/// `dmin` is used to avoid division by zero and numerical instability when a +/// query point coincides with an observation or neighbors are extremely close. +/// +/// \param observations Data points used for density estimation. +/// \param queries Points at which to estimate the density. +/// \param result Output for the density estimates. Must have the same size as `queries`. +/// \param k Number of nearest neighbors to consider. If k >= number of observations, +/// it is automatically reduced to n-1. +/// \param dmin Minimum allowed neighbor distance to prevent division by zero. Default is zero. +/// +/// The density estimate for each query point x is: +/// +/// \f[ +/// \hat{p}(x) = \frac{n}{2(n+1)} \frac{k_{\text{actual}}}{d_\(k_{\text{actual}}\)(x)}, +/// \f] +/// +/// where \(d_\(k_{\text{actual}}\)(x)\) is the distance and \(k_{\text{actual}}\) may be different from \(k\) +/// when it needs to be greater than \(k\) to keep the \(d > d_{\text{min}} constraint. +/// If no neighbor is outside `dmin`, the minimum allowed distance will be used as the distance itself. +/// +/// \note This function implements the math of the former `TH1K` class. There are some differences: +/// +/// 1. The `TH1K` class additionally multiplied all density estimates with the bin width. +/// 2. If the `k` parameter was not specified by the user, the `TH1K` class used the bin width as `dmin`. +/// 3. If the `k` parameter was explicitly specified, the `dmin` parameter was set to 1.e-6. +void TMath::KNNDensity(std::span observations, std::span queries, std::span result, + int k, double dmin) +{ + int n = observations.size(); + if (k < 0) { + Error("KNNDensity", "k must be positive"); + return; + } + if (k >= static_cast(n)) + k = static_cast(n - 1); + + if (result.size() != queries.size()) { + Error("KNNDensity", "Result span size must match query span size"); + return; + } + + // Make a sorted copy of the observations for binary search + std::vector sorted_obs(observations.begin(), observations.end()); + std::sort(sorted_obs.begin(), sorted_obs.end()); + + // constant factor in the output + const double factor = n * 0.5 / (n + 1.0); + + for (std::size_t i = 0; i < queries.size(); ++i) { + double x = queries[i]; + + // Find index in sorted_obs + int jr = std::distance(sorted_obs.begin(), std::lower_bound(sorted_obs.begin(), sorted_obs.end(), x)); + int jl = jr - 1; + + double ff = 0.0; + int k_actual = 0; + + for (; k_actual + 1 <= k || ff <= dmin; ++k_actual) { + + double fl = (jl >= 0) ? std::abs(sorted_obs[jl] - x) : std::numeric_limits::max(); + double fr = (jr < n) ? std::abs(sorted_obs[jr] - x) : std::numeric_limits::max(); + + if (jl < 0 && jr >= n) + break; + + ff = std::min(fl, fr); + fl < fr ? --jl : ++jr; + } + + if (ff == 0.0) + ff = dmin; // avoid division by zero + + // Modified kNN density formula + result[i] = factor == 0.0 ? 0.0 : factor * k_actual / ff; + } +} //explicitly instantiate template functions from VecCore #ifdef R__HAS_VECCORE diff --git a/math/mathcore/test/CMakeLists.txt b/math/mathcore/test/CMakeLists.txt index 2d184210766bf..94e72e706df49 100644 --- a/math/mathcore/test/CMakeLists.txt +++ b/math/mathcore/test/CMakeLists.txt @@ -95,6 +95,7 @@ ROOT_ADD_GTEST(testRootFinder testRootFinder.cxx LIBRARIES ${Libraries}) ROOT_ADD_GTEST(testKahan testKahan.cxx LIBRARIES Core MathCore) ROOT_ADD_GTEST(testDelaunay2D testDelaunay2D.cxx LIBRARIES Core MathCore) +ROOT_ADD_GTEST(testKNNDensity testKNNDensity.cxx LIBRARIES Core MathCore Hist) if(clad) ROOT_ADD_GTEST(CladDerivatorTests CladDerivatorTests.cxx LIBRARIES Core MathCore) diff --git a/math/mathcore/test/testKNNDensity.cxx b/math/mathcore/test/testKNNDensity.cxx new file mode 100644 index 0000000000000..5093e312f7121 --- /dev/null +++ b/math/mathcore/test/testKNNDensity.cxx @@ -0,0 +1,64 @@ +#include "TMath.h" +#include "TH1K.h" + +#include "gtest/gtest.h" + +TEST(DNNDensity, BasicTests) +{ + auto check = [](std::vector values, int k, double dist, int k_actual) { + double query = 0.; + std::span queryspan{&query, 1}; + + double output = 0.; + std::span outspan{&output, 1}; + + // To compute the reference value + std::size_t n = values.size(); + const double factor = n * 0.5 / (n + 1.0); + + if (!values.empty()) + dist = std::min(dist, values.back() - values.front()); + + double dmin = 1.; + TMath::KNNDensity(values, queryspan, outspan, k, k == 0 ? dmin : 0.); + double ref = factor == 0. ? 0. : factor * k_actual * (1. / std::abs(dist)); + + EXPECT_DOUBLE_EQ(output, ref); + + // Remove the next block once the deprecated TH1K class is removed. It + // was meant to ensure that the new TMath implementation given the same + // results. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + TH1K hist{"hist", "hist", 19, -9.5, 9.5}; + hist.SetKOrd(k); + for (auto &x : values) { + hist.Fill(x); + } + double th1kval = hist.GetBinContent(10); + // The old TH1K used only single precision, so we can't compare with + // double precision + EXPECT_FLOAT_EQ(output, th1kval); +#pragma GCC diagnostic pop + }; + + check({}, /*k*/ 0, /*dist*/ 0., /*k_actual*/ 0); + check({}, /*k*/ 1, /*dist*/ 0., /*k_actual*/ 0); + check({}, /*k*/ 2, /*dist*/ 0., /*k_actual*/ 0); + + std::vector values{-3.1, -2.4, -1.5, 0., 0.9, 1.9, 2.9, 4.4}; + std::vector valuesabs = values; + std::sort(valuesabs.begin(), valuesabs.end(), [](double a, double b) { return std::abs(a) < std::abs(b); }); + + for (std::size_t i = 0; i < values.size() - 1; ++i) { + double dist = valuesabs[std::max(i, std::size_t(1))]; + int k_actual = std::max(i + 1, std::size_t(2)); + check(values, /*k=*/i + 1, dist, k_actual); + } + + check({0., 0.1}, 0, 1, 2); + + check({3.5, 2.5, 1.5, 0., 1., 2., 3., 4.}, 0, 1, 1); + + check({0., 0.2, 0.4, 0.6, 0.8, 1., 1.2, 1.4}, 0, 1.2, 7); +} diff --git a/roottest/graphics/CMakeLists.txt b/roottest/graphics/CMakeLists.txt index 4b9845b1efb50..b3fa484a4bd97 100644 --- a/roottest/graphics/CMakeLists.txt +++ b/roottest/graphics/CMakeLists.txt @@ -112,7 +112,6 @@ set(TEST_DESCRIPTIONS "h2_cut a hist" "h2proj a hist" "histpalettecolor a hist" - "hksimple a hist" "hlHisto1 a hist" "hlHisto2 a hist" #"hlHisto4 a hist" TODO: Fails on all platforms! diff --git a/roottest/graphics/macros/hist/hksimple.C b/roottest/graphics/macros/hist/hksimple.C deleted file mode 100644 index 717ae512b3647..0000000000000 --- a/roottest/graphics/macros/hist/hksimple.C +++ /dev/null @@ -1,55 +0,0 @@ -/// \file -/// \ingroup tutorial_hist -/// \notebook -/// Illustrates the advantages of a TH1K histogram -/// -/// \macro_image -/// \macro_code -/// -/// \author Victor Perevovchikov - -void canvasRefresh(TCanvas *c1) -{ - for (Int_t j = 0; j < 3; j++) - c1->GetPad(j+1)->Modified(); - - c1->Modified(); - c1->Update(); - - gSystem->ProcessEvents(); -} - -void hksimple() -{ -// Create a new canvas. - TCanvas* c1 = new TCanvas("c1","Dynamic Filling Example",200,10,600,900); - -// Create a normal histogram and two TH1K histograms - TH1 *hpx[3]; - hpx[0] = new TH1F("hp0","Normal histogram",1000,-4,4); - hpx[1] = new TH1K("hk1","Nearest Neighbour of order 3",1000,-4,4); - hpx[2] = new TH1K("hk2","Nearest Neighbour of order 16",1000,-4,4,16); - c1->Divide(1,3); - for (Int_t j = 0; j < 3; j++) { - c1->cd(j + 1); - hpx[j]->SetFillColor(48); - hpx[j]->Draw(); - } - -// Fill histograms randomly - gRandom->SetSeed(12345); - Float_t px, py, pz; - const Int_t kUPDATE = 10; - for (Int_t i = 0; i <= 300; i++) { - gRandom->Rannor(px,py); - for (Int_t j = 0; j < 3; j++) - hpx[j]->Fill(px); - if (i && (i % kUPDATE) == 0) - canvasRefresh(c1); - } - - for (Int_t j = 0; j < 3; j++) - hpx[j]->Fit("gaus"); - - canvasRefresh(c1); -} \ No newline at end of file diff --git a/tutorials/hist/hist012_TH1_hksimple.C b/tutorials/hist/hist012_TH1_hksimple.C deleted file mode 100644 index c47a569a4747a..0000000000000 --- a/tutorials/hist/hist012_TH1_hksimple.C +++ /dev/null @@ -1,56 +0,0 @@ -/// \file -/// \ingroup tutorial_hist -/// \notebook -/// \preview Illustrates the advantages of a TH1K histogram. -/// -/// \macro_image -/// \macro_code -/// -/// \date November 2022 -/// \author Victor Perevovchikov - -void canvasRefresh(TCanvas *c1) -{ - for (Int_t j = 0; j < 3; j++) - c1->GetPad(j + 1)->Modified(); - - c1->Modified(); - c1->Update(); - - gSystem->ProcessEvents(); -} - -void hist012_TH1_hksimple() -{ - // Create a new canvas. - TCanvas *c1 = new TCanvas("c1", "Dynamic Filling Example", 200, 10, 600, 900); - - // Create a normal histogram and two TH1K histograms - TH1 *hpx[3]; - hpx[0] = new TH1F("hp0", "Normal histogram", 1000, -4, 4); - hpx[1] = new TH1K("hk1", "Nearest Neighbour of order 3", 1000, -4, 4); - hpx[2] = new TH1K("hk2", "Nearest Neighbour of order 16", 1000, -4, 4, 16); - c1->Divide(1, 3); - for (Int_t j = 0; j < 3; j++) { - c1->cd(j + 1); - hpx[j]->SetFillColor(48); - hpx[j]->Draw(); - } - - // Fill histograms randomly - gRandom->SetSeed(12345); - Float_t px, py, pz; - const Int_t kUPDATE = 10; - for (Int_t i = 0; i <= 300; i++) { - gRandom->Rannor(px, py); - for (Int_t j = 0; j < 3; j++) - hpx[j]->Fill(px); - if (i && (i % kUPDATE) == 0) - canvasRefresh(c1); - } - - for (Int_t j = 0; j < 3; j++) - hpx[j]->Fit("gaus"); - - canvasRefresh(c1); -} diff --git a/tutorials/hist/index.md b/tutorials/hist/index.md index 5c0bc8bf04015..15e67eb615919 100644 --- a/tutorials/hist/index.md +++ b/tutorials/hist/index.md @@ -80,7 +80,6 @@ The examples below showcase the same functionalities through 3 different impleme | hist009_TH1_normalize.C | | | Normalizing a Histogram. | | hist010_TH1_two_scales.C | hist010_TH1_two_scales.py | hist010_TH1_two_scales_uhi.py|Draw two histograms on one canvas using different y-axis scales. | | hist011_TH1_legend_autoplaced.C | | | Automatic placing of the legend. | -| hist012_TH1_hksimple.C | | | Dynamic filling of TH1K histograms. | | hist013_TH1_rebin.C | | | Create a variable bin-width histogram and change bin sizes. | | hist014_TH1_cumulative.C | | | Illustrate use of the TH1::GetCumulative method. | | hist015_TH1_read_and_draw.C | hist015_TH1_read_and_draw.py | hist015_TH1_read_and_draw_uhi.py|Read a 1D histogram from a ROOT File and draw it. |