Skip to content

Commit 0065e65

Browse files
committed
Introduce TMath::KNNDensity and deprecate TH1K
The `TH1K` class is deprecated and will be removed in 6.40. It did not implement the `TH1` interface consistently, and limited the usability of the k-neighbors method it implemented by closely coupling the algorithm with the histogram class. Instead, one should use the new `TMath::KNNDensity` function that implements the same mathematical logic.
1 parent 8551484 commit 0065e65

File tree

16 files changed

+147
-140
lines changed

16 files changed

+147
-140
lines changed

README/ReleaseNotes/v638/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* The `rpath` build option is deprecated. It is now without effect.
1111
Relative RPATHs to the main ROOT libraries are unconditionally appended to all ROOT executables and libraries if the operating system supports it.
1212
If you want a ROOT build without RPATHs, use the canonical CMake variable `CMAKE_SKIP_INSTALL_RPATH=TRUE`.
13+
* The `TH1K` class is deprecated and will be removed in 6.40. It did not implement the `TH1` interface consistently, and limited the usability of the k-neighbors method it implemented by closely coupling the algorithm with the histogram class. Please use the new `TMath::KNNDensity` function that implements the same mathematical logic.
1314

1415
## Core Libraries
1516
* Behavior change: when selecting a template instantiation for a dictionary, all the template arguments have to be fully defined - the forward declarations are not enough any more. The error prompted by the dictionary generator will be `Warning: Unused class rule: MyTemplate<MyFwdDeclaredClass>`.

bindings/pyroot/pythonizations/python/ROOT/_pythonization/_th1.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,6 @@ def _FillWithNumpyArray(self, *args):
233233
"TH1L",
234234
"TH1F",
235235
"TH1D",
236-
"TH1K",
237236
"TProfile",
238237
]
239238

bindings/pyroot/pythonizations/python/ROOT/_pythonization/_uhi.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,6 @@ def _get_sum_of_weights_squared(self) -> np.typing.NDArray[Any]: # noqa: F821
566566
"TH1C": _values_by_copy,
567567
"TH2C": _values_by_copy,
568568
"TH3C": _values_by_copy,
569-
"TH1K": _values_by_copy,
570569
"TH2K": _values_by_copy,
571570
"TH3K": _values_by_copy,
572571
"TProfile": _values_by_copy,

bindings/pyroot/pythonizations/test/memory.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ def test_objects_ownership_with_subdir(self):
8989
"TH1L": ("h", "h", 10, 0, 10),
9090
"TH1F": ("h", "h", 10, 0, 10),
9191
"TH1D": ("h", "h", 10, 0, 10),
92-
"TH1K": ("h", "h", 10, 0, 10),
9392
"TProfile": ("h", "h", 10, 0, 10),
9493
"TH2C": ("h", "h", 10, 0, 10, 10, 0, 10),
9594
"TH2S": ("h", "h", 10, 0, 10, 10, 0, 10),

bindings/pyroot/pythonizations/test/uhi_indexing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def _special_setting(hist):
1212
# For these classes, SetBinContent works differently than for other classes,
1313
# as in it does not set the bin content to the specified value, but does some other calculations
1414
# for that, these classes will be tested differently
15-
return isinstance(hist, (ROOT.TProfile, ROOT.TProfile2D, ROOT.TProfile2Poly, ROOT.TProfile3D, ROOT.TH1K))
15+
return isinstance(hist, (ROOT.TProfile, ROOT.TProfile2D, ROOT.TProfile2Poly, ROOT.TProfile3D))
1616

1717

1818
def _get_index_for_dimension(hist, index):

gui/webgui6/src/TWebCanvas.cxx

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
#include "TF2.h"
3737
#include "TH1.h"
3838
#include "TH2.h"
39-
#include "TH1K.h"
4039
#include "THStack.h"
4140
#include "TMultiGraph.h"
4241
#include "TEnv.h"
@@ -935,27 +934,6 @@ void TWebCanvas::CreatePadSnapshot(TPadWebSnapshot &paddata, TPad *pad, Long64_t
935934
CreatePadSnapshot(paddata.NewSubPad(), (TPad *)obj, version, nullptr);
936935
} else if (!process_primitives) {
937936
continue;
938-
} else if (obj->InheritsFrom(TH1K::Class())) {
939-
flush_master();
940-
TH1K *hist = static_cast<TH1K *>(obj);
941-
942-
Int_t nbins = hist->GetXaxis()->GetNbins();
943-
944-
TH1D *h1 = new TH1D("__dummy_name__", hist->GetTitle(), nbins, hist->GetXaxis()->GetXmin(), hist->GetXaxis()->GetXmax());
945-
h1->SetDirectory(nullptr);
946-
h1->SetName(hist->GetName());
947-
hist->TAttLine::Copy(*h1);
948-
hist->TAttFill::Copy(*h1);
949-
hist->TAttMarker::Copy(*h1);
950-
for (Int_t n = 1; n <= nbins; ++n)
951-
h1->SetBinContent(n, hist->GetBinContent(n));
952-
953-
TIter fiter(hist->GetListOfFunctions());
954-
while (auto fobj = fiter())
955-
h1->GetListOfFunctions()->Add(fobj->Clone());
956-
957-
paddata.NewPrimitive(obj, iter.GetOption()).SetSnapshot(TWebSnapshot::kObject, h1, kTRUE);
958-
959937
} else if (obj->InheritsFrom(TH1::Class())) {
960938
flush_master();
961939

hist/hist/inc/TH1K.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,6 @@ class TH1K : public TH1, public TArrayF {
6868
void SetKOrd(Int_t k){fKOrd=k;}
6969

7070
ClassDefOverride(TH1K,2) //1-Dim Nearest Kth neighbour method
71-
};
71+
} R__DEPRECATED(6, 40, "Use TMath::KNNDensity");
7272

7373
#endif

hist/hist/src/TH1K.cxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ In this method :
2828
that DistanceToNearestKthNeighbour > BinWidth and K >=3
2929
3030
This class has been implemented by Victor Perevoztchikov <perev@bnl.gov>
31+
32+
33+
\deprecated The `TH1K` class is deprecated and will be removed in 6.40. It did not implement the `TH1` interface consistently, and limited the usability of the k-neighbors method it implemented by closely coupling the algorithm with the histogram class. Please use the new `TMath::KNNDensity` function that implements the same mathematical logic.
3134
*/
3235

3336
ClassImp(TH1K);

math/mathcore/inc/TMath.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "TMathBase.h"
1616

1717
#include "TError.h"
18+
#include "ROOT/RSpan.hxx"
19+
1820
#include <algorithm>
1921
#include <limits>
2022
#include <cmath>
@@ -574,6 +576,9 @@ struct Limits {
574576
Double_t Gamma(Double_t a,Double_t x);
575577
Double_t GammaDist(Double_t x, Double_t gamma, Double_t mu=0, Double_t beta=1);
576578
Double_t LnGamma(Double_t z);
579+
580+
void KNNDensity(std::span<const double> observations, std::span<const double> queries, std::span<double> result,
581+
int k, double dmin = 0.0);
577582
}
578583

579584
////////////////////////////////////////////////////////////////////////////////

math/mathcore/src/TMath.cxx

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3195,6 +3195,87 @@ Double_t TMath::VavilovDenEval(Double_t rlam, Double_t *AC, Double_t *HC, Int_t
31953195
return v;
31963196
}
31973197

3198+
/// \brief Computes a 1D k-nearest neighbor density estimate for a set of query points.
3199+
///
3200+
/// For each query point, this function estimates the local density based on
3201+
/// the distance to the k-th nearest neighbor. A minimum distance threshold
3202+
/// `dmin` is used to avoid division by zero and numerical instability when a
3203+
/// query point coincides with an observation or neighbors are extremely close.
3204+
///
3205+
/// \param observations Data points used for density estimation.
3206+
/// \param queries Points at which to estimate the density.
3207+
/// \param result Output for the density estimates. Must have the same size as `queries`.
3208+
/// \param k Number of nearest neighbors to consider. If k >= number of observations,
3209+
/// it is automatically reduced to n-1.
3210+
/// \param dmin Minimum allowed neighbor distance to prevent division by zero. Default is zero.
3211+
///
3212+
/// The density estimate for each query point x is:
3213+
///
3214+
/// \f[
3215+
/// \hat{p}(x) = \frac{n}{2(n+1)} \frac{k_{\text{actual}}}{d_\(k_{\text{actual}}\)(x)},
3216+
/// \f]
3217+
///
3218+
/// where \(d_\(k_{\text{actual}}\)(x)\) is the distance and \(k_{\text{actual}}\) may be different from \(k\)
3219+
/// when it needs to be greater than \(k\) to keep the \(d > d_{\text{min}} constraint.
3220+
/// If no neighbor is outside `dmin`, the minimum allowed distance will be used as the distance itself.
3221+
///
3222+
/// \note This function implements the math of the former `TH1K` class. There are some differences:
3223+
///
3224+
/// 1. The `TH1K` class additionally multiplied all density estimates with the bin width.
3225+
/// 2. If the `k` parameter was not specified by the user, the `TH1K` class used the bin width as `dmin`.
3226+
/// 3. If the `k` parameter was explicitly specified, the `dmin` parameter was set to 1.e-6.
3227+
void TMath::KNNDensity(std::span<const double> observations, std::span<const double> queries, std::span<double> result,
3228+
int k, double dmin)
3229+
{
3230+
int n = observations.size();
3231+
if (k < 0) {
3232+
Error("KNNDensity", "k must be positive");
3233+
return;
3234+
}
3235+
if (k >= static_cast<int>(n))
3236+
k = static_cast<int>(n - 1);
3237+
3238+
if (result.size() != queries.size()) {
3239+
Error("KNNDensity", "Result span size must match query span size");
3240+
return;
3241+
}
3242+
3243+
// Make a sorted copy of the observations for binary search
3244+
std::vector<double> sorted_obs(observations.begin(), observations.end());
3245+
std::sort(sorted_obs.begin(), sorted_obs.end());
3246+
3247+
// constant factor in the output
3248+
const double factor = n * 0.5 / (n + 1.0);
3249+
3250+
for (std::size_t i = 0; i < queries.size(); ++i) {
3251+
double x = queries[i];
3252+
3253+
// Find index in sorted_obs
3254+
int jr = std::distance(sorted_obs.begin(), std::lower_bound(sorted_obs.begin(), sorted_obs.end(), x));
3255+
int jl = jr - 1;
3256+
3257+
double ff = 0.0;
3258+
int k_actual = 0;
3259+
3260+
for (; k_actual + 1 <= k || ff <= dmin; ++k_actual) {
3261+
3262+
double fl = (jl >= 0) ? std::abs(sorted_obs[jl] - x) : std::numeric_limits<double>::max();
3263+
double fr = (jr < n) ? std::abs(sorted_obs[jr] - x) : std::numeric_limits<double>::max();
3264+
3265+
if (jl < 0 && jr >= n)
3266+
break;
3267+
3268+
ff = std::min(fl, fr);
3269+
fl < fr ? --jl : ++jr;
3270+
}
3271+
3272+
if (ff == 0.0)
3273+
ff = dmin; // avoid division by zero
3274+
3275+
// Modified kNN density formula
3276+
result[i] = factor == 0.0 ? 0.0 : factor * k_actual / ff;
3277+
}
3278+
}
31983279

31993280
//explicitly instantiate template functions from VecCore
32003281
#ifdef R__HAS_VECCORE

0 commit comments

Comments
 (0)