Skip to content

[ENH] Refactor range-based metrics to restore original behavior #2781

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 16, 2025
20 changes: 10 additions & 10 deletions aeon/benchmarking/metrics/anomaly_detection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,27 @@
"ts_fscore",
]

from aeon.benchmarking.metrics.anomaly_detection._binary import (
range_f_score,
range_precision,
range_recall,
)
from aeon.benchmarking.metrics.anomaly_detection._continuous import (
f_score_at_k_points,
f_score_at_k_ranges,
pr_auc_score,
roc_auc_score,
rp_rr_auc_score,
)
from aeon.benchmarking.metrics.anomaly_detection._range_metrics import (
range_f_score,
range_precision,
range_recall,
)
from aeon.benchmarking.metrics.anomaly_detection._range_ts_metrics import (
ts_fscore,
ts_precision,
ts_recall,
)
from aeon.benchmarking.metrics.anomaly_detection._vus_metrics import (
range_pr_auc_score,
range_pr_roc_auc_support,
range_pr_vus_score,
range_roc_auc_score,
range_roc_vus_score,
)
from aeon.benchmarking.metrics.anomaly_detection.range_metrics import (
ts_fscore,
ts_precision,
ts_recall,
)
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,15 @@
import warnings

import numpy as np
from deprecated.sphinx import deprecated

from aeon.benchmarking.metrics.anomaly_detection._range_ts_metrics import (
_binary_to_ranges,
_ts_precision,
_ts_recall,
)
from aeon.benchmarking.metrics.anomaly_detection._util import check_y
from aeon.utils.validation._dependencies import _check_soft_dependencies


# TODO: Remove in v1.2.0
@deprecated(
version="1.1.0",
reason="range_precision is deprecated and will be removed in v1.2.0. "
"Please use ts_precision from the range_metrics module instead.",
category=FutureWarning,
)
def range_precision(
y_true: np.ndarray,
y_pred: np.ndarray,
Expand All @@ -28,24 +24,24 @@ def range_precision(
) -> float:
"""Compute the range-based precision metric.

Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_. This
implementation uses the community package `prts <https://pypi.org/project/prts/>`_
as a soft-dependency.
Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_.

Range precision is the average precision of each predicted anomaly range. For each
predicted continuous anomaly range the overlap size, position, and cardinality is
considered. For more details, please refer to the paper [1]_.

The `alpha` parameter for the existence reward was removed. Because precision
emphasizes prediction quality, there is no need for an existence reward and this
value should always be set to 0.

Parameters
----------
y_true : np.ndarray
True binary labels of shape (n_instances,).
y_pred : np.ndarray
Anomaly scores for each point of the time series of shape (n_instances,).
alpha : float
Weight of the existence reward. Because precision by definition emphasizes on
prediction quality, there is no need for an existence reward and this value
should always be set to 0.
DEPRECATED. Default is 0 = no existence reward.
cardinality : {'reciprocal', 'one', 'udf_gamma'}
Cardinality type.
bias : {'flat', 'front', 'middle', 'back'}
Expand All @@ -64,27 +60,28 @@ def range_precision(
1920–30. 2018.
http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
"""
_check_soft_dependencies("prts", obj="range_precision", suppress_import_stdout=True)

from prts import ts_precision

if alpha != 0:
warnings.warn(
"The alpha parameter should not be used in range precision. This "
"parameter is removed in 1.3.0.",
stacklevel=2,
category=FutureWarning,
)
y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)
if np.unique(y_pred).shape[0] == 1:
warnings.warn(
"Cannot compute metric for a constant value in y_score, returning 0.0!",
stacklevel=2,
)
return 0.0
return ts_precision(y_true, y_pred, alpha=alpha, cardinality=cardinality, bias=bias)

y_pred_ranges = _binary_to_ranges(y_pred)
y_true_ranges = _binary_to_ranges(y_true)
return _ts_precision(
y_pred_ranges, y_true_ranges, gamma=cardinality, bias_type=bias
)


# TODO: Remove in v1.2.0
@deprecated(
version="1.1.0",
reason="range_recall is deprecated and will be removed in v1.2.0. "
"Please use ts_recall from the range_metrics module instead.",
category=FutureWarning,
)
def range_recall(
y_true: np.ndarray,
y_pred: np.ndarray,
Expand All @@ -94,9 +91,7 @@ def range_recall(
) -> float:
"""Compute the range-based recall metric.

Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_. This
implementation uses the community package `prts <https://pypi.org/project/prts/>`_
as a soft-dependency.
Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_.

Range recall is the average recall of each real anomaly range. For each real
anomaly range the overlap size, position, and cardinality with predicted anomaly
Expand Down Expand Up @@ -132,27 +127,21 @@ def range_recall(
1920–30. 2018.
http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
"""
_check_soft_dependencies("prts", obj="range_recall", suppress_import_stdout=True)

from prts import ts_recall

y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)
if np.unique(y_pred).shape[0] == 1:
warnings.warn(
"Cannot compute metric for a constant value in y_score, returning 0.0!",
stacklevel=2,
)
return 0.0
return ts_recall(y_true, y_pred, alpha=alpha, cardinality=cardinality, bias=bias)

y_pred_ranges = _binary_to_ranges(y_pred)
y_true_ranges = _binary_to_ranges(y_true)
return _ts_recall(
y_pred_ranges, y_true_ranges, alpha=alpha, gamma=cardinality, bias_type=bias
)


# TODO: Remove in v1.2.0
@deprecated(
version="1.1.0",
reason="range_f_score is deprecated and will be removed in v1.2.0. "
"Please use ts_fscore from the range_metrics module instead.",
category=FutureWarning,
)
def range_f_score(
y_true: np.ndarray,
y_pred: np.ndarray,
Expand All @@ -165,14 +154,15 @@ def range_f_score(
) -> float:
"""Compute the F-score using the range-based recall and precision metrics.

Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_. This
implementation uses the community package `prts <https://pypi.org/project/prts/>`_
as a soft-dependency.
Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_.

The F-beta score is the weighted harmonic mean of precision and recall, reaching
its optimal value at 1 and its worst value at 0. This implementation uses the
range-based precision and range-based recall as basis.

The `p_alpha` parameter for the potential existence reward in the calculation of
range-based precision was removed. `p_alpha` should always be set to 0, anyway.

Parameters
----------
y_true : np.ndarray
Expand All @@ -183,8 +173,7 @@ def range_f_score(
F-score beta determines the weight of recall in the combined score.
beta < 1 lends more weight to precision, while beta > 1 favors recall.
p_alpha : float
Weight of the existence reward for the range-based precision. For most - when
not all - cases, `p_alpha` should be set to 0.
DEPRECATED. Default is 0 = no existence reward for precision.
r_alpha : float
Weight of the existence reward. If 0: no existence reward, if 1: only
existence reward.
Expand All @@ -208,9 +197,13 @@ def range_f_score(
1920–30. 2018.
http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
"""
_check_soft_dependencies("prts", obj="range_recall", suppress_import_stdout=True)

from prts import ts_fscore
if p_alpha != 0:
warnings.warn(
"The p_alpha parameter should not be used. This parameter is removed "
"in 1.3.0.",
stacklevel=2,
category=FutureWarning,
)

y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)
if np.unique(y_pred).shape[0] == 1:
Expand All @@ -219,13 +212,18 @@ def range_f_score(
stacklevel=2,
)
return 0.0
return ts_fscore(
y_true,
y_pred,
beta=beta,
p_alpha=p_alpha,
r_alpha=r_alpha,
cardinality=cardinality,
p_bias=p_bias,
r_bias=r_bias,
)

y_pred_ranges = _binary_to_ranges(y_pred)
y_true_ranges = _binary_to_ranges(y_true)

precision = _ts_precision(y_pred_ranges, y_true_ranges, cardinality, p_bias)
recall = _ts_recall(y_pred_ranges, y_true_ranges, cardinality, r_bias, r_alpha)

if precision + recall > 0:
fscore = ((1 + beta**2) * (precision * recall)) / (
beta**2 * (precision + recall)
)
else:
fscore = 0.0

return fscore
Loading