From d3f748ab06955916e654a4f71d8eba682386f3a0 Mon Sep 17 00:00:00 2001
From: oisin-m <oisinmmm@gmail.com>
Date: Mon, 3 Nov 2025 20:59:45 +0100
Subject: [PATCH 1/4] feat: use scores library for stats

---
 hat/compute_hydrostats/stat_calc.py | 10 ++--
 hat/compute_hydrostats/stats.py     | 83 -----------------------------
 2 files changed, 7 insertions(+), 86 deletions(-)
 delete mode 100644 hat/compute_hydrostats/stats.py

diff --git a/hat/compute_hydrostats/stat_calc.py b/hat/compute_hydrostats/stat_calc.py
index fd147ff..32e7515 100644
--- a/hat/compute_hydrostats/stat_calc.py
+++ b/hat/compute_hydrostats/stat_calc.py
@@ -2,7 +2,7 @@
 from earthkit.hydro._readers import find_main_var
 import numpy as np
 import xarray as xr
-from hat.compute_hydrostats import stats
+import scores
 
 
 def load_da(ds_config):
@@ -40,10 +40,14 @@ def stat_calc(config):
     obs_da = load_da(obs_config)
     new_coords = config["output"]["coords"]
     sim_da, obs_da = find_valid_subset(sim_da, obs_da, sim_config["coords"], obs_config["coords"], new_coords)
+    time_dim = new_coords.get("t", "time")
     stat_dict = {}
     for stat in config["stats"]:
-        func = getattr(stats, stat)
-        stat_dict[stat] = func(sim_da, obs_da, new_coords.get("t", "time"))
+        parts = stat.split(".")
+        func = scores
+        for part in parts:
+            func = getattr(func, part)
+        stat_dict[stat] = func(sim_da, obs_da, reduce_dims=time_dim)
     ds = xr.Dataset(stat_dict)
     if config["output"].get("file", None) is not None:
         ds.to_netcdf(config["output"]["file"])
diff --git a/hat/compute_hydrostats/stats.py b/hat/compute_hydrostats/stats.py
deleted file mode 100644
index 8c8aff9..0000000
--- a/hat/compute_hydrostats/stats.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import numpy as np
-import xarray as xr
-
-
-def bias(sim_da, obs_da, time_name):
-    return (sim_da - obs_da).mean(dim=time_name, skipna=True)
-
-
-def mae(sim_da, obs_da, time_name):
-    return np.abs(sim_da - obs_da).mean(dim=time_name, skipna=True)
-
-
-def mape(sim_da, obs_da, time_name):
-    return mae(sim_da, obs_da, time_name) / np.abs(obs_da).sum(dim=time_name, skipna=True)
-
-
-def mse(sim_da, obs_da, time_name):
-    return ((sim_da - obs_da) ** 2).mean(dim=time_name, skipna=True)
-
-
-def rmse(sim_da, obs_da, time_name):
-    return np.sqrt(mse(sim_da, obs_da, time_name))
-
-
-def br(sim_da, obs_da, time_name):
-    # as defined in:
-    # Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios. Journal of hydrology, 424, 264-277.
-    return sim_da.mean(dim=time_name, skipna=True) / obs_da.mean(dim=time_name, skipna=True)
-
-
-def vr(sim_da, obs_da, time_name):
-    # as defined in:
-    # Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios. Journal of hydrology, 424, 264-277.
-    return (sim_da.std(dim=time_name, skipna=True) * obs_da.mean(dim=time_name, skipna=True)) / (
-        obs_da.std(dim=time_name, skipna=True) * sim_da.mean(dim=time_name, skipna=True)
-    )
-
-
-def pc_bias(sim_da, obs_da, time_name):
-    return (sim_da - obs_da).mean(dim=time_name, skipna=True) / obs_da.mean(dim=time_name, skipna=True)
-
-
-def correlation(sim_da, obs_da, time_name):
-    def _corr(a, b):
-        mask = ~np.isnan(a) & ~np.isnan(b)
-        if np.sum(mask) < 2:
-            return np.nan
-        return np.corrcoef(a[mask], b[mask])[0, 1]
-
-    return xr.apply_ufunc(
-        _corr,
-        sim_da,
-        obs_da,
-        input_core_dims=[[time_name], [time_name]],
-        vectorize=True,
-        dask="parallelized",
-        output_dtypes=[float],
-    )
-
-
-def kge(sim_da, obs_da, time_name):
-    # as defined in:
-    # Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios. Journal of hydrology, 424, 264-277.
-    B = br(sim_da, obs_da, time_name)
-    y = vr(sim_da, obs_da, time_name)
-    r = correlation(sim_da, obs_da, time_name)
-
-    return 1 - np.sqrt((r - 1) ** 2 + (B - 1) ** 2 + (y - 1) ** 2)
-
-
-def index_agreement(sim_da, obs_da, time_name):
-    numerator = ((obs_da - sim_da) ** 2).sum(dim=time_name, skipna=True)
-    mean_obs = obs_da.mean(dim=time_name, skipna=True)
-    denominator = ((np.abs(sim_da - mean_obs) + np.abs(obs_da - mean_obs)) ** 2).sum(dim=time_name, skipna=True)
-
-    return 1 - (numerator / denominator)
-
-
-def nse(sim_da, obs_da, time_name):
-    numerator = ((sim_da - obs_da) ** 2).sum(dim=time_name, skipna=True)
-    denominator = ((obs_da - obs_da.mean(dim=time_name, skipna=True)) ** 2).sum(dim=time_name, skipna=True)
-
-    return 1 - (numerator / denominator)

From bb831b71573ff9098d4ff45ab6a21aea1a97237b Mon Sep 17 00:00:00 2001
From: oisin-m <oisinmmm@gmail.com>
Date: Mon, 3 Nov 2025 21:00:10 +0100
Subject: [PATCH 2/4] feat: update hydrostats notebook with scores api

---
 notebooks/workflow/hydrostats_computation.ipynb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/notebooks/workflow/hydrostats_computation.ipynb b/notebooks/workflow/hydrostats_computation.ipynb
index 2292595..d2e6baf 100644
--- a/notebooks/workflow/hydrostats_computation.ipynb
+++ b/notebooks/workflow/hydrostats_computation.ipynb
@@ -19,20 +19,20 @@
    "source": [
     "config = {\n",
     "    \"sim\": {\n",
-    "        \"source\": [\"file\", \"extracted_timeseries.nc\"],\n",
+    "        \"source\": [\"file\", \"simulated_dataset.nc\"],\n",
     "        \"coords\": {\n",
     "            \"s\": \"station\",\n",
     "            \"t\": \"time\"\n",
     "        }\n",
     "    },\n",
     "    \"obs\": {\n",
-    "        \"source\": [\"file\", \"observations.nc\"],\n",
+    "        \"source\": [\"file\", \"observed_dataset.nc\"],\n",
     "        \"coords\": {\n",
     "            \"s\": \"station\",\n",
     "            \"t\": \"time\"\n",
     "        }\n",
     "    },\n",
-    "    \"stats\" : [\"bias\", \"apb\", \"apb2\"],\n",
+    "    \"stats\" : [\"continuous.kge\", \"continuous.mse\"],\n",
     "    \"output\": {\n",
     "        \"file\": \"statistics.nc\",\n",
     "        \"coords\": {\n",
@@ -49,7 +49,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "hat",
    "language": "python",
    "name": "python3"
   },

From 974124a7f69fc70629d9434e96b66ee4f6300afc Mon Sep 17 00:00:00 2001
From: oisin-m <oisinmmm@gmail.com>
Date: Mon, 3 Nov 2025 21:00:58 +0100
Subject: [PATCH 3/4] chore: add scores as dep

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 490e424..7df1775 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,8 @@ dependencies = [
     "floods-html",
     "geopy",
     "setuptools",
-    "anywidget"
+    "anywidget",
+    "scores"
 ]
 
 [project.urls]

From cc151ff9c3401df7550c6e8955691f2f1fd65def Mon Sep 17 00:00:00 2001
From: oisin-m <oisinmmm@gmail.com>
Date: Mon, 3 Nov 2025 21:05:16 +0100
Subject: [PATCH 4/4] chore: add attribution for scores

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 7585587..cce6a27 100644
--- a/README.md
+++ b/README.md
@@ -71,3 +71,9 @@ In applying this licence, ECMWF does not waive the privileges and immunities
 granted to it by virtue of its status as an intergovernmental organisation
 nor does it submit to any jurisdiction.
 ```
+
+## Attributions
+
+The hydrostats part of this library uses the [`scores`](https://scores.readthedocs.io) library [1], which is available under [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.txt).
+
+[1] Leeuwenburg, T., Loveday, N., Ebert, E. E., Cook, H., Khanarmuei, M., Taggart, R. J., Ramanathan, N., Carroll, M., Chong, S., Griffiths, A., & Sharples, J. (2024). scores: A Python package for verifying and evaluating models and predictions with xarray. Journal of Open Source Software, 9(99), 6889. https://doi.org/10.21105/joss.06889