From d3f748ab06955916e654a4f71d8eba682386f3a0 Mon Sep 17 00:00:00 2001 From: oisin-m Date: Mon, 3 Nov 2025 20:59:45 +0100 Subject: [PATCH 1/4] feat: use scores library for stats --- hat/compute_hydrostats/stat_calc.py | 10 ++-- hat/compute_hydrostats/stats.py | 83 ----------------------------- 2 files changed, 7 insertions(+), 86 deletions(-) delete mode 100644 hat/compute_hydrostats/stats.py diff --git a/hat/compute_hydrostats/stat_calc.py b/hat/compute_hydrostats/stat_calc.py index fd147ff..32e7515 100644 --- a/hat/compute_hydrostats/stat_calc.py +++ b/hat/compute_hydrostats/stat_calc.py @@ -2,7 +2,7 @@ from earthkit.hydro._readers import find_main_var import numpy as np import xarray as xr -from hat.compute_hydrostats import stats +import scores def load_da(ds_config): @@ -40,10 +40,14 @@ def stat_calc(config): obs_da = load_da(obs_config) new_coords = config["output"]["coords"] sim_da, obs_da = find_valid_subset(sim_da, obs_da, sim_config["coords"], obs_config["coords"], new_coords) + time_dim = new_coords.get("t", "time") stat_dict = {} for stat in config["stats"]: - func = getattr(stats, stat) - stat_dict[stat] = func(sim_da, obs_da, new_coords.get("t", "time")) + parts = stat.split(".") + func = scores + for part in parts: + func = getattr(func, part) + stat_dict[stat] = func(sim_da, obs_da, reduce_dims=time_dim) ds = xr.Dataset(stat_dict) if config["output"].get("file", None) is not None: ds.to_netcdf(config["output"]["file"]) diff --git a/hat/compute_hydrostats/stats.py b/hat/compute_hydrostats/stats.py deleted file mode 100644 index 8c8aff9..0000000 --- a/hat/compute_hydrostats/stats.py +++ /dev/null @@ -1,83 +0,0 @@ -import numpy as np -import xarray as xr - - -def bias(sim_da, obs_da, time_name): - return (sim_da - obs_da).mean(dim=time_name, skipna=True) - - -def mae(sim_da, obs_da, time_name): - return np.abs(sim_da - obs_da).mean(dim=time_name, skipna=True) - - -def mape(sim_da, obs_da, time_name): - return mae(sim_da, obs_da, time_name) / np.abs(obs_da).sum(dim=time_name, skipna=True) - - -def mse(sim_da, obs_da, time_name): - return ((sim_da - obs_da) ** 2).mean(dim=time_name, skipna=True) - - -def rmse(sim_da, obs_da, time_name): - return np.sqrt(mse(sim_da, obs_da, time_name)) - - -def br(sim_da, obs_da, time_name): - # as defined in: - # Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios. Journal of hydrology, 424, 264-277. - return sim_da.mean(dim=time_name, skipna=True) / obs_da.mean(dim=time_name, skipna=True) - - -def vr(sim_da, obs_da, time_name): - # as defined in: - # Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios. Journal of hydrology, 424, 264-277. - return (sim_da.std(dim=time_name, skipna=True) * obs_da.mean(dim=time_name, skipna=True)) / ( - obs_da.std(dim=time_name, skipna=True) * sim_da.mean(dim=time_name, skipna=True) - ) - - -def pc_bias(sim_da, obs_da, time_name): - return (sim_da - obs_da).mean(dim=time_name, skipna=True) / obs_da.mean(dim=time_name, skipna=True) - - -def correlation(sim_da, obs_da, time_name): - def _corr(a, b): - mask = ~np.isnan(a) & ~np.isnan(b) - if np.sum(mask) < 2: - return np.nan - return np.corrcoef(a[mask], b[mask])[0, 1] - - return xr.apply_ufunc( - _corr, - sim_da, - obs_da, - input_core_dims=[[time_name], [time_name]], - vectorize=True, - dask="parallelized", - output_dtypes=[float], - ) - - -def kge(sim_da, obs_da, time_name): - # as defined in: - # Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios. Journal of hydrology, 424, 264-277. - B = br(sim_da, obs_da, time_name) - y = vr(sim_da, obs_da, time_name) - r = correlation(sim_da, obs_da, time_name) - - return 1 - np.sqrt((r - 1) ** 2 + (B - 1) ** 2 + (y - 1) ** 2) - - -def index_agreement(sim_da, obs_da, time_name): - numerator = ((obs_da - sim_da) ** 2).sum(dim=time_name, skipna=True) - mean_obs = obs_da.mean(dim=time_name, skipna=True) - denominator = ((np.abs(sim_da - mean_obs) + np.abs(obs_da - mean_obs)) ** 2).sum(dim=time_name, skipna=True) - - return 1 - (numerator / denominator) - - -def nse(sim_da, obs_da, time_name): - numerator = ((sim_da - obs_da) ** 2).sum(dim=time_name, skipna=True) - denominator = ((obs_da - obs_da.mean(dim=time_name, skipna=True)) ** 2).sum(dim=time_name, skipna=True) - - return 1 - (numerator / denominator) From bb831b71573ff9098d4ff45ab6a21aea1a97237b Mon Sep 17 00:00:00 2001 From: oisin-m Date: Mon, 3 Nov 2025 21:00:10 +0100 Subject: [PATCH 2/4] feat: update hydrostats notebook with scores api --- notebooks/workflow/hydrostats_computation.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/notebooks/workflow/hydrostats_computation.ipynb b/notebooks/workflow/hydrostats_computation.ipynb index 2292595..d2e6baf 100644 --- a/notebooks/workflow/hydrostats_computation.ipynb +++ b/notebooks/workflow/hydrostats_computation.ipynb @@ -19,20 +19,20 @@ "source": [ "config = {\n", " \"sim\": {\n", - " \"source\": [\"file\", \"extracted_timeseries.nc\"],\n", + " \"source\": [\"file\", \"simulated_dataset.nc\"],\n", " \"coords\": {\n", " \"s\": \"station\",\n", " \"t\": \"time\"\n", " }\n", " },\n", " \"obs\": {\n", - " \"source\": [\"file\", \"observations.nc\"],\n", + " \"source\": [\"file\", \"observed_dataset.nc\"],\n", " \"coords\": {\n", " \"s\": \"station\",\n", " \"t\": \"time\"\n", " }\n", " },\n", - " \"stats\" : [\"bias\", \"apb\", \"apb2\"],\n", + " \"stats\" : [\"continuous.kge\", \"continuous.mse\"],\n", " \"output\": {\n", " \"file\": \"statistics.nc\",\n", " \"coords\": {\n", @@ -49,7 +49,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "hat", "language": "python", "name": "python3" }, From 974124a7f69fc70629d9434e96b66ee4f6300afc Mon Sep 17 00:00:00 2001 From: oisin-m Date: Mon, 3 Nov 2025 21:00:58 +0100 Subject: [PATCH 3/4] chore: add scores as dep --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 490e424..7df1775 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,8 @@ dependencies = [ "floods-html", "geopy", "setuptools", - "anywidget" + "anywidget", + "scores" ] [project.urls] From cc151ff9c3401df7550c6e8955691f2f1fd65def Mon Sep 17 00:00:00 2001 From: oisin-m Date: Mon, 3 Nov 2025 21:05:16 +0100 Subject: [PATCH 4/4] chore: add attribution for scores --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 7585587..cce6a27 100644 --- a/README.md +++ b/README.md @@ -71,3 +71,9 @@ In applying this licence, ECMWF does not waive the privileges and immunities granted to it by virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction. ``` + +## Attributions + +The hydrostats part of this library uses the [`scores`](https://scores.readthedocs.io) library [1], which is available under [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.txt). + +[1] Leeuwenburg, T., Loveday, N., Ebert, E. E., Cook, H., Khanarmuei, M., Taggart, R. J., Ramanathan, N., Carroll, M., Chong, S., Griffiths, A., & Sharples, J. (2024). scores: A Python package for verifying and evaluating models and predictions with xarray. Journal of Open Source Software, 9(99), 6889. https://doi.org/10.21105/joss.06889