From 8e4decdbb30dd57bae75cea4b2448b6728fdae6e Mon Sep 17 00:00:00 2001
From: remic
Date: Wed, 9 Apr 2025 10:09:20 -0400
Subject: [PATCH 1/4] another suite of functions (and their tests) to do it all

---
 enacts/tests/test_calc.py | 360 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 360 insertions(+)

diff --git a/enacts/tests/test_calc.py b/enacts/tests/test_calc.py
index 60547d47..06d1c51e 100644
--- a/enacts/tests/test_calc.py
+++ b/enacts/tests/test_calc.py
@@ -101,6 +101,196 @@ def test_replace_intervals_with_points():
     )
 
 
+def test_regroup_daily_to_7D():
+    t = pd.date_range(start="2000-01-01", end="2000-01-28", freq="1D")
+    values = 1 + np.arange(t.size)
+    precip = xr.DataArray(values, coords={"T": t})
+    precip_int = calc.regroup(precip, group="7D", method="sum")
+
+    assert (precip_int.data == (
+        precip
+        .resample(T="7D")
+        .sum(skipna=True, min_count=7)
+        .dropna("T")
+        .data
+    )).all()
+
+
+def test_regroup_daily_to_pentad():
+    t = pd.date_range(start="2001-01-01T120000", end="2001-04-01T120000", freq="1D")
+    t_leap = pd.date_range(
+        start="2000-01-01T120000", end="2000-04-01T120000", freq="1D"
+    )
+    values = 1 + np.arange(t.size)
+    values_leap = 1 + np.arange(t_leap.size)
+    precip = xr.DataArray(values, coords={"T": t})
+    precip_leap = xr.DataArray(values_leap, coords={"T": t_leap})
+    precip_pentad = calc.regroup(precip, group="pentad", method="sum")
+    precip_pentad_leap = calc.regroup(precip_leap, group="pentad", method="sum")
+
+    assert (precip_pentad.data == [
+        15., 40., 65., 90., 115., 140., 165., 190., 215., 240., 265.,
+        230., 310., 335., 360., 385., 410., 435.
+    ]).all()
+    assert (precip_pentad_leap.data == [
+        15., 40., 65., 90., 115., 140., 165., 190., 215., 240., 265.,
+        290., 315., 340., 365., 390., 415., 440.
+    ]).all()
+    assert ((precip_pentad_leap.data - precip_pentad.data) == [
+        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 5., 5., 5., 5., 5., 5.,
+    ]).all()
+
+
+def test_regroup_daily_to_8day():
+    t = pd.date_range(start="2020-11-01T120000", end="2021-02-01T120000", freq="1D")
+    values = 1 + np.arange(t.size)
+    precip = xr.DataArray(values, coords={"T": t})
+    precip_8day = calc.regroup(precip, group="8day", method="sum")
+
+    assert (precip_8day.data == [
+        92., 156., 220., 284., 348., 412., 351., 524., 588., 652., 716.
+ ]).all() + + +def test_regroup_daily_to_dekad(): + t = pd.date_range(start="2020-01-01T120000", end="2020-03-09T120000", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_dekad = calc.regroup(precip, group="dekad", method="sum") + + assert (precip_dekad.data == [55., 155., 286., 365., 465., 504.]).all() + + +def test_regroup_daily_to_16day(): + t = pd.date_range(start="2000-11-01T120000", end="2001-02-01T120000", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_16day = calc.regroup(precip, group="16day", method="sum") + + assert (precip_16day.data == [376., 632., 763., 1112., 1368.]).all() + + +def test_regroup_daily_to_1M(): + t = pd.date_range(start="2000-11-01T120000", end="2001-02-01T120000", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_month = calc.regroup(precip, group="1M", method="sum") + + assert (precip_month.data == ( + precip + .resample(T="1M") + .sum(skipna=True, min_count=30) + .dropna("T") + .data + )).all() + + +def test_regroup_daily_to_5M(): + t = pd.date_range(start="2000-01-01", end="2001-12-31", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_month = calc.regroup(precip, group="5M", method="sum") + + assert (precip_month.data[0] == precip.sel( + T=slice("2000-01-01", "2000-05-31") + ).sum().data).all() + assert (precip_month.data[-1] == precip.sel( + T=slice("2001-04-01", "2001-08-31") + ).sum().data).all() + + +def test_regroup_daily_to_season1(): + t = pd.date_range(start="2000-01-01", end="2002-12-31", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_seas = calc.regroup(precip, group="14 Dec - 29 Mar", method="sum") + + assert (precip_seas.data[0] == precip.sel( + T=slice("2000-03-30", "2000-12-13") + ).sum().data).all() + assert (precip_seas.data[1] == precip.sel( + T=slice("2000-12-14", "2001-03-29") + ).sum().data).all() + assert (precip_seas.data[2] == precip.sel( + T=slice("2001-03-30", "2001-12-13") + ).sum().data).all() + assert (precip_seas.data[3] == precip.sel( + T=slice("2001-12-14", "2002-03-29") + ).sum().data).all() + assert (precip_seas.data[4] == precip.sel( + T=slice("2002-03-30", "2002-12-13") + ).sum().data).all() + + +def test_regroup_daily_to_season2(): + t = pd.date_range(start="2000-01-01", end="2001-12-31", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_seas = calc.regroup(precip, group="19-29 Feb", method="sum") + + assert (precip_seas.data[0] == precip.sel( + T=slice("2000-02-19", "2000-02-29") + ).sum().data).all() + assert (precip_seas.data[2] == precip.sel( + T=slice("2001-02-19", "2001-02-28") + ).sum().data).all() + + +def test_regroup_daily_to_season3(): + t = pd.date_range(start="2000-01-01", end="2001-12-31", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_seas = calc.regroup(precip, group="29 Feb - 29 Mar", method="sum") + + assert (precip_seas.data[0] == precip.sel( + T=slice("2000-03-01", "2000-03-29") + ).sum().data).all() + assert (precip_seas.data[2] == precip.sel( + T=slice("2001-03-01", "2001-03-29") + ).sum().data).all() + + +def test_regroup_daily_to_int(): + t = pd.date_range(start="2000-01-01", end="2000-01-28", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_int = calc.regroup(precip, group=4, 
method="sum") + + assert (precip_int.data == ( + precip + .resample(T="7D") + .sum(skipna=True, min_count=7) + .dropna("T") + .data + )).all() + + +def test_resample_interval_to_daily(): + t = pd.date_range(start="2000-01-01", end="2000-01-28", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}) + precip_pentad = calc.regroup(precip, group="pentad", method="sum") + precip_daily = calc.resample_interval_to_daily(precip_pentad) + + assert (precip_daily == [ + 3., 3., 3., 3., 3., 8., 8., 8., 8., 8., 13., 13., 13., + 13., 13., 18., 18., 18., 18., 18., 23., 23., 23., 23., 23., + ]).all() + + +def test_resample_interval_to_daily_intensive(): + t = pd.date_range(start="2000-01-01", end="2000-01-28", freq="1D") + values = 1 + np.arange(t.size) + precip = xr.DataArray(values, coords={"T": t}, attrs={"units": "mm/day"}) + precip_pentad = calc.regroup(precip, group="pentad", method="mean") + precip_daily = calc.resample_interval_to_daily(precip_pentad) + + assert (precip_daily == [ + 3., 3., 3., 3., 3., 8., 8., 8., 8., 8., 13., 13., 13., + 13., 13., 18., 18., 18., 18., 18., 23., 23., 23., 23., 23., + ]).all() + + def test_longest_run_length(): precip = precip_sample() @@ -542,6 +732,64 @@ def test_seasonal_onset_date(): ) +def test_seasonal_onset_date_with_regroup(): + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + # this is rr_mrg.sel(T=slice("2000", "2005-02-28")).isel(X=150, Y=150).precip + synthetic_precip = xr.DataArray(np.zeros(t.size), dims=["T"], coords={"T": t}) + 1.1 + synthetic_precip = xr.where( + (synthetic_precip["T"] == pd.to_datetime("2000-03-29")) + | (synthetic_precip["T"] == pd.to_datetime("2000-03-30")) + | (synthetic_precip["T"] == pd.to_datetime("2000-03-31")) + | (synthetic_precip["T"] == pd.to_datetime("2001-04-30")) + | (synthetic_precip["T"] == pd.to_datetime("2001-05-01")) + | (synthetic_precip["T"] == pd.to_datetime("2001-05-02")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-01")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-02")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-03")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-16")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-17")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-18")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-01")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-02")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-03")), + 7, + synthetic_precip, + ).rename("synthetic_precip") + + onsetsds = calc.regroup( + time_series=synthetic_precip, + group="1 Mar - 20 Jun", + method="map", + method_kwargs={ + "func": calc.onset_date, + "wet_thresh": 1, + "wet_spell_length": 3, + "wet_spell_thresh": 20, + "min_wet_days": 1, + "dry_spell_length": 7, + "dry_spell_search": 21, + }, + ) + onsetsds = onsetsds.isel(T_bins=np.arange(0, onsetsds.size, 2), drop=True) + onsets = onsetsds + onsetsds["T"] + + assert ( + onsets + == pd.to_datetime( + xr.DataArray( + [ + "2000-03-29T00:00:00.000000000", + "2001-04-30T00:00:00.000000000", + "2002-04-01T00:00:00.000000000", + "2003-05-16T00:00:00.000000000", + "2004-03-01T00:00:00.000000000", + ], + coords={"T_bins": onsets["T_bins"]}, + ) + ) + ).all() + + def test_seasonal_cess_date(): t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") synthetic_precip = xr.DataArray( @@ -599,6 +847,65 @@ def test_seasonal_cess_date(): ) +def test_seasonal_cess_date_with_regroup(): + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") 
+ synthetic_precip = xr.DataArray(np.zeros(t.size), dims=["T"], coords={"T": t}) + 1.1 + synthetic_precip = xr.where( + (synthetic_precip["T"] == pd.to_datetime("2000-03-29")) + | (synthetic_precip["T"] == pd.to_datetime("2000-03-30")) + | (synthetic_precip["T"] == pd.to_datetime("2000-03-31")) + | (synthetic_precip["T"] == pd.to_datetime("2001-04-30")) + | (synthetic_precip["T"] == pd.to_datetime("2001-05-01")) + | (synthetic_precip["T"] == pd.to_datetime("2001-05-02")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-01")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-02")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-03")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-16")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-17")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-18")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-01")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-02")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-03")), + 7, + synthetic_precip, + ).rename("synthetic_precip") + + wb = calc.water_balance( + daily_rain=synthetic_precip, + et=5, + taw=60, + sminit=0, + time_dim="T" + ).to_array(name="soil moisture") + cessds = calc.regroup( + time_series=wb, + group="1 Sep - 30 Nov", + method="map", + method_kwargs={ + "func": calc.cess_date_from_sm, + "dry_thresh": 5, + "dry_spell_length_thresh": 3, + }, + ) + cessds = cessds.isel(T=np.arange(0, cessds.size, 2), drop=True) + cess = (cessds + cessds["T"]).squeeze() + + assert ( + cess + == pd.to_datetime( + xr.DataArray( + [ + "2000-09-01T00:00:00.000000000", + "2001-09-01T00:00:00.000000000", + "2002-09-01T00:00:00.000000000", + "2003-09-01T00:00:00.000000000", + "2004-09-01T00:00:00.000000000", + ],dims=["T"],coords={"T": cess["T"]}, + ) + ) + ).all() + + def test_seasonal_cess_date_from_rain(): t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") synthetic_precip = xr.DataArray( @@ -649,6 +956,59 @@ def test_seasonal_cess_date_from_rain(): ), ) +def test_seasonal_cess_date_from_rain_with_regroup(): + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + synthetic_precip = xr.DataArray(np.zeros(t.size), dims=["T"], coords={"T": t}) + 1.1 + synthetic_precip = xr.where( + (synthetic_precip["T"] == pd.to_datetime("2000-03-29")) + | (synthetic_precip["T"] == pd.to_datetime("2000-03-30")) + | (synthetic_precip["T"] == pd.to_datetime("2000-03-31")) + | (synthetic_precip["T"] == pd.to_datetime("2001-04-30")) + | (synthetic_precip["T"] == pd.to_datetime("2001-05-01")) + | (synthetic_precip["T"] == pd.to_datetime("2001-05-02")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-01")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-02")) + | (synthetic_precip["T"] == pd.to_datetime("2002-04-03")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-16")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-17")) + | (synthetic_precip["T"] == pd.to_datetime("2003-05-18")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-01")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-02")) + | (synthetic_precip["T"] == pd.to_datetime("2004-03-03")), + 7, + synthetic_precip, + ).rename("synthetic_precip") + cessds = calc.regroup( + time_series=synthetic_precip, + group="1 Sep - 30 Nov", + method="map", + method_kwargs={ + "func": calc.cess_date_from_rain, + "dry_thresh": 5, + "dry_spell_length_thresh": 3, + "et": 5, + "taw": 60, + "sminit": 0, + }, + ) + cessds = cessds.isel(T=np.arange(0, cessds.size, 2), drop=True) + cess = 
(cessds + cessds["T"]).squeeze()
+
+    assert (
+        cess
+        == pd.to_datetime(
+            xr.DataArray(
+                [
+                    "2000-09-01T00:00:00.000000000",
+                    "2001-09-01T00:00:00.000000000",
+                    "2002-09-01T00:00:00.000000000",
+                    "2003-09-01T00:00:00.000000000",
+                    "2004-09-01T00:00:00.000000000",
+                ],dims=["T"],coords={"T": cess["T"]},
+            )
+        )
+    ).all()
 
 
 def precip_sample():

From 6e40ce94abb7e869c3e3e2d657ddeeda684b4a56 Mon Sep 17 00:00:00 2001
From: remic
Date: Wed, 23 Apr 2025 11:12:07 -0400
Subject: [PATCH 2/4] from assert == to np/xr assertion functions

---
 enacts/tests/test_calc.py | 237 ++++++++++++++++++++------------------
 1 file changed, 122 insertions(+), 115 deletions(-)

diff --git a/enacts/tests/test_calc.py b/enacts/tests/test_calc.py
index 06d1c51e..5fee472e 100644
--- a/enacts/tests/test_calc.py
+++ b/enacts/tests/test_calc.py
@@ -107,13 +107,9 @@ def test_regroup_daily_to_7D():
     precip = xr.DataArray(values, coords={"T": t})
     precip_int = calc.regroup(precip, group="7D", method="sum")
 
-    assert (precip_int.data == (
-        precip
-        .resample(T="7D")
-        .sum(skipna=True, min_count=7)
-        .dropna("T")
-        .data
-    )).all()
+    np.testing.assert_array_equal(
+        precip_int, precip.resample(T="7D").sum(skipna=True, min_count=7).dropna("T")
+    )
 
 
 def test_regroup_daily_to_pentad():
@@ -128,17 +124,17 @@ def test_regroup_daily_to_pentad():
     precip_pentad = calc.regroup(precip, group="pentad", method="sum")
     precip_pentad_leap = calc.regroup(precip_leap, group="pentad", method="sum")
 
-    assert (precip_pentad.data == [
+    np.testing.assert_array_equal(precip_pentad.data, [
         15., 40., 65., 90., 115., 140., 165., 190., 215., 240., 265.,
         230., 310., 335., 360., 385., 410., 435.
-    ]).all()
-    assert (precip_pentad_leap.data == [
+    ])
+    np.testing.assert_array_equal(precip_pentad_leap.data, [
         15., 40., 65., 90., 115., 140., 165., 190., 215., 240., 265.,
         290., 315., 340., 365., 390., 415., 440.
-    ]).all()
-    assert ((precip_pentad_leap.data - precip_pentad.data) == [
+    ])
+    np.testing.assert_array_equal((precip_pentad_leap.data - precip_pentad.data), [
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 5., 5., 5., 5., 5., 5.,
-    ]).all()
+    ])
 
 
 def test_regroup_daily_to_8day():
@@ -147,9 +143,9 @@ def test_regroup_daily_to_8day():
     precip = xr.DataArray(values, coords={"T": t})
     precip_8day = calc.regroup(precip, group="8day", method="sum")
 
-    assert (precip_8day.data == [
+    np.testing.assert_array_equal(precip_8day.data, [
         92., 156., 220., 284., 348., 412., 351., 524., 588., 652., 716.
-    ]).all()
+    ])
 
 
 def test_regroup_daily_to_dekad():
@@ -158,7 +154,9 @@ def test_regroup_daily_to_dekad():
     precip = xr.DataArray(values, coords={"T": t})
     precip_dekad = calc.regroup(precip, group="dekad", method="sum")
 
-    assert (precip_dekad.data == [55., 155., 286., 365., 465., 504.]).all()
+    np.testing.assert_array_equal(
+        precip_dekad.data, [55., 155., 286., 365., 465., 504.]
+    )
 
 
 def test_regroup_daily_to_16day():
@@ -167,7 +165,9 @@ def test_regroup_daily_to_16day():
     precip = xr.DataArray(values, coords={"T": t})
     precip_16day = calc.regroup(precip, group="16day", method="sum")
 
-    assert (precip_16day.data == [376., 632., 763., 1112., 1368.]).all()
+    np.testing.assert_array_equal(
+        precip_16day.data, [376., 632., 763., 1112., 1368.]
+ ) def test_regroup_daily_to_1M(): @@ -176,13 +176,10 @@ def test_regroup_daily_to_1M(): precip = xr.DataArray(values, coords={"T": t}) precip_month = calc.regroup(precip, group="1M", method="sum") - assert (precip_month.data == ( - precip - .resample(T="1M") - .sum(skipna=True, min_count=30) - .dropna("T") - .data - )).all() + np.testing.assert_array_equal( + precip_month.data, + precip.resample(T="1M").sum(skipna=True, min_count=30).dropna("T"), + ) def test_regroup_daily_to_5M(): @@ -191,12 +188,14 @@ def test_regroup_daily_to_5M(): precip = xr.DataArray(values, coords={"T": t}) precip_month = calc.regroup(precip, group="5M", method="sum") - assert (precip_month.data[0] == precip.sel( - T=slice("2000-01-01", "2000-05-31") - ).sum().data).all() - assert (precip_month.data[-1] == precip.sel( - T=slice("2001-04-01", "2001-08-31") - ).sum().data).all() + xr.testing.assert_equal( + precip_month.isel(T_bins=0, drop=True), + precip.sel(T=slice("2000-01-01", "2000-05-31")).sum() + ) + xr.testing.assert_equal( + precip_month.isel(T_bins=-1, drop=True), + precip.sel(T=slice("2001-04-01", "2001-08-31")).sum() + ) def test_regroup_daily_to_season1(): @@ -205,21 +204,26 @@ def test_regroup_daily_to_season1(): precip = xr.DataArray(values, coords={"T": t}) precip_seas = calc.regroup(precip, group="14 Dec - 29 Mar", method="sum") - assert (precip_seas.data[0] == precip.sel( - T=slice("2000-03-30", "2000-12-13") - ).sum().data).all() - assert (precip_seas.data[1] == precip.sel( - T=slice("2000-12-14", "2001-03-29") - ).sum().data).all() - assert (precip_seas.data[2] == precip.sel( - T=slice("2001-03-30", "2001-12-13") - ).sum().data).all() - assert (precip_seas.data[3] == precip.sel( - T=slice("2001-12-14", "2002-03-29") - ).sum().data).all() - assert (precip_seas.data[4] == precip.sel( - T=slice("2002-03-30", "2002-12-13") - ).sum().data).all() + xr.testing.assert_equal( + precip_seas.isel(T_bins=0, drop=True), + precip.sel(T=slice("2000-03-30", "2000-12-13")).sum() + ) + xr.testing.assert_equal( + precip_seas.isel(T_bins=1, drop=True), + precip.sel(T=slice("2000-12-14", "2001-03-29")).sum() + ) + xr.testing.assert_equal( + precip_seas.isel(T_bins=2, drop=True), + precip.sel(T=slice("2001-03-30", "2001-12-13")).sum() + ) + xr.testing.assert_equal( + precip_seas.isel(T_bins=3, drop=True), + precip.sel(T=slice("2001-12-14", "2002-03-29")).sum() + ) + xr.testing.assert_equal( + precip_seas.isel(T_bins=4, drop=True), + precip.sel(T=slice("2002-03-30", "2002-12-13")).sum() + ) def test_regroup_daily_to_season2(): @@ -228,12 +232,14 @@ def test_regroup_daily_to_season2(): precip = xr.DataArray(values, coords={"T": t}) precip_seas = calc.regroup(precip, group="19-29 Feb", method="sum") - assert (precip_seas.data[0] == precip.sel( - T=slice("2000-02-19", "2000-02-29") - ).sum().data).all() - assert (precip_seas.data[2] == precip.sel( - T=slice("2001-02-19", "2001-02-28") - ).sum().data).all() + xr.testing.assert_equal( + precip_seas.isel(T_bins=0, drop=True), + precip.sel(T=slice("2000-02-19", "2000-02-29")).sum() + ) + xr.testing.assert_equal( + precip_seas.isel(T_bins=2, drop=True), + precip.sel(T=slice("2001-02-19", "2001-02-28")).sum() + ) def test_regroup_daily_to_season3(): @@ -242,12 +248,14 @@ def test_regroup_daily_to_season3(): precip = xr.DataArray(values, coords={"T": t}) precip_seas = calc.regroup(precip, group="29 Feb - 29 Mar", method="sum") - assert (precip_seas.data[0] == precip.sel( - T=slice("2000-03-01", "2000-03-29") - ).sum().data).all() - assert (precip_seas.data[2] == precip.sel( - 
T=slice("2001-03-01", "2001-03-29") - ).sum().data).all() + xr.testing.assert_equal( + precip_seas.isel(T_bins=0, drop=True), + precip.sel(T=slice("2000-03-01", "2000-03-29")).sum() + ) + xr.testing.assert_equal( + precip_seas.isel(T_bins=2, drop=True), + precip.sel(T=slice("2001-03-01", "2001-03-29")).sum() + ) def test_regroup_daily_to_int(): @@ -256,13 +264,10 @@ def test_regroup_daily_to_int(): precip = xr.DataArray(values, coords={"T": t}) precip_int = calc.regroup(precip, group=4, method="sum") - assert (precip_int.data == ( - precip - .resample(T="7D") - .sum(skipna=True, min_count=7) - .dropna("T") - .data - )).all() + np.testing.assert_array_equal( + precip_int, + precip.resample(T="7D").sum(skipna=True, min_count=7).dropna("T"), + ) def test_resample_interval_to_daily(): @@ -272,10 +277,10 @@ def test_resample_interval_to_daily(): precip_pentad = calc.regroup(precip, group="pentad", method="sum") precip_daily = calc.resample_interval_to_daily(precip_pentad) - assert (precip_daily == [ + np.testing.assert_array_equal(precip_daily, [ 3., 3., 3., 3., 3., 8., 8., 8., 8., 8., 13., 13., 13., 13., 13., 18., 18., 18., 18., 18., 23., 23., 23., 23., 23., - ]).all() + ]) def test_resample_interval_to_daily_intensive(): @@ -285,10 +290,10 @@ def test_resample_interval_to_daily_intensive(): precip_pentad = calc.regroup(precip, group="pentad", method="mean") precip_daily = calc.resample_interval_to_daily(precip_pentad) - assert (precip_daily == [ + np.testing.assert_array_equal(precip_daily, [ 3., 3., 3., 3., 3., 8., 8., 8., 8., 8., 13., 13., 13., 13., 13., 18., 18., 18., 18., 18., 23., 23., 23., 23., 23., - ]).all() + ]) def test_longest_run_length(): @@ -735,7 +740,9 @@ def test_seasonal_onset_date(): def test_seasonal_onset_date_with_regroup(): t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") # this is rr_mrg.sel(T=slice("2000", "2005-02-28")).isel(X=150, Y=150).precip - synthetic_precip = xr.DataArray(np.zeros(t.size), dims=["T"], coords={"T": t}) + 1.1 + synthetic_precip = xr.DataArray( + np.zeros(t.size), dims=["T"], coords={"T": t} + ) + 1.1 synthetic_precip = xr.where( (synthetic_precip["T"] == pd.to_datetime("2000-03-29")) | (synthetic_precip["T"] == pd.to_datetime("2000-03-30")) @@ -771,23 +778,21 @@ def test_seasonal_onset_date_with_regroup(): }, ) onsetsds = onsetsds.isel(T_bins=np.arange(0, onsetsds.size, 2), drop=True) - onsets = onsetsds + onsetsds["T"] + onsets = (onsetsds + onsetsds["T"]).drop_vars("T") - assert ( - onsets - == pd.to_datetime( - xr.DataArray( - [ - "2000-03-29T00:00:00.000000000", - "2001-04-30T00:00:00.000000000", - "2002-04-01T00:00:00.000000000", - "2003-05-16T00:00:00.000000000", - "2004-03-01T00:00:00.000000000", - ], - coords={"T_bins": onsets["T_bins"]}, - ) - ) - ).all() + xr.testing.assert_equal( + onsets, + xr.DataArray( + pd.to_datetime([ + "2000-03-29T00:00:00.000000000", + "2001-04-30T00:00:00.000000000", + "2002-04-01T00:00:00.000000000", + "2003-05-16T00:00:00.000000000", + "2004-03-01T00:00:00.000000000", + ]), + coords={"T_bins": onsets["T_bins"]}, + ), + ) def test_seasonal_cess_date(): @@ -849,7 +854,9 @@ def test_seasonal_cess_date(): def test_seasonal_cess_date_with_regroup(): t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") - synthetic_precip = xr.DataArray(np.zeros(t.size), dims=["T"], coords={"T": t}) + 1.1 + synthetic_precip = xr.DataArray( + np.zeros(t.size), dims=["T"], coords={"T": t} + ) + 1.1 synthetic_precip = xr.where( (synthetic_precip["T"] == pd.to_datetime("2000-03-29")) | 
(synthetic_precip["T"] == pd.to_datetime("2000-03-30")) @@ -888,22 +895,21 @@ def test_seasonal_cess_date_with_regroup(): }, ) cessds = cessds.isel(T=np.arange(0, cessds.size, 2), drop=True) - cess = (cessds + cessds["T"]).squeeze() + cess = (cessds + cessds["T"]).squeeze(drop=True) - assert ( - cess - == pd.to_datetime( - xr.DataArray( - [ - "2000-09-01T00:00:00.000000000", - "2001-09-01T00:00:00.000000000", - "2002-09-01T00:00:00.000000000", - "2003-09-01T00:00:00.000000000", - "2004-09-01T00:00:00.000000000", - ],dims=["T"],coords={"T": cess["T"]}, - ) + xr.testing.assert_equal( + cess, + xr.DataArray( + pd.to_datetime([ + "2000-09-01T00:00:00.000000000", + "2001-09-01T00:00:00.000000000", + "2002-09-01T00:00:00.000000000", + "2003-09-01T00:00:00.000000000", + "2004-09-01T00:00:00.000000000", + ]), + dims=["T"], coords={"T": cess["T"]}, ) - ).all() + ) def test_seasonal_cess_date_from_rain(): @@ -958,7 +964,9 @@ def test_seasonal_cess_date_from_rain(): def test_seasonal_cess_date_from_rain_with_regroup(): t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") - synthetic_precip = xr.DataArray(np.zeros(t.size), dims=["T"], coords={"T": t}) + 1.1 + synthetic_precip = xr.DataArray( + np.zeros(t.size), dims=["T"], coords={"T": t} + ) + 1.1 synthetic_precip = xr.where( (synthetic_precip["T"] == pd.to_datetime("2000-03-29")) | (synthetic_precip["T"] == pd.to_datetime("2000-03-30")) @@ -994,20 +1002,19 @@ def test_seasonal_cess_date_from_rain_with_regroup(): cessds = cessds.isel(T=np.arange(0, cessds.size, 2), drop=True) cess = (cessds + cessds["T"]).squeeze() - assert ( - cess - == pd.to_datetime( - xr.DataArray( - [ - "2000-09-01T00:00:00.000000000", - "2001-09-01T00:00:00.000000000", - "2002-09-01T00:00:00.000000000", - "2003-09-01T00:00:00.000000000", - "2004-09-01T00:00:00.000000000", - ],dims=["T"],coords={"T": cess["T"]}, - ) - ) - ).all() + xr.testing.assert_equal( + cess, + xr.DataArray( + pd.to_datetime([ + "2000-09-01T00:00:00.000000000", + "2001-09-01T00:00:00.000000000", + "2002-09-01T00:00:00.000000000", + "2003-09-01T00:00:00.000000000", + "2004-09-01T00:00:00.000000000", + ]), + dims=["T"], coords={"T": cess["T"]}, + ), + ) def precip_sample(): From 842bbb38faeeafcfe3df11c735d86eff0372c505 Mon Sep 17 00:00:00 2001 From: remic Date: Wed, 23 Apr 2025 12:20:32 -0400 Subject: [PATCH 3/4] various improvements on names, types, practice, syntax, doc... --- enacts/calc.py | 278 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) diff --git a/enacts/calc.py b/enacts/calc.py index 13a1039e..c01040ef 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -961,6 +961,284 @@ def groupby_dekads(daily_data, time_dim="T"): return daily_data.groupby_bins(daily_data[time_dim], dekad_edges, right=False) +def resample_interval_to_daily(time_series, is_intensive=None, time_dim="T_bins"): + """ Resample any (interval-based) time series to daily + + Parameters + ---------- + time_series : xr.DataArray or xr.Dataset + data depending on time intervals greater or equal then a day + is_intensive : boolean, optional + indicate the "extensive" or "intensive" property of `time_series` . + Upsampling to daily requires intensive data. 
+ If False, make intensive by dividing by length of intervals in days + Default is None in which case: if units end with "/day", considers intensive, + else, considers extensive + time_dim : str, optional + name of interval time dimenstion, default is "T_bins" + + Returns + ------- + time_series : xr.DataArray or xr.Dataset + `time_series` resampled to daily + + See Also + -------- + pandas.Interval, intervals_to_points, replace_intervals_with_points, + xr.DataArray.resample, xr.Dataset.resample + + Notes + ----- + The day is considered the smallest unit or interval of time in the sense that a + time dimensions expressed as time points is considered equivalent to intervals of + length 1 day. There may be generalization to make to adapt to the actual smallest + time unit of this ecosystem which is the ns. + In thermodynamics (at the core of climate science), quantities can be categorized + as being intensive or extensive to identify how they change when a system changes + in size: 2 systems merging add up (extensive) their mass and volume, but they + don't (intensive) their density (more at + https://en.wikipedia.org/wiki/Intensive_and_extensive_properties). + Closer to what we care about here, temperature is intensive so a monthly value + can be upsampled to daily by assigning same value to all day (implicitely + admitting that the monthly value is a daily average); but precipitation is + extensive so that so a monthly value can not be upsampled to daily by simply + reassigning it (if it rains 300mm in a month, it can rain 300m as well in only + one day of the month -- and 0 all the other days). However, monthly precipitation + expressed in mm/day is intensive. + Extent property could be inferred in more cases, e.g. Kelvin is intensive. + Waiting for pint to figure that all out. + This function differ from xr.DataArray/Dataset.resample in that resample expects + `time_dim` to be datetime-like but it doesn't consider that pd.Interval of + datetime-like is (probably because it reasons in terms of frequency, ignoring + width (intervals)). 
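+
+    Examples
+    --------
+    A minimal usage sketch (illustrative rather than a verified doctest), using
+    `regroup` (defined below) to build a dekadal input. The dekadal sums carry
+    no "/day" units, so they are treated as extensive: divided by the interval
+    lengths (10, 10 and 11 days here), then forward-filled back to daily:
+
+    >>> t = pd.date_range(start="2020-01-01", end="2020-01-31", freq="1D")
+    >>> daily = xr.DataArray(1 + np.arange(t.size), coords={"T": t})
+    >>> dekadal = regroup(daily, group="dekad").sum()  # sums 55, 155, 286
+    >>> daily_again = resample_interval_to_daily(dekadal)  # 5.5, 15.5, 26 per day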
+ """ + if isinstance(time_series[time_dim].values[0], pd._libs.interval.Interval): + # else time_dim is not intervals thus points thus considered daily already + # make daily for computations + if is_intensive is None : + # There are a lot more cases to cover + if "units" in time_series.attrs : + is_intensive = "/day" in time_series.attrs["units"] + else : + is_intensive = False + if not is_intensive : # Can only ffill intensive data + time_series = time_series / [ + time_series[time_dim].values[t].length.days + for t in range(time_series[time_dim].size) + ] + if "units" in time_series.attrs : + time_series.attrs["units"] = f'{time_series.attrs["units"]}/day' + time_dim_left = ( # There might be other automatic cases to cover + # Same logic as in intervals_to_points + time_dim.replace("_bins", "_left") if time_dim.endswith("_bins") + else "_".join(time_dim, "_left") + ) + time_dim_right = ( + time_dim.replace("_bins", "_right") if time_dim.endswith("_bins") + else "_".join(time_dim, "right") + ) + time_series = xr.concat([ + replace_intervals_with_points(time_series, time_dim, to_point="left"), + # Need to cover entirely the last interval + replace_intervals_with_points( + time_series.isel({time_dim : [-1]}), time_dim, to_point="right" + ).rename({time_dim_right : time_dim_left}), + ], dim=time_dim_left) + time_series = ( + time_series.resample({time_dim_left: "1D"}).ffill() + # once filled, can drop the open right point of last interval + .isel({time_dim_left : slice(0, -1)}) + ) + return time_series + + +def regroup(time_series, group="1D", method=None, method_kwargs={}, time_dim="T"): + """ Regroup any type of interval-based time series to another, + according to `method` + + Parameters + ---------- + time_series : xr.DataArray or xr.Dataset + data depending on time intervals greater or equal then a day + group: str, int, or array-like[pandas.DatetimeIndex] + indicates the new type of intervals to regroup to. + As string, must be: nD, pentad, 8day, dekad, 16day, nM, d-d Mmm + or d Mmm - d Mmm. See Notes for details. + As integer or array-like[pandas.DatetimeIndex], + see xr.DataArray.groupby_bins' `bins` Parameter + method: str, optional + name of xr.core.groupby.DataArrayGroupBy's Method to apply to form the new + intervals. Default is None in which case no final reduction is applied + and returned `time_series` is a xr.core.groupby.DataArrayGroupBy object of + days grouped according to `group` . + method_kwargs: dict, optional + keyword arguments of `method` . Default is an empty dict in which case + default keywords of `method` will be applied. + time_dim : str, optional + name of interval time dimenstion, default is "T" + + Returns + ------- + regrouped : xr.DataArray or xr.Dataset + `time_series` regrouped to specified time intervals according to `method` + + See Also + -------- + resample_interval_to_daily, pandas.DatetimeIndex, xarray.DataArray.groupby_bins + + Notes + ----- + The day is considered the smallest unit or interval of time (see Note of + resample_interval_to_daily). In this implementation, all `time_series` inputs are + resampled to daily no matter the `group` of interest. It may not be necessary + and more efficient if `group` is coarser than a day (so nearly all cases). The + intersection of `time_series` intervals and `group` intervals would form the + coarsest partition of `time_dim` and weights could be applied depending on + `method` . + The seasonal grouping is of different nature than all other groupings. 
While all + others make a partition of time, the purpose of the seasonal grouping is to make + a yearly time series of a tailored sesason (e.g. 19 Jan - 29 Mar). However, the + case fits. As of now, the selection of the season of interest is to be done after + applyting `regroup` . It could be incorporated into `regroup` and constitute the + specificity of this case. Or the case could be removed altogether and put in + another function. + Outputs of `regroup` that have a yearly periodicity (ie all except nD, some nM, + int and some array-like[pandas.DatetimeIndex]), could be used to split the time + dimension in 2: years and intervals of the year; which in turn could be reduced + to make climatologies or yearly time series of a given interval. + Known groups: + * nD (e.g. 7D): intervals of n (e.g. 7) days from first day of `time_dim` + * pentad: partition of the year in 5-day intervals. In leap years, Feb 29 is + included in the 25 Feb - 1 Mar interval, making it 6-day long. E.g. at + https://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.FEWS/.DAILY/.est_prcp/pentadAverage + * 8day: partition of the year in 8-day intervals. The last interval is used to + adjust the partitioning and is 26/27-31 Dec depending on leap years. E.g. at + https://iridl.ldeo.columbia.edu/SOURCES/.USGS/.LandDAAC/.MODIS/.1km/.8day/.version_006/.Terra/.NY/.Day/.LST + * dekad: partition of the months in 3 10-day intervals, except for the last dekad + of the month that runs until the end of the month (from the 21st -- thus can be + 8, 9, 10 or 11 -day long) + * 16day: partition of the year in 16-day intervals. The last interval is used to + adjust the partitioning and is 18/19-31 Dec depending on leap years. E.g. at + https://iridl.ldeo.columbia.edu/SOURCES/.USGS/.LandDAAC/.MODIS/.version_006/.EAF/.NDVI + * nM (e.g. 5D): intervals of n (e.g. 5) months from first full month of `time_dim` + * d-d Mmm or d Mmm - d Mmm (e.g. 21-29 Mar or 19 Jan - 29 Mar): 2 seasons to + partition time against. The 2nd season is the complentary to the one given to + `group` (e.g. 30 Mar - 18 Jan). + * int (e.g. 7): number of equally sized intervals in `time_dim` (see + xr.DataArray.groupby_bins' `bins` for details). + * array-like[pandas.DatetimeIndex]: edges of time intervals (see + xr.DataArray.groupby_bins' `bins` for details). 
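+
+    Examples
+    --------
+    A minimal usage sketch (illustrative rather than a verified doctest):
+    grouping a daily January series into its 3 dekads, then reducing each group
+    with a sum (55, 155 and 286 for daily values 1 through 31, the last dekad
+    being 11 days long):
+
+    >>> t = pd.date_range(start="2020-01-01", end="2020-01-31", freq="1D")
+    >>> daily = xr.DataArray(1 + np.arange(t.size), coords={"T": t})
+    >>> dekads = regroup(daily, group="dekad")
+    >>> dekadal_sum = dekads.sum()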
+ """ + time_series = resample_interval_to_daily(time_series, time_dim=time_dim) + edges_base = pd.date_range( + # Flooring needed only if time_series already daily + start=time_series[time_dim][0].dt.floor("D").values, + # need one more day since right is excluded + end=(time_series[time_dim][-1] + np.timedelta64(1, "D")).dt.floor("D").values, + freq="1D", + ) + if isinstance(group, str) : + # Form bins according to group + if group.endswith("D") : + bins = np.array( + [edges_base[t] for t in range(0, edges_base.size, int(group[:-1]))] + ) + elif group == "pentad" : + # 29 Feb always in last pentad of Feb + bins = edges_base.where( + (edges_base.dayofyear % 5) == ( + 1 - np.array([ + pd.Timedelta( + ((eb.dayofyear > 59) * (not eb.is_leap_year)), "D" + ).days for eb in edges_base + ]) + ) + ).dropna() + elif group == "8day" : + # last period of year used to adjusting + bins = edges_base.where((edges_base.dayofyear % 8) == 1).dropna() + elif group == "dekad" : + bins = edges_base.where( + (edges_base.day == 1) + | (edges_base.day == 11) + | (edges_base.day == 21) + ).dropna() + elif group == "16day" : + # last period of year used to adjusting + bins = edges_base.where((edges_base.dayofyear % 16) == 1).dropna() + elif group.endswith("M") : + bins = edges_base.where(edges_base.day == 1).dropna() + bins = np.array([bins[t] for t in range(0, bins.size, int(group[:-1]))]) + elif "-" in group : + # e.g. "29 Feb - 30 Mar" or "2-29 Mar" + # This case usage is to keep only the season of interest given as input. + # Thus not to form a partition of time as in other cases and could be + # moved to another function. Or if kept could include the selection of + # said season of interest. (Or just leave as is). + if " - " in group : + start_day = group.split()[0] + start_month = group.split()[1] + end_day = group.split()[3] + end_month = group.split()[4] + else: + start_day = group.split()[0].split("-")[0] + end_day = group.split()[0].split("-")[1] + start_month = group.split()[1] + end_month = start_month + start_day = int(start_day) + end_day = int(end_day) + if start_day == 29 and start_month == "Feb" : + # don't allow start on 29 Feb: this is pushy + start_day = 1 + start_month = "Mar" + offset = 1 + if end_day == 29 and end_month == "Feb" : + end_day = 1 + end_month = "Mar" + offset = 0 + bins = edges_base.where( + ( + (edges_base.day == start_day) + & [ + (edges_base.month_name()[t][:3] == start_month) + for t in range(edges_base.size) + ] + ) + | ( # group end is inclusive + ((edges_base - pd.Timedelta(offset, "D")).day == end_day) + & [ + (( + edges_base - pd.Timedelta(offset, "D") + ).month_name()[t][:3] == end_month) + for t in range(edges_base.size) + ] + ) + ).dropna() + else: + raise Exception( + f"group as str must be nD, pentad, 8day, dekad, 16day, nM, d-d Mmm" + f" or d Mmm - d Mmm" + ) + elif isinstance(group, int) : + bins = group + elif insintance(group, pandas.core.indexes.datetimes.DatetimeIndex): + # custom bins edges from input + bins = group + else : + raise Exception( + f"group must be int, array, or str of form nD, pentad, 8day, dekad," + f" 16day, nM,d-d Mmm or d Mmm - d Mmm" + ) + if (not isinstance(group, int)): + assert (bins.size > 1), ( + "data must span at least one full group (need 2 edges to form 1 bin)" + ) + regrouped = time_series.groupby_bins(time_series[time_dim], bins, right=False) + if method is not None : + regrouped = getattr(regrouped, method)(**method_kwargs) + return regrouped + + def strftimeb2int(strftimeb): """Convert month values to integers (1-12) from 
strings. From 5a1c9af49ad6d1a12b4f981f63f51176b7defa85 Mon Sep 17 00:00:00 2001 From: remic Date: Wed, 23 Apr 2025 15:05:39 -0400 Subject: [PATCH 4/4] remove optional reduction application from regroup --- enacts/calc.py | 22 ++---- enacts/tests/test_calc.py | 139 ++++++++++++++++++++++++++++---------- 2 files changed, 109 insertions(+), 52 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index c01040ef..4d0b5d7d 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -1052,9 +1052,8 @@ def resample_interval_to_daily(time_series, is_intensive=None, time_dim="T_bins" return time_series -def regroup(time_series, group="1D", method=None, method_kwargs={}, time_dim="T"): - """ Regroup any type of interval-based time series to another, - according to `method` +def regroup(time_series, group="1D", time_dim="T"): + """ Regroup any type of interval-based time series to another Parameters ---------- @@ -1066,21 +1065,13 @@ def regroup(time_series, group="1D", method=None, method_kwargs={}, time_dim="T" or d Mmm - d Mmm. See Notes for details. As integer or array-like[pandas.DatetimeIndex], see xr.DataArray.groupby_bins' `bins` Parameter - method: str, optional - name of xr.core.groupby.DataArrayGroupBy's Method to apply to form the new - intervals. Default is None in which case no final reduction is applied - and returned `time_series` is a xr.core.groupby.DataArrayGroupBy object of - days grouped according to `group` . - method_kwargs: dict, optional - keyword arguments of `method` . Default is an empty dict in which case - default keywords of `method` will be applied. time_dim : str, optional name of interval time dimenstion, default is "T" Returns ------- - regrouped : xr.DataArray or xr.Dataset - `time_series` regrouped to specified time intervals according to `method` + regrouped : xr.core.groupby.DataArray/DatasetGroupBy + `time_series` grouped to specified time intervals groups See Also -------- @@ -1233,10 +1224,7 @@ def regroup(time_series, group="1D", method=None, method_kwargs={}, time_dim="T" assert (bins.size > 1), ( "data must span at least one full group (need 2 edges to form 1 bin)" ) - regrouped = time_series.groupby_bins(time_series[time_dim], bins, right=False) - if method is not None : - regrouped = getattr(regrouped, method)(**method_kwargs) - return regrouped + return time_series.groupby_bins(time_series[time_dim], bins, right=False) def strftimeb2int(strftimeb): diff --git a/enacts/tests/test_calc.py b/enacts/tests/test_calc.py index 5fee472e..1fa4929d 100644 --- a/enacts/tests/test_calc.py +++ b/enacts/tests/test_calc.py @@ -105,7 +105,7 @@ def test_regroup_daily_to_7D(): t = pd.date_range(start="2000-01-01", end="2000-01-28", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_int = calc.regroup(precip, group="7D", method="sum") + precip_int = calc.regroup(precip, group="7D").sum() np.testing.assert_array_equal( precip_int, precip.resample(T="7D").sum(skipna=True, min_count=7).dropna("T") @@ -121,8 +121,8 @@ def test_regroup_daily_to_pentad(): values_leap = 1 + np.arange(t_leap.size) precip = xr.DataArray(values, coords={"T": t}) precip_leap = xr.DataArray(values_leap, coords={"T": t_leap}) - precip_pentad = calc.regroup(precip, group="pentad", method="sum") - precip_pentad_leap = calc.regroup(precip_leap, group="pentad", method="sum") + precip_pentad = calc.regroup(precip, group="pentad").sum() + precip_pentad_leap = calc.regroup(precip_leap, group="pentad").sum() np.testing.assert_array_equal(precip_pentad.data, [ 15., 
40., 65., 90., 115., 140., 165., 190., 215., 240., 265., @@ -141,7 +141,7 @@ def test_regroup_daily_to_8day(): t = pd.date_range(start="2020-11-01T120000", end="2021-02-01T120000", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_8day = calc.regroup(precip, group="8day", method="sum") + precip_8day = calc.regroup(precip, group="8day").sum() np.testing.assert_array_equal(precip_8day.data, [ 92., 156., 220., 284., 348., 412., 351., 524., 588., 652., 716. @@ -152,7 +152,7 @@ def test_regroup_daily_to_dekad(): t = pd.date_range(start="2020-01-01T120000", end="2020-03-09T120000", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_dekad = calc.regroup(precip, group="dekad", method="sum") + precip_dekad = calc.regroup(precip, group="dekad").sum() np.testing.assert_array_equal( precip_dekad.data, [55., 155., 286., 365., 465., 504.] @@ -163,7 +163,7 @@ def test_regroup_daily_to_16day(): t = pd.date_range(start="2000-11-01T120000", end="2001-02-01T120000", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_16day = calc.regroup(precip, group="16day", method="sum") + precip_16day = calc.regroup(precip, group="16day").sum() np.testing.assert_array_equal( precip_16day.data, [376., 632., 763., 1112., 1368.] @@ -174,7 +174,7 @@ def test_regroup_daily_to_1M(): t = pd.date_range(start="2000-11-01T120000", end="2001-02-01T120000", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_month = calc.regroup(precip, group="1M", method="sum") + precip_month = calc.regroup(precip, group="1M").sum() np.testing.assert_array_equal( precip_month.data, @@ -186,7 +186,7 @@ def test_regroup_daily_to_5M(): t = pd.date_range(start="2000-01-01", end="2001-12-31", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_month = calc.regroup(precip, group="5M", method="sum") + precip_month = calc.regroup(precip, group="5M").sum() xr.testing.assert_equal( precip_month.isel(T_bins=0, drop=True), @@ -202,7 +202,7 @@ def test_regroup_daily_to_season1(): t = pd.date_range(start="2000-01-01", end="2002-12-31", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_seas = calc.regroup(precip, group="14 Dec - 29 Mar", method="sum") + precip_seas = calc.regroup(precip, group="14 Dec - 29 Mar").sum() xr.testing.assert_equal( precip_seas.isel(T_bins=0, drop=True), @@ -230,7 +230,7 @@ def test_regroup_daily_to_season2(): t = pd.date_range(start="2000-01-01", end="2001-12-31", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_seas = calc.regroup(precip, group="19-29 Feb", method="sum") + precip_seas = calc.regroup(precip, group="19-29 Feb").sum() xr.testing.assert_equal( precip_seas.isel(T_bins=0, drop=True), @@ -246,7 +246,7 @@ def test_regroup_daily_to_season3(): t = pd.date_range(start="2000-01-01", end="2001-12-31", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_seas = calc.regroup(precip, group="29 Feb - 29 Mar", method="sum") + precip_seas = calc.regroup(precip, group="29 Feb - 29 Mar").sum() xr.testing.assert_equal( precip_seas.isel(T_bins=0, drop=True), @@ -262,7 +262,7 @@ def test_regroup_daily_to_int(): t = pd.date_range(start="2000-01-01", end="2000-01-28", freq="1D") values = 1 + np.arange(t.size) precip = xr.DataArray(values, coords={"T": t}) - precip_int = 
calc.regroup(precip, group=4, method="sum")
+    precip_int = calc.regroup(precip, group=4).sum()
 
     np.testing.assert_array_equal(
         precip_int,
         precip.resample(T="7D").sum(skipna=True, min_count=7).dropna("T"),
     )
@@ -274,7 +274,7 @@ def test_resample_interval_to_daily():
     t = pd.date_range(start="2000-01-01", end="2000-01-28", freq="1D")
     values = 1 + np.arange(t.size)
     precip = xr.DataArray(values, coords={"T": t})
-    precip_pentad = calc.regroup(precip, group="pentad", method="sum")
+    precip_pentad = calc.regroup(precip, group="pentad").sum()
     precip_daily = calc.resample_interval_to_daily(precip_pentad)
 
     np.testing.assert_array_equal(precip_daily, [
@@ -287,7 +287,7 @@ def test_resample_interval_to_daily_intensive():
     t = pd.date_range(start="2000-01-01", end="2000-01-28", freq="1D")
     values = 1 + np.arange(t.size)
     precip = xr.DataArray(values, coords={"T": t}, attrs={"units": "mm/day"})
-    precip_pentad = calc.regroup(precip, group="pentad", method="mean")
+    precip_pentad = calc.regroup(precip, group="pentad").mean()
     precip_daily = calc.resample_interval_to_daily(precip_pentad)
 
     np.testing.assert_array_equal(precip_daily, [
@@ -630,6 +630,37 @@ def test_seasonal_onset_date_keeps_returning_same_outputs():
         ),
     )
 
+def test_seasonal_onset_date_keeps_returning_same_outputs_with_regroup():
+    precip = data_test_calc.multi_year_data_sample()
+    onsetsds = calc.regroup(
+        time_series=precip, group="1 Mar - 20 Jun"
+    ).map(calc.onset_date, **{
+        "wet_thresh": 1,
+        "wet_spell_length": 3,
+        "wet_spell_thresh": 20,
+        "min_wet_days": 1,
+        "dry_spell_length": 7,
+        "dry_spell_search": 21,
+    })
+    # That part could be included in regroup for this specific group case
+    onsetsds = onsetsds.isel(T_bins=np.arange(0, onsetsds.size, 2), drop=True)
+    # Note that onset_date is written in such a manner that it also outputs T, while
+    # regroup brings intervals T_bins.
T_bins is enough so onset_date could possibly + # be rewritten accordingly + onsets = (onsetsds + onsetsds["T"]) + + np.testing.assert_array_equal( + onsets, + pd.to_datetime( + [ + "NaT", + "2001-03-08T00:00:00.000000000", + "NaT", + "2003-04-12T00:00:00.000000000", + "2004-04-04T00:00:00.000000000", + ], + ), + ) def test_seasonal_cess_date_keeps_returning_same_outputs(): @@ -663,6 +694,38 @@ def test_seasonal_cess_date_keeps_returning_same_outputs(): ), ) +def test_seasonal_cess_date_keeps_returning_same_outputs_with_regroup(): + + precip = data_test_calc.multi_year_data_sample() + wb = calc.water_balance( + daily_rain=precip, + et=5, + taw=60, + sminit=0, + time_dim="T" + ).to_array(name="soil moisture").squeeze("variable", drop=True) + cessds = calc.regroup( + time_series=wb, group="1 Sep - 30 Nov" + ).map(calc.cess_date_from_sm, **{ + "dry_thresh": 5, + "dry_spell_length_thresh": 3, + }) + # Not sure what happened to T_bins here + cessds = cessds.isel(T=np.arange(0, cessds.size, 2), drop=True) + cess = (cessds + cessds["T"]).squeeze() + np.testing.assert_array_equal( + cess, + pd.to_datetime( + [ + "2000-09-21T00:00:00.000000000", + "2001-09-03T00:00:00.000000000", + "2002-09-03T00:00:00.000000000", + "2003-09-24T00:00:00.000000000", + "2004-09-01T00:00:00.000000000", + ], + ), + ) + def test_seasonal_cess_date_from_rain_keeps_returning_same_outputs(): @@ -683,6 +746,24 @@ def test_seasonal_cess_date_from_rain_keeps_returning_same_outputs(): assert cess[0] == pd.to_datetime("2000-09-21T00:00:00.000000000") +def test_seasonal_cess_date_from_rain_keeps_returning_same_outputs_with_regroup(): + + precip = data_test_calc.multi_year_data_sample() + cessds = calc.regroup( + time_series=precip, group="1 Sep - 30 Nov" + ).map(calc.cess_date_from_rain, **{ + "dry_thresh": 5, + "dry_spell_length_thresh": 3, + "et": 5, + "taw": 60, + "sminit": 33.57026932, # from previous test sm output on 8/31/2000 + }) + cessds = cessds.isel(T=np.arange(0, cessds.size, 2), drop=True) + cess = (cessds + cessds["T"]).squeeze() + + assert cess[0] == pd.to_datetime("2000-09-21T00:00:00.000000000") + + def test_seasonal_onset_date(): t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") # this is rr_mrg.sel(T=slice("2000", "2005-02-28")).isel(X=150, Y=150).precip @@ -764,19 +845,15 @@ def test_seasonal_onset_date_with_regroup(): ).rename("synthetic_precip") onsetsds = calc.regroup( - time_series=synthetic_precip, - group="1 Mar - 20 Jun", - method="map", - method_kwargs={ - "func": calc.onset_date, + time_series=synthetic_precip, group="1 Mar - 20 Jun" + ).map(calc.onset_date, **{ "wet_thresh": 1, "wet_spell_length": 3, "wet_spell_thresh": 20, "min_wet_days": 1, "dry_spell_length": 7, "dry_spell_search": 21, - }, - ) + }) onsetsds = onsetsds.isel(T_bins=np.arange(0, onsetsds.size, 2), drop=True) onsets = (onsetsds + onsetsds["T"]).drop_vars("T") @@ -885,15 +962,11 @@ def test_seasonal_cess_date_with_regroup(): time_dim="T" ).to_array(name="soil moisture") cessds = calc.regroup( - time_series=wb, - group="1 Sep - 30 Nov", - method="map", - method_kwargs={ - "func": calc.cess_date_from_sm, + time_series=wb, group="1 Sep - 30 Nov", + ).map(calc.cess_date_from_sm, **{ "dry_thresh": 5, "dry_spell_length_thresh": 3, - }, - ) + }) cessds = cessds.isel(T=np.arange(0, cessds.size, 2), drop=True) cess = (cessds + cessds["T"]).squeeze(drop=True) @@ -987,18 +1060,14 @@ def test_seasonal_cess_date_from_rain_with_regroup(): synthetic_precip, ).rename("synthetic_precip") cessds = calc.regroup( - 
time_series=synthetic_precip, - group="1 Sep - 30 Nov", - method="map", - method_kwargs={ - "func": calc.cess_date_from_rain, + time_series=synthetic_precip, group="1 Sep - 30 Nov" + ).map(calc.cess_date_from_rain, **{ "dry_thresh": 5, "dry_spell_length_thresh": 3, "et": 5, "taw": 60, "sminit": 0, - }, - ) + }) cessds = cessds.isel(T=np.arange(0, cessds.size, 2), drop=True) cess = (cessds + cessds["T"]).squeeze()