From aa1b89d7d6b303be0f69826c9dd7490626c1df17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Tue, 1 Aug 2023 13:57:43 -0400 Subject: [PATCH 01/12] ideas for daily time grid crunching --- enacts/calc.py | 36 ++++++++++++++++++++++++ enacts/onset/tests/test_calc.py | 50 +++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/enacts/calc.py b/enacts/calc.py index 7fa229422..0bef580c8 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -619,6 +619,42 @@ def daily_tobegroupedby_season( return daily_tobegroupedby_season +def seasonal_groups( + time_coord, start_day, start_month, end_day, end_month +): + """Create seasonal groups identified by first day of each season""" + if start_day == 29 and start_month == 2: + start_day = 1 + start_month = 3 + start_edges = sel_day_and_month(time_coord, start_day, start_month) + if end_day == 29 and end_month == 2: + end_edges = sel_day_and_month(time_coord, 1 , 3, offset=-1) + else: + end_edges = sel_day_and_month(time_coord, end_day, end_month) + if end_edges[0] < start_edges[0]: + end_edges = end_edges[1:] + if start_edges[-1] > end_edges[-1]: + start_edges = start_edges[:-2] + return xr.DataArray( + np.piecewise( + time_coord, + [((time_coord >= start_edges[t]) & (time_coord <= end_edges[t])).values + for t in range(start_edges.size)], + [start_edges[t] for t in range(start_edges.size)] + + [pd.to_datetime([np.nan])], + ), + dims=[time_coord.name], coords={time_coord.name: time_coord}, + ) + + +def dayofyear366(time_coord): + """create 366-day-of-year groups""" + return xr.where( + (np.fmod(time_coord.dt.year, 4) != 0) & (time_coord.dt.dayofyear >= (31+29)), + time_coord.dt.dayofyear + 1, + time_coord.dt.dayofyear, + ) + # Seasonal Functions def seasonal_onset_date( diff --git a/enacts/onset/tests/test_calc.py b/enacts/onset/tests/test_calc.py index 71ac18c4e..2b9aa8383 100644 --- a/enacts/onset/tests/test_calc.py +++ b/enacts/onset/tests/test_calc.py @@ -311,6 +311,56 @@ def test_daily_tobegroupedby_season_picks_right_end_dates(): ).all() +def test_seasonal_groups(): + + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + data = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + sg = calc.seasonal_groups(data["T"], 29, 11, 29, 2) + weighted_seasonal_sum = (data + #drop days out of seasons + .where(~np.isnat(sg), drop=True) + #dummy non-reducing groupby transformation with map + .groupby(sg.where(~np.isnat(sg), drop=True)) + .map(lambda x: 1/x) #dummy because doesnot depend on group + #non-reducing groupby transformation from Methods + .groupby(sg.where(~np.isnat(sg), drop=True)) + .cumsum() + #reducing groupby transformation from Methods + .groupby(sg.where(~np.isnat(sg), drop=True)) + .sum().rename({"group": "T"}) + ) + print(weighted_seasonal_sum) + + assert True + + +def test_dayofyear366(): + + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + data = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_clim = (data + .groupby(calc.dayofyear366(data["T"])) + .mean() + .rename({"group": "T_doy"}) + ) + print(data_clim) + #Let's have anomalies computed on a random other slice of the data + data_ano = data.sel(T=slice("2000-02-28", "2005-02-28")) + data_ano = (data_ano + .groupby(calc.dayofyear366(data_ano["T"])) + #can still do non-reducing transformation if feel like it + .cumsum() + #compute ano + .groupby(calc.dayofyear366(data_ano["T"])) + #I like the idea of having a reserved name for daily clim dim + #other than "group" but maybe not necessary... + - data_clim.rename({"T_doy": "group"}) + ) + print(data_ano) + + assert True + + def test_seasonal_onset_date_keeps_returning_same_outputs(): precip = data_test_calc.multi_year_data_sample() From a765cbea4180ecfa58f08ed8851bca760beae914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Tue, 22 Aug 2023 13:16:56 -0400 Subject: [PATCH 02/12] better comment (before writing full doc) --- enacts/calc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enacts/calc.py b/enacts/calc.py index 0bef580c8..2d38a15ab 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -648,7 +648,7 @@ def seasonal_groups( def dayofyear366(time_coord): - """create 366-day-of-year groups""" + """maps dates into 1-to-366 integers day-of-year""" return xr.where( (np.fmod(time_coord.dt.year, 4) != 0) & (time_coord.dt.dayofyear >= (31+29)), time_coord.dt.dayofyear + 1, From b63b73c7e1d60a7481d47d324f608b8106388ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Tue, 22 Aug 2023 13:31:28 -0400 Subject: [PATCH 03/12] use pd to determine leap years --- enacts/calc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enacts/calc.py b/enacts/calc.py index 2d38a15ab..f6115b86a 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -650,7 +650,7 @@ def seasonal_groups( def dayofyear366(time_coord): """maps dates into 1-to-366 integers day-of-year""" return xr.where( - (np.fmod(time_coord.dt.year, 4) != 0) & (time_coord.dt.dayofyear >= (31+29)), + (~time_coord.dt.is_leap_year) & (time_coord.dt.dayofyear >= (31+29)), time_coord.dt.dayofyear + 1, time_coord.dt.dayofyear, ) From ffd578b17e5bee944db9c73d4c768c9859de6ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Tue, 22 Aug 2023 13:49:20 -0400 Subject: [PATCH 04/12] can't start with 29-Feb if there are non-leap years --- enacts/calc.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index f6115b86a..32d463c85 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -623,9 +623,14 @@ def seasonal_groups( time_coord, start_day, start_month, end_day, end_month ): """Create seasonal groups identified by first day of each season""" - if start_day == 29 and start_month == 2: - start_day = 1 - start_month = 3 + if ( + (start_day == 29) + and (start_month == 2) + and ((~time_coord.dt.is_leap_year).sum() > 0) + ) : + raise Exception( + "if there is at least one non-leap year in time_coord, can not start on 29-Feb" + ) start_edges = sel_day_and_month(time_coord, start_day, start_month) if end_day == 29 and end_month == 2: end_edges = sel_day_and_month(time_coord, 1 , 3, offset=-1) From 13ef0332c5909cd2a91337e1a6df4df1a0de86c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Tue, 22 Aug 2023 13:54:40 -0400 Subject: [PATCH 05/12] better naming, commenting (before writing full doc) --- enacts/calc.py | 5 +++-- enacts/onset/tests/test_calc.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index 32d463c85..d2e415c90 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -619,10 +619,11 @@ def daily_tobegroupedby_season( return daily_tobegroupedby_season -def seasonal_groups( +def season_starts( time_coord, start_day, start_month, end_day, end_month ): - """Create seasonal groups identified by first day of each season""" + """maps dates that belong to a season to its season's start, + to NaT if doesn't belong""" if ( (start_day == 29) and (start_month == 2) diff --git a/enacts/onset/tests/test_calc.py b/enacts/onset/tests/test_calc.py index 2b9aa8383..e8a4d5390 100644 --- a/enacts/onset/tests/test_calc.py +++ b/enacts/onset/tests/test_calc.py @@ -311,11 +311,11 @@ def test_daily_tobegroupedby_season_picks_right_end_dates(): ).all() -def test_seasonal_groups(): +def test_season_starts(): t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") data = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) - sg = calc.seasonal_groups(data["T"], 29, 11, 29, 2) + sg = calc.season_starts(data["T"], 29, 11, 29, 2) weighted_seasonal_sum = (data #drop days out of seasons .where(~np.isnat(sg), drop=True) From 6157ade495fdd3105513c3cd6ce614cb1538f9e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Wed, 23 Aug 2023 10:00:31 -0400 Subject: [PATCH 06/12] creating edges array from scratch rather than extracting from time_coord --- enacts/calc.py | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index d2e415c90..121d82e48 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd import xarray as xr +import datetime # Date Reading functions def read_zarr_data(zarr_path): @@ -620,7 +621,13 @@ def daily_tobegroupedby_season( def season_starts( - time_coord, start_day, start_month, end_day, end_month + time_coord, + start_day, + start_month, + end_day, + end_month, + start_year=None, + end_year=None, ): """maps dates that belong to a season to its season's start, to NaT if doesn't belong""" @@ -632,15 +639,37 @@ def season_starts( raise Exception( "if there is at least one non-leap year in time_coord, can not start on 29-Feb" ) - start_edges = sel_day_and_month(time_coord, start_day, start_month) - if end_day == 29 and end_month == 2: - end_edges = sel_day_and_month(time_coord, 1 , 3, offset=-1) + # Have edges start early enough, end late enough, + if start_year is None : + start_year = time_coord[0].dt.year - 1 + if end_year is None : + end_year = time_coord[-1].dt.year + 1 + # ending season on 29-Feb means ending on 1-Mar with -1 day offset + if (end_day == 29 and end_month == 2) : + end_day = 1 + end_month = 3 + day_offset = pd.Timedelta(days=1) else: - end_edges = sel_day_and_month(time_coord, end_day, end_month) - if end_edges[0] < start_edges[0]: - end_edges = end_edges[1:] - if start_edges[-1] > end_edges[-1]: - start_edges = start_edges[:-2] + day_offset = pd.Timedelta(days=0) + # seasons overlapping 2 years call for 1 year offset + year_offset = 0 + if ( + datetime.datetime(start_year, start_month, start_day) + > datetime.datetime(start_year, end_month, end_day) + ) : + year_offset = 1 + # Create start_/end_edges pairing arrays + start_edges = pd.date_range( + start=datetime.datetime(start_year, start_month, 1), + end=datetime.datetime(end_year - year_offset, start_month, 1), + freq="12MS", + ) + pd.Timedelta(days=start_day-1) + end_edges = pd.date_range( + start=datetime.datetime(start_year + year_offset, end_month, 1), + end=datetime.datetime(end_year, end_month, 1), + freq="12MS", + ) + pd.Timedelta(days=end_day-1) - day_offset + # Return dates mapped to their season's start return xr.DataArray( np.piecewise( time_coord, From b7aeda115a9692bc30a33cee0a2f6b9e92e9d19a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Wed, 30 Aug 2023 09:28:49 -0400 Subject: [PATCH 07/12] removing the part for climato and ano --- enacts/calc.py | 8 -------- enacts/onset/tests/test_calc.py | 27 --------------------------- 2 files changed, 35 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index 121d82e48..1e77edf0f 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -682,14 +682,6 @@ def season_starts( ) -def dayofyear366(time_coord): - """maps dates into 1-to-366 integers day-of-year""" - return xr.where( - (~time_coord.dt.is_leap_year) & (time_coord.dt.dayofyear >= (31+29)), - time_coord.dt.dayofyear + 1, - time_coord.dt.dayofyear, - ) - # Seasonal Functions def seasonal_onset_date( diff --git a/enacts/onset/tests/test_calc.py b/enacts/onset/tests/test_calc.py index e8a4d5390..4b374bc1f 100644 --- a/enacts/onset/tests/test_calc.py +++ b/enacts/onset/tests/test_calc.py @@ -334,33 +334,6 @@ def test_season_starts(): assert True -def test_dayofyear366(): - - t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") - data = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) - data_clim = (data - .groupby(calc.dayofyear366(data["T"])) - .mean() - .rename({"group": "T_doy"}) - ) - print(data_clim) - #Let's have anomalies computed on a random other slice of the data - data_ano = data.sel(T=slice("2000-02-28", "2005-02-28")) - data_ano = (data_ano - .groupby(calc.dayofyear366(data_ano["T"])) - #can still do non-reducing transformation if feel like it - .cumsum() - #compute ano - .groupby(calc.dayofyear366(data_ano["T"])) - #I like the idea of having a reserved name for daily clim dim - #other than "group" but maybe not necessary... - - data_clim.rename({"T_doy": "group"}) - ) - print(data_ano) - - assert True - - def test_seasonal_onset_date_keeps_returning_same_outputs(): precip = data_test_calc.multi_year_data_sample() From 60123c1543a85b5a68989f992a59896a04eaf0c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Wed, 30 Aug 2023 14:50:40 -0400 Subject: [PATCH 08/12] revisiting groups for seasons --- enacts/calc.py | 84 +++++++++++++++++++++++++-------- enacts/onset/tests/test_calc.py | 55 ++++++++++++++------- 2 files changed, 102 insertions(+), 37 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index 1e77edf0f..056f8eb38 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -620,30 +620,62 @@ def daily_tobegroupedby_season( return daily_tobegroupedby_season -def season_starts( - time_coord, +def assign_season_coords( + daily_data, start_day, start_month, end_day, end_month, start_year=None, end_year=None, + time_dim="T", ): - """maps dates that belong to a season to its season's start, - to NaT if doesn't belong""" + """Assign season start and season end coordinates to this object, + mapping time dimension dates that belong to a season to their season start and end. + Data outside seasons are dropped. + + Parameters + ----------- + daily_data : DataArray, Dataset + Daily data to be grouped. + start_day : int + Day of the start date of the season. + start_month : int + Month of the start date of the season. + end_day : int + Day of the end date of the season. + end_month : int + Day of the end date of the season. + time_dim : str, optional + Time coordinate in `daily_data` (default `time_dim`="T"). + Returns + ------- + See Also + -------- + Notes + ----- + Examples + -------- + """ if ( (start_day == 29) and (start_month == 2) - and ((~time_coord.dt.is_leap_year).sum() > 0) + and ((~daily_data[time_dim].dt.is_leap_year).sum() > 0) ) : raise Exception( "if there is at least one non-leap year in time_coord, can not start on 29-Feb" ) - # Have edges start early enough, end late enough, + # Have edges start early enough, and end late enough, if start_year is None : - start_year = time_coord[0].dt.year - 1 + year_offset = 1 + if daily_data[time_dim][0].dt.year == daily_data[time_dim][1].dt.year : + year_offset = 0 + start_year = (daily_data[time_dim][0].dt.year - year_offset).values if end_year is None : - end_year = time_coord[-1].dt.year + 1 + year_offset = 1 + if daily_data[time_dim][0].dt.year == daily_data[time_dim][1].dt.year : + year_offset = 0 + end_year = (daily_data[time_dim][-1].dt.year + year_offset).values # ending season on 29-Feb means ending on 1-Mar with -1 day offset if (end_day == 29 and end_month == 2) : end_day = 1 @@ -669,19 +701,31 @@ def season_starts( end=datetime.datetime(end_year, end_month, 1), freq="12MS", ) + pd.Timedelta(days=end_day-1) - day_offset - # Return dates mapped to their season's start - return xr.DataArray( - np.piecewise( - time_coord, - [((time_coord >= start_edges[t]) & (time_coord <= end_edges[t])).values - for t in range(start_edges.size)], - [start_edges[t] for t in range(start_edges.size)] - + [pd.to_datetime([np.nan])], - ), - dims=[time_coord.name], coords={time_coord.name: time_coord}, + # dates mapped to the season start and end + season_start = xr.DataArray(np.piecewise( + daily_data[time_dim], + [((daily_data[time_dim] >= start_edges[t]) & (daily_data[time_dim] <= end_edges[t])).values + for t in range(start_edges.size)], + [start_edges[t] for t in range(start_edges.size)] + + [pd.to_datetime([np.nan])], + ), dims=[time_dim], coords={time_dim: daily_data[time_dim]}) + season_end = xr.DataArray(np.piecewise( + daily_data[time_dim], + [((daily_data[time_dim] >= start_edges[t]) & (daily_data[time_dim] <= end_edges[t])).values + for t in range(end_edges.size)], + [end_edges[t] for t in range(end_edges.size)] + + [pd.to_datetime([np.nan])], + ), dims=[time_dim], coords={time_dim: daily_data[time_dim]}) + # Drop days out of seasons and assign coords + return (daily_data.where(~np.isnat(season_start), drop=True) + .assign_coords( + season_start=(time_dim, season_start[~np.isnat(season_start)].data) + ) + .assign_coords( + season_end=(time_dim, season_end[~np.isnat(season_end)].data) + ) ) - - + # Seasonal Functions def seasonal_onset_date( diff --git a/enacts/onset/tests/test_calc.py b/enacts/onset/tests/test_calc.py index 4b374bc1f..1237f7226 100644 --- a/enacts/onset/tests/test_calc.py +++ b/enacts/onset/tests/test_calc.py @@ -311,27 +311,48 @@ def test_daily_tobegroupedby_season_picks_right_end_dates(): ).all() -def test_season_starts(): +def test_assign_season_coords(): t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") - data = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) - sg = calc.season_starts(data["T"], 29, 11, 29, 2) - weighted_seasonal_sum = (data - #drop days out of seasons - .where(~np.isnat(sg), drop=True) - #dummy non-reducing groupby transformation with map - .groupby(sg.where(~np.isnat(sg), drop=True)) - .map(lambda x: 1/x) #dummy because doesnot depend on group - #non-reducing groupby transformation from Methods - .groupby(sg.where(~np.isnat(sg), drop=True)) - .cumsum() - #reducing groupby transformation from Methods - .groupby(sg.where(~np.isnat(sg), drop=True)) - .sum().rename({"group": "T"}) + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_set = xr.Dataset( + {"data_var": (["T"], data_var.data)}, + coords={"T": t} ) - print(weighted_seasonal_sum) + data_set = calc.assign_season_coords(data_set, 29, 11, 29, 2) + + assert ((data_set["T"] >= data_set["season_start"]) + & (data_set["T"] <= data_set["season_end"])).all() + + +def test_assign_season_coords_season_start_29Feb(): + + t = pd.date_range(start="2000-01-01", end="2000-06-28", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_var = calc.assign_season_coords(data_var, 29, 2, 29, 3) + + assert ((data_var["T"] >= data_var["season_start"]) + & (data_var["T"] <= data_var["season_end"])).all() + + +def test_assign_season_coords_season_end_29Feb(): + + t = pd.date_range(start="2000-01-01", end="2001-03-02", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_var = calc.assign_season_coords(data_var, 19, 1, 29, 2) - assert True + assert data_var["T"][-1].dt.day == 28 + + +def test_assign_season_coords_start_366before_end(): + + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_var = calc.assign_season_coords(data_var, 19, 11, 29, 2) + + assert ( + (data_var["season_end"] - data_var["season_start"]) <= pd.Timedelta(days=366) + ).all() def test_seasonal_onset_date_keeps_returning_same_outputs(): From e0116145282db2b2742a6a813681eaf8b14f3e19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Wed, 20 Sep 2023 16:46:59 -0400 Subject: [PATCH 09/12] simple implementation of finding the edges --- enacts/calc.py | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index 056f8eb38..ccca369bf 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -665,42 +665,21 @@ def assign_season_coords( raise Exception( "if there is at least one non-leap year in time_coord, can not start on 29-Feb" ) - # Have edges start early enough, and end late enough, - if start_year is None : - year_offset = 1 - if daily_data[time_dim][0].dt.year == daily_data[time_dim][1].dt.year : - year_offset = 0 - start_year = (daily_data[time_dim][0].dt.year - year_offset).values - if end_year is None : - year_offset = 1 - if daily_data[time_dim][0].dt.year == daily_data[time_dim][1].dt.year : - year_offset = 0 - end_year = (daily_data[time_dim][-1].dt.year + year_offset).values # ending season on 29-Feb means ending on 1-Mar with -1 day offset if (end_day == 29 and end_month == 2) : end_day = 1 end_month = 3 - day_offset = pd.Timedelta(days=1) + day_offset = -1 else: - day_offset = pd.Timedelta(days=0) - # seasons overlapping 2 years call for 1 year offset - year_offset = 0 - if ( - datetime.datetime(start_year, start_month, start_day) - > datetime.datetime(start_year, end_month, end_day) - ) : - year_offset = 1 + day_offset = 0 # Create start_/end_edges pairing arrays - start_edges = pd.date_range( - start=datetime.datetime(start_year, start_month, 1), - end=datetime.datetime(end_year - year_offset, start_month, 1), - freq="12MS", - ) + pd.Timedelta(days=start_day-1) - end_edges = pd.date_range( - start=datetime.datetime(start_year + year_offset, end_month, 1), - end=datetime.datetime(end_year, end_month, 1), - freq="12MS", - ) + pd.Timedelta(days=end_day-1) - day_offset + dense_time = daily_data[time_dim].resample({time_dim: "1D"}).asfreq() + start_edges = sel_day_and_month(dense_time, start_day, start_month) + end_edges = sel_day_and_month(dense_time, end_day, end_month, offset=day_offset) + if start_edges[0] > end_edges[0] : + start_edges = xr.concat([daily_data[time_dim][0], start_edges], dim=time_dim) + if end_edges[-1] < start_edges[-1] : + end_edges = xr.concat([end_edges, daily_data[time_dim][-1]], dim=time_dim) # dates mapped to the season start and end season_start = xr.DataArray(np.piecewise( daily_data[time_dim], From dbb355c3cf60a9fa0c6a72d1ac7a6e711787cc6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Mon, 9 Oct 2023 14:47:20 +0200 Subject: [PATCH 10/12] let user deal with years. Completed doc --- enacts/calc.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index ccca369bf..861725322 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -626,8 +626,6 @@ def assign_season_coords( start_month, end_day, end_month, - start_year=None, - end_year=None, time_dim="T", ): """Assign season start and season end coordinates to this object, @@ -647,13 +645,27 @@ def assign_season_coords( end_month : int Day of the end date of the season. time_dim : str, optional - Time coordinate in `daily_data` (default `time_dim`="T"). + Time coordinate in `daily_data` (default `time_dim` ="T"). + Returns - ------- + ------- + daily_data : DataArray, Dataset + `daily_data` where days outside the seasons of interest are dropped + and with two additional coordinates `season_start` and `season_end` + that map the time dimension dates that belong to a season to + their season start and end respectively. + See Also -------- + xarray.DataArray.assign_coords, xarray.DataArray.groupby + Notes ----- + If the first (last) season is truncated, because `daily_data` starts (ends) within it, + The first (last) element of `season_start` ( `season_end` ) differs from all others + and is the first (last) element of `time_dim` . + The additional coordinates are expected to be used by a groupby function. + Examples -------- """ From 8e9d3021054eaf9a177e3dcd7e4e5e96a8d632e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Mon, 9 Oct 2023 14:53:23 +0200 Subject: [PATCH 11/12] generalizing language around time. It's not like xr understand daily anyway --- enacts/calc.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/enacts/calc.py b/enacts/calc.py index 861725322..2de88bc35 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -621,7 +621,7 @@ def daily_tobegroupedby_season( def assign_season_coords( - daily_data, + time_series, start_day, start_month, end_day, @@ -634,8 +634,8 @@ def assign_season_coords( Parameters ----------- - daily_data : DataArray, Dataset - Daily data to be grouped. + time_series : DataArray, Dataset + Time-dependent data to be grouped. start_day : int Day of the start date of the season. start_month : int @@ -645,12 +645,12 @@ def assign_season_coords( end_month : int Day of the end date of the season. time_dim : str, optional - Time coordinate in `daily_data` (default `time_dim` ="T"). + Time coordinate in `time_series` (default `time_dim` ="T"). Returns ------- - daily_data : DataArray, Dataset - `daily_data` where days outside the seasons of interest are dropped + time_series : DataArray, Dataset + `time_series` where days outside the seasons of interest are dropped and with two additional coordinates `season_start` and `season_end` that map the time dimension dates that belong to a season to their season start and end respectively. @@ -661,7 +661,7 @@ def assign_season_coords( Notes ----- - If the first (last) season is truncated, because `daily_data` starts (ends) within it, + If the first (last) season is truncated, because `time_series` starts (ends) within it, The first (last) element of `season_start` ( `season_end` ) differs from all others and is the first (last) element of `time_dim` . The additional coordinates are expected to be used by a groupby function. @@ -672,7 +672,7 @@ def assign_season_coords( if ( (start_day == 29) and (start_month == 2) - and ((~daily_data[time_dim].dt.is_leap_year).sum() > 0) + and ((~time_series[time_dim].dt.is_leap_year).sum() > 0) ) : raise Exception( "if there is at least one non-leap year in time_coord, can not start on 29-Feb" @@ -685,30 +685,30 @@ def assign_season_coords( else: day_offset = 0 # Create start_/end_edges pairing arrays - dense_time = daily_data[time_dim].resample({time_dim: "1D"}).asfreq() + dense_time = time_series[time_dim].resample({time_dim: "1D"}).asfreq() start_edges = sel_day_and_month(dense_time, start_day, start_month) end_edges = sel_day_and_month(dense_time, end_day, end_month, offset=day_offset) if start_edges[0] > end_edges[0] : - start_edges = xr.concat([daily_data[time_dim][0], start_edges], dim=time_dim) + start_edges = xr.concat([time_series[time_dim][0], start_edges], dim=time_dim) if end_edges[-1] < start_edges[-1] : - end_edges = xr.concat([end_edges, daily_data[time_dim][-1]], dim=time_dim) + end_edges = xr.concat([end_edges, time_series[time_dim][-1]], dim=time_dim) # dates mapped to the season start and end season_start = xr.DataArray(np.piecewise( - daily_data[time_dim], - [((daily_data[time_dim] >= start_edges[t]) & (daily_data[time_dim] <= end_edges[t])).values + time_series[time_dim], + [((time_series[time_dim] >= start_edges[t]) & (time_series[time_dim] <= end_edges[t])).values for t in range(start_edges.size)], [start_edges[t] for t in range(start_edges.size)] + [pd.to_datetime([np.nan])], - ), dims=[time_dim], coords={time_dim: daily_data[time_dim]}) + ), dims=[time_dim], coords={time_dim: time_series[time_dim]}) season_end = xr.DataArray(np.piecewise( - daily_data[time_dim], - [((daily_data[time_dim] >= start_edges[t]) & (daily_data[time_dim] <= end_edges[t])).values + time_series[time_dim], + [((time_series[time_dim] >= start_edges[t]) & (time_series[time_dim] <= end_edges[t])).values for t in range(end_edges.size)], [end_edges[t] for t in range(end_edges.size)] + [pd.to_datetime([np.nan])], - ), dims=[time_dim], coords={time_dim: daily_data[time_dim]}) + ), dims=[time_dim], coords={time_dim: time_series[time_dim]}) # Drop days out of seasons and assign coords - return (daily_data.where(~np.isnat(season_start), drop=True) + return (time_series.where(~np.isnat(season_start), drop=True) .assign_coords( season_start=(time_dim, season_start[~np.isnat(season_start)].data) ) From 5382ccde7f8f3b4dc39a2c51e87827d24a8bf691 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Cousin?= Date: Thu, 26 Oct 2023 13:33:03 +0200 Subject: [PATCH 12/12] test that we actually retain all that we want to --- enacts/onset/tests/test_calc.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/enacts/onset/tests/test_calc.py b/enacts/onset/tests/test_calc.py index 1237f7226..2ee16272a 100644 --- a/enacts/onset/tests/test_calc.py +++ b/enacts/onset/tests/test_calc.py @@ -355,6 +355,27 @@ def test_assign_season_coords_start_366before_end(): ).all() +def test_assign_season_coords_retains(): + + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_set = xr.Dataset( + {"data_var": (["T"], data_var.data)}, + coords={"T": t} + ) + data_set = calc.assign_season_coords(data_set, 29, 11, 29, 2) + expected_T = ( + pd.date_range(start="2000-01-01", end="2000-02-29", freq="1D") + .union(pd.date_range(start="2000-11-29", end="2001-02-28", freq="1D")) + .union(pd.date_range(start="2001-11-29", end="2002-02-28", freq="1D")) + .union(pd.date_range(start="2002-11-29", end="2003-02-28", freq="1D")) + .union(pd.date_range(start="2003-11-29", end="2004-02-29", freq="1D")) + .union(pd.date_range(start="2004-11-29", end="2005-02-28", freq="1D")) + ) + + assert (data_set["T"] == expected_T).all() + + def test_seasonal_onset_date_keeps_returning_same_outputs(): precip = data_test_calc.multi_year_data_sample()