From ace7debf1d9065c09f9b8df0654492814120d7ea Mon Sep 17 00:00:00 2001
From: remic
Date: Thu, 17 Jul 2025 10:12:40 -0400
Subject: [PATCH 1/3] function to count extreme events

---
 pepsico/app_calc.py           | 54 +++++++++++++++++++++++++++++++++++
 pepsico/test/test_app_calc.py | 10 +++++++
 2 files changed, 64 insertions(+)

diff --git a/pepsico/app_calc.py b/pepsico/app_calc.py
index 4e670792..a745de77 100644
--- a/pepsico/app_calc.py
+++ b/pepsico/app_calc.py
@@ -1,6 +1,7 @@
 import xarray as xr
 import numpy as np
 from scipy.stats import norm
+import operator as op
 
 
 #This is what we should need for the app
@@ -623,3 +624,56 @@ def spells_length(flagged_data, dim):
     spells = spells_only.roll(**{dim: -1})
     # Turns remaining 0s to NaN
     return spells.where(spells > 0)
+
+
+def number_extreme_events_within_days(
+    daily_data, operator, threshold, window, dim="T"
+):
+    """Count extreme events
+
+    An extreme event is a `window`-day (or shorter) period over which
+    cumulative `daily_data` satisfies the `operator` comparison with `threshold`
+
+    Parameters
+    ----------
+    daily_data : DataArray
+        Array of daily data
+    operator : str
+        "lt", "le", "gt", "ge"
+        determines the condition to meet with respect to `threshold`
+    threshold : float
+        threshold to meet in `daily_data` units
+    window : int
+        maximum number of days to cumulate over to constitute an event
+    dim : str, optional
+        name of `daily_data` time dimension
+
+    Returns
+    -------
+    DataArray
+        Array of count of extreme events
+
+    Notes
+    -----
+    Is meant to be used to produce yearly seasonal time series
+
+    Examples
+    --------
+    Number of rain events of >80mm in 2 days or less:
+    number_extreme_events_within_days(rain_daily_data_in_mm, "gt", 80, 2)
+    """
+    count = 0
+    #Start with shortest events
+    for w in range(1, window+1):
+        for t in range(len(daily_data[dim])-(w-1)):
+            #Flag whether a new event is formed
+            new_event = getattr(op, operator)(
+                daily_data.isel({dim: slice(t, t+w)}).sum(dim), threshold
+            )
+            #Mask days having formed a new event so that they won't count again
+            #for following events (t loop) or longer events (w loop)
+            daily_data[{dim: slice(t, t+w)}] = (
+                daily_data[{dim: slice(t, t+w)}].where(~new_event)
+            )
+            count = count + new_event
+    return count
diff --git a/pepsico/test/test_app_calc.py b/pepsico/test/test_app_calc.py
index 62b22503..56c01d98 100644
--- a/pepsico/test/test_app_calc.py
+++ b/pepsico/test/test_app_calc.py
@@ -108,3 +108,13 @@ def test_median_length_of_spells():
     mendian_spell = app_calc.median_length_of_spells(precip, "T")
 
     assert mendian_spell == 2
+
+
+def test_number_extreme_events_within_days():
+    t = pd.date_range(start="2000-05-01", end="2000-05-10", freq="1D")
+    values = 1 + 0*np.arange(t.size)
+    precip = xr.DataArray(values, coords={"T": t})
+    count = app_calc.number_extreme_events_within_days(precip, "gt", 2, 3)
+
+    np.testing.assert_array_equal(count, [3])
+

From f281d1be47c1d4452ffb931b57621b858b326f3d Mon Sep 17 00:00:00 2001
From: remic
Date: Wed, 5 Nov 2025 13:44:52 -0500
Subject: [PATCH 2/3] don't alter daily_data and only cover case gt

---
 pepsico/app_calc.py           | 20 +++++++-------------
 pepsico/test/test_app_calc.py |  2 +-
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/pepsico/app_calc.py b/pepsico/app_calc.py
index a745de77..28c5ea19 100644
--- a/pepsico/app_calc.py
+++ b/pepsico/app_calc.py
@@ -627,53 +627,47 @@ def spells_length(flagged_data, dim):
 
 
 def number_extreme_events_within_days(
-    daily_data, operator, threshold, window, dim="T"
+    daily_data, threshold, window, dim="T"
 ):
     """Count extreme events
 
     An extreme event is a `window`-day (or shorter) period over which
-    cumulative `daily_data` satisfies the `operator` comparison with `threshold`
+    cumulative `daily_data` is greater than `threshold`
 
     Parameters
     ----------
     daily_data : DataArray
         Array of daily data
-    operator : str
-        "lt", "le", "gt", "ge"
-        determines the condition to meet with respect to `threshold`
     threshold : float
         threshold to meet in `daily_data` units
     window : int
         maximum number of days to cumulate over to constitute an event
     dim : str, optional
         name of `daily_data` time dimension
 
     Returns
     -------
     DataArray
         Array of count of extreme events
 
     Notes
     -----
     Is meant to be used to produce yearly seasonal time series
 
     Examples
     --------
     Number of rain events of >80mm in 2 days or less:
-    number_extreme_events_within_days(rain_daily_data_in_mm, "gt", 80, 2)
+    number_extreme_events_within_days(rain_daily_data_in_mm, 80, 2)
     """
     count = 0
+    dd = daily_data
     #Start with shortest events
     for w in range(1, window+1):
-        for t in range(len(daily_data[dim])-(w-1)):
+        for t in range(len(dd[dim])-(w-1)):
             #Flag whether a new event is formed
-            new_event = getattr(op, operator)(
-                daily_data.isel({dim: slice(t, t+w)}).sum(dim), threshold
-            )
+            new_event = dd.isel({dim: slice(t, t+w)}).sum(dim) > threshold
             #Mask days having formed a new event so that they won't count again
             #for following events (t loop) or longer events (w loop)
-            daily_data[{dim: slice(t, t+w)}] = (
-                daily_data[{dim: slice(t, t+w)}].where(~new_event)
-            )
+            dd[{dim: slice(t, t+w)}] = dd[{dim: slice(t, t+w)}].where(~new_event)
             count = count + new_event
     return count
diff --git a/pepsico/test/test_app_calc.py b/pepsico/test/test_app_calc.py
index 56c01d98..d3100d3a 100644
--- a/pepsico/test/test_app_calc.py
+++ b/pepsico/test/test_app_calc.py
@@ -114,7 +114,7 @@ def test_number_extreme_events_within_days():
     t = pd.date_range(start="2000-05-01", end="2000-05-10", freq="1D")
     values = 1 + 0*np.arange(t.size)
     precip = xr.DataArray(values, coords={"T": t})
-    count = app_calc.number_extreme_events_within_days(precip, "gt", 2, 3)
+    count = app_calc.number_extreme_events_within_days(precip, 2, 3)
 
     np.testing.assert_array_equal(count, [3])
 

From b2eaa3d3fcf1744148f8bc04f5f1dd361f8e9889 Mon Sep 17 00:00:00 2001
From: remic
Date: Wed, 12 Nov 2025 15:09:11 -0500
Subject: [PATCH 3/3] deep copy

---
 pepsico/app_calc.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pepsico/app_calc.py b/pepsico/app_calc.py
index 28c5ea19..36393f10 100644
--- a/pepsico/app_calc.py
+++ b/pepsico/app_calc.py
@@ -1,7 +1,6 @@
 import xarray as xr
 import numpy as np
 from scipy.stats import norm
-import operator as op
 
 
 #This is what we should need for the app
@@ -660,7 +659,7 @@ def number_extreme_events_within_days(
     number_extreme_events_within_days(rain_daily_data_in_mm, 80, 2)
     """
     count = 0
-    dd = daily_data
+    dd = daily_data.copy()
     #Start with shortest events
     for w in range(1, window+1):
         for t in range(len(dd[dim])-(w-1)):
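
Usage sketch (not part of the patches): a minimal example of how the function, as it stands after PATCH 3/3, might be driven to produce the yearly seasonal time series mentioned in the docstring Notes. The gamma-distributed rainfall values and the names rain, rng, and yearly_counts are illustrative assumptions; only pepsico.app_calc, number_extreme_events_within_days, and the "T" time dimension come from the patches above.

    import numpy as np
    import pandas as pd
    import xarray as xr

    from pepsico.app_calc import number_extreme_events_within_days

    # Synthetic daily rainfall in mm over two years (illustrative only).
    t = pd.date_range(start="2000-01-01", end="2001-12-31", freq="1D")
    rng = np.random.default_rng(42)
    rain = xr.DataArray(rng.gamma(2.0, 12.0, t.size), coords={"T": t})

    # Yearly counts of events with more than 80 mm of rain in 2 days or less,
    # applying the function to each calendar year independently.
    yearly_counts = rain.groupby("T.year").map(
        lambda da: number_extreme_events_within_days(da, 80, 2)
    )
    print(yearly_counts)

Grouping with "T.year" before counting means each year is processed separately, so an event can never straddle a year boundary.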