From dffcd122c8f5080a913e7a98372b89c4189d9280 Mon Sep 17 00:00:00 2001 From: BaptisteDE Date: Fri, 13 Jun 2025 15:16:14 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix=20gaps=20description=20error?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_plumbing.py | 15 +++++++++++---- tide/plumbing.py | 17 +++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/tests/test_plumbing.py b/tests/test_plumbing.py index 079e818..a5b544a 100644 --- a/tests/test_plumbing.py +++ b/tests/test_plumbing.py @@ -292,13 +292,19 @@ def gaps_data(self, time_index): return data - def test_basic_gaps_description(self, gaps_data): + def test_basic_gaps_description(self): """Test basic gap analysis functionality.""" - plumber = Plumber(gaps_data) + my_df = pd.DataFrame({ + + "temp__°C__Building": [np.nan, 1, np.nan, 3], + "power__W__Building": [np.nan, 1, 2, np.nan], + }, index=pd.date_range("2009", freq="h", periods=4, tz='UTC')) + + plumber = Plumber(my_df) result = plumber.get_gaps_description() # Check structure - assert all(col in result.columns for col in gaps_data.columns) + assert all(col in result.columns for col in my_df.columns) expected_stats = [ "data_presence_%", "count", @@ -315,7 +321,8 @@ def test_basic_gaps_description(self, gaps_data): # Check specific values temp_col = "temp__°C__Building" assert result[temp_col]["count"] == 2 - assert result[temp_col]["data_presence_%"] == pytest.approx(83.33, rel=1e-2) + assert result[temp_col]["data_presence_%"] == pytest.approx(50., rel=1e-2) + assert result["combination"]["max"] == pd.to_timedelta("02:00:00") def test_gap_thresholds(self, gaps_data): """Test gap analysis with duration thresholds.""" diff --git a/tide/plumbing.py b/tide/plumbing.py index 21591aa..524ff0f 100644 --- a/tide/plumbing.py +++ b/tide/plumbing.py @@ -154,7 +154,7 @@ class Plumber: Examples -------- - >>> from tide import Plumber + >>> from tide.plumbing import Plumber >>> import pandas as pd >>> # Create sample data with hierarchical column names >>> data = pd.DataFrame( @@ -249,7 +249,7 @@ def get_gaps_description( return_combination: bool = True, ) -> pd.DataFrame: """ - Get statistical description of gaps durations in the data. + Get a statistical description of gaps durations in the data. Parameters ---------- @@ -264,7 +264,8 @@ def get_gaps_description( gaps_gte : str or pd.Timedelta or dt.timedelta, optional Lower threshold for gap duration return_combination : bool, default True - Whether to include statistics for gaps present in any column + Whether to include statistics for gaps aggregation. + Useful to get statistics when all data are available. Returns ------- @@ -301,7 +302,7 @@ def get_gaps_description( durations = [] for gap in gaps_list: if len(gap) > 1: - durations.append(gap[-1] - gap[0]) + durations.append(gap[-1] - gap[0] + pd.to_timedelta(gap.freq)) else: durations.append(pd.to_timedelta(gap.freq)) @@ -405,7 +406,7 @@ def get_pipeline( Examples -------- - >>> from tide import Plumber + >>> from tide.plumbing import Plumber >>> import pandas as pd >>> # Create sample data >>> data = pd.DataFrame( @@ -507,7 +508,7 @@ def get_corrected_data( Examples -------- - >>> from tide import Plumber + >>> from tide.plumbing import Plumber >>> import pandas as pd >>> # Create sample data >>> data = pd.DataFrame( @@ -618,7 +619,7 @@ def plot_gaps_heatmap( Examples -------- - >>> from tide import Plumber + >>> from tide.plumbing import Plumber >>> import pandas as pd >>> # Create sample data with gaps >>> data = pd.DataFrame( @@ -793,7 +794,7 @@ def plot( Examples -------- - >>> from tide import Plumber + >>> from tide.plumbing import Plumber >>> import pandas as pd >>> # Create sample data >>> data = pd.DataFrame(