From 0fab233091897458b5832d8364baeb6148019c1a Mon Sep 17 00:00:00 2001
From: Adrian Letchford
Date: Wed, 15 Oct 2025 16:51:33 +0000
Subject: [PATCH] Print the top and bottom bad rows

---
Review notes (free text between the three-dash line and the diffstat is not
part of the commit message and is not applied):
* Purpose, per the subject and the failures_str hunk: the failure report used
  to show only `failed_rows.tail(disp_rows)`; it now renders the whole frame
  through the new df2str() helper, which sets display.min_rows and
  display.max_rows to the same limit so pandas prints a truncated view.
* The default for disp_rows goes from 10 to 20.
* A 1000-row fixture and a smoke test that calls result.summary() are added.
* NOTE(review): this copy was rebuilt from a whitespace-collapsed paste. Line
  breaks and Python indentation were inferred from the hunk headers and the
  diffstat. The placement of the blank context lines in the "-138,7" hunk and
  the "-79,3" hunk is an assumption; run `git apply --check` against the base
  blobs before relying on it.
* NOTE(review): the collapsed text showed one more "+" marker before
  "class CheckResult:" and before "@pytest.fixture" than the hunk counts
  (+7,20 and +58,26) and the diffstat (16 and 20 insertions) allow; two blank
  added lines are kept in each place.
* Possible follow-up: `str(df)` is the usual spelling of `df.__str__()`, and
  df2str() has no docstring yet.

 src/recx/results.py           | 18 ++++++++++++++++--
 tests/fixtures/frames.py      | 20 ++++++++++++++++++++
 tests/integration/test_rec.py | 14 ++++++++++++++
 3 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/src/recx/results.py b/src/recx/results.py
index dc32340..5d152a9 100644
--- a/src/recx/results.py
+++ b/src/recx/results.py
@@ -7,6 +7,20 @@
 logger = logging.getLogger(__name__)
 
 
+def df2str(
+    df: pd.DataFrame,
+    max_rows: int | None = None,
+) -> str:
+    args = []
+    args += ["display.min_rows", max_rows]
+    args += ["display.max_rows", max_rows]
+    args += ["display.max_columns", None]
+    args += ["display.width", 10000]
+
+    with pd.option_context(*args):
+        return df.__str__()
+
+
 class CheckResult:
     """
     Result of an individual column (or index) check.
@@ -42,7 +56,7 @@ def __init__(
         column: str | None = None,
         check_args: dict | None = None,
         min_dots: int = 5,
-        disp_rows: int = 10,
+        disp_rows: int = 20,
     ):
         self.failed_rows = failed_rows
         self.column = column
@@ -138,7 +152,7 @@ def log_one_liner(self, width: int):
 
     def failures_str(self) -> str:
         # These are the rows we want to display to the user.
-        disp = str(self.failed_rows.tail(self.disp_rows))
+        disp = df2str(self.failed_rows, max_rows=self.disp_rows)
 
         title = self.mini_signature()
         subtitle = f"Showing up to {self.disp_rows} rows"
diff --git a/tests/fixtures/frames.py b/tests/fixtures/frames.py
index 6954308..312616d 100644
--- a/tests/fixtures/frames.py
+++ b/tests/fixtures/frames.py
@@ -58,6 +58,26 @@ def abs_tol_frames():
     return baseline, candidate
 
 
+# Larger frames designed for tolerance sorting tests
+@pytest.fixture
+def abs_tol_frames_large():
+    dates = pd.date_range(start="2024-01-01", periods=1000, freq="D")
+    values = list(range(1000))
+    baseline = pd.DataFrame(
+        {
+            "date": dates,
+            "B": values,
+        }
+    ).set_index("date")
+    candidate = pd.DataFrame(
+        {
+            "date": dates,
+            "B": values[::-1],
+        }
+    ).set_index("date")
+    return baseline, candidate
+
+
 @pytest.fixture
 def equal_nan_frames():
     baseline = pd.DataFrame({"A": [1.0, None]})
diff --git a/tests/integration/test_rec.py b/tests/integration/test_rec.py
index 6a0b127..14e996b 100644
--- a/tests/integration/test_rec.py
+++ b/tests/integration/test_rec.py
@@ -79,3 +79,17 @@ def test_check_missin_indices(multi_index_frames):
 
     # Should pass because we are not checking for missing indices
     assert result.passed()
+
+
+def test_summary_runs_without_error(abs_tol_frames_large):
+    b, c = abs_tol_frames_large
+    rec = Rec(
+        columns={
+            "B": AbsTolCheck(tol=0.5, sort="desc"),
+        },
+        align_date_col="date",
+    )
+    result = rec.run(b, c)
+
+    assert not result.passed()
+    result.summary()