From 1abd6c13cdf0e5711c67e8c117c1f8aaccbe5b19 Mon Sep 17 00:00:00 2001 From: Pierlou Date: Tue, 7 Apr 2026 15:47:25 +0200 Subject: [PATCH 1/2] fix: only skip NA values if requested --- csv_detective/parsing/columns.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py index 81f4461..f778770 100755 --- a/csv_detective/parsing/columns.py +++ b/csv_detective/parsing/columns.py @@ -166,7 +166,12 @@ def build_remaining_tests_per_col(return_table: pd.DataFrame) -> dict[str, list[ for col in table.columns } handle_empty_columns(return_table) - empty_cols = {col for col in table.columns if table[col].dropna().empty} + empty_cols = ( + # if NA values should be considered valid, then we can skip empty columns + {col for col in table.columns if table[col].isna().all()} + if skipna + else {} + ) remaining_tests_per_col = build_remaining_tests_per_col(return_table) # hashing rows to get nb_duplicates @@ -217,7 +222,7 @@ def build_remaining_tests_per_col(return_table: pd.DataFrame) -> dict[str, list[ fill_value=0, ) for col in list(empty_cols): - if not batch[col].dropna().empty: + if not batch[col].isna().all(): empty_cols.discard(col) remaining_tests_per_col[col] = [ fmt_label From c36a444aa243a789890767429d224d5ac8f9fc5d Mon Sep 17 00:00:00 2001 From: Pierlou Date: Tue, 7 Apr 2026 15:55:23 +0200 Subject: [PATCH 2/2] chore: lint --- csv_detective/parsing/columns.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py index f778770..edd97b4 100755 --- a/csv_detective/parsing/columns.py +++ b/csv_detective/parsing/columns.py @@ -168,9 +168,7 @@ def build_remaining_tests_per_col(return_table: pd.DataFrame) -> dict[str, list[ handle_empty_columns(return_table) empty_cols = ( # if NA values should be considered valid, then we can skip empty columns - {col for col in table.columns if table[col].isna().all()} - if skipna - else {} + {col for col in table.columns if table[col].isna().all()} if skipna else {} ) remaining_tests_per_col = build_remaining_tests_per_col(return_table)