From 45594fa29ab7f3b89d7775e2179400895cb719b8 Mon Sep 17 00:00:00 2001 From: sandeep raju <161712005+sandeeprjs92@users.noreply.github.com> Date: Tue, 14 Apr 2026 12:01:32 +0530 Subject: [PATCH] fix kfintech conflict resolver dropping group columns resolve_kfintech_conflicts used groupby.apply with include_groups=False to silence a pandas 2.2 FutureWarning. That flag strips the group columns (transaction_number, folio_number) from the apply result, so the downstream composite_key builder crashed with KeyError on any KFintech file that reached this step. Rewrite as a manual row-index scan: loop over groups, collect indices of rows to keep, return df.loc[kept]. Keeps every column, no deprecated api, same semantics. Regression test in test_cleaner.py runs a minimal KFintech df through Cleaner.run and asserts the expected columns survive. --- src/openreversefeed/core/conflict.py | 30 ++++++++++----- tests/unit/test_cleaner.py | 55 ++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 9 deletions(-) diff --git a/src/openreversefeed/core/conflict.py b/src/openreversefeed/core/conflict.py index 269c735..9f439ed 100644 --- a/src/openreversefeed/core/conflict.py +++ b/src/openreversefeed/core/conflict.py @@ -5,6 +5,16 @@ def resolve_kfintech_conflicts(df: pd.DataFrame) -> pd.DataFrame: + """If a `(transaction_number, folio_number)` group contains both a plain + ``P`` (purchase) and any ``*SIN`` marker row, keep only the first ``P`` + and drop the SIN rows. Groups without that collision are untouched. + + Implemented as a manual row-index scan rather than ``groupby.apply`` so + that the returned DataFrame keeps every column of the input — using + ``groupby.apply`` with ``include_groups=False`` strips the group columns + from the output, and ``include_groups=True`` is deprecated in pandas + 2.2. + """ if df.empty: return df.copy() @@ -12,17 +22,19 @@ def resolve_kfintech_conflicts(df: pd.DataFrame) -> pd.DataFrame: if not required.issubset(df.columns): return df.copy() - def _pick(group: pd.DataFrame) -> pd.DataFrame: + kept_indices: list = [] + for _key, group in df.groupby( + ["transaction_number", "folio_number"], sort=False + ): purreds = set(group["transaction_purred"]) has_sin = any( - isinstance(p, str) and (p == "SIN" or p.endswith("SIN")) for p in purreds + isinstance(p, str) and (p == "SIN" or p.endswith("SIN")) + for p in purreds ) if "P" in purreds and has_sin: - return group[group["transaction_purred"] == "P"].head(1) - return group + first_p = group[group["transaction_purred"] == "P"].head(1) + kept_indices.extend(first_p.index.tolist()) + else: + kept_indices.extend(group.index.tolist()) - return ( - df.groupby(["transaction_number", "folio_number"], sort=False, group_keys=False) - .apply(_pick, include_groups=False) - .reset_index(drop=True) - ) + return df.loc[kept_indices].reset_index(drop=True) diff --git a/tests/unit/test_cleaner.py b/tests/unit/test_cleaner.py index 9be12d2..c01ddbf 100644 --- a/tests/unit/test_cleaner.py +++ b/tests/unit/test_cleaner.py @@ -3,6 +3,7 @@ import pandas as pd from openreversefeed.adapters.cams import CamsAdapter +from openreversefeed.adapters.kfintech import KFintechFormat1Adapter from openreversefeed.core.cleaner import Cleaner @@ -32,3 +33,57 @@ def test_cleaner_runs_all_steps_on_minimal_cams_df(): assert "composite_key" in out.columns assert "action" in out.columns assert "action_tag" in out.columns + + +def test_cleaner_kfintech_preserves_group_columns_through_conflict_resolution(): + """Regression: the conflict resolver must not strip transaction_number or + folio_number from the cleaned DataFrame, otherwise the composite-key + builder fails with KeyError downstream.""" + cleaner = Cleaner() + adapter = KFintechFormat1Adapter() + df = pd.DataFrame( + [ + { + "transaction_id": "KF001", + "transaction_number": "T1", + "parent_transaction_number": "0", + "folio_number": "F1", + "product_code": "P1", + "scheme_code": "S1", + "units": 100.0, + "amount": 10000.0, + "transaction_date": date(2025, 1, 1), + "transaction_mode": "N", + "transaction_purred": "P", + "transaction_flag": "", + "transaction_type": "", + "registrar_row_index": 0, + "__source_meta": [{}], + }, + { + "transaction_id": "KF002", + "transaction_number": "T2", + "parent_transaction_number": "0", + "folio_number": "F1", + "product_code": "P1", + "scheme_code": "S1", + "units": 50.0, + "amount": 5000.0, + "transaction_date": date(2025, 1, 2), + "transaction_mode": "N", + "transaction_purred": "R", + "transaction_flag": "TO", + "transaction_type": "", + "registrar_row_index": 1, + "__source_meta": [{}], + }, + ] + ) + out = cleaner.run(df, adapter) + assert "transaction_number" in out.columns, ( + "transaction_number must survive the cleaner pipeline — " + "otherwise build_kfintech_key raises KeyError" + ) + assert "folio_number" in out.columns + assert "composite_key" in out.columns + assert not out["composite_key"].isna().any()