From 45594fa29ab7f3b89d7775e2179400895cb719b8 Mon Sep 17 00:00:00 2001
From: sandeep raju <161712005+sandeeprjs92@users.noreply.github.com>
Date: Tue, 14 Apr 2026 12:01:32 +0530
Subject: [PATCH] fix kfintech conflict resolver dropping group columns

resolve_kfintech_conflicts used groupby.apply with include_groups=False
to silence a pandas 2.2 FutureWarning. That flag strips the group
columns (transaction_number, folio_number) from the apply result, so
the downstream composite_key builder crashed with KeyError on any
KFintech file that reached this step.

Rewrite as a manual row-index scan: loop over groups, collect indices
of rows to keep, return df.loc[kept]. Keeps every column, no
deprecated api, same semantics. Regression test in test_cleaner.py
runs a minimal KFintech df through Cleaner.run and asserts the
expected columns survive.
---
 src/openreversefeed/core/conflict.py | 30 ++++++++++-----
 tests/unit/test_cleaner.py           | 55 ++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/src/openreversefeed/core/conflict.py b/src/openreversefeed/core/conflict.py
index 269c735..9f439ed 100644
--- a/src/openreversefeed/core/conflict.py
+++ b/src/openreversefeed/core/conflict.py
@@ -5,6 +5,16 @@
 
 
 def resolve_kfintech_conflicts(df: pd.DataFrame) -> pd.DataFrame:
+    """If a `(transaction_number, folio_number)` group contains both a plain
+    ``P`` (purchase) and any ``*SIN`` marker row, keep only the first ``P``
+    and drop the SIN rows. Groups without that collision are untouched.
+
+    Implemented as a manual row-index scan rather than ``groupby.apply`` so
+    that the returned DataFrame keeps every column of the input — using
+    ``groupby.apply`` with ``include_groups=False`` strips the group columns
+    from the output, and ``include_groups=True`` is deprecated in pandas
+    2.2.
+    """
     if df.empty:
         return df.copy()
 
@@ -12,17 +22,19 @@ def resolve_kfintech_conflicts(df: pd.DataFrame) -> pd.DataFrame:
     if not required.issubset(df.columns):
         return df.copy()
 
-    def _pick(group: pd.DataFrame) -> pd.DataFrame:
+    kept_indices: list = []
+    for _key, group in df.groupby(
+        ["transaction_number", "folio_number"], sort=False
+    ):
         purreds = set(group["transaction_purred"])
         has_sin = any(
-            isinstance(p, str) and (p == "SIN" or p.endswith("SIN")) for p in purreds
+            isinstance(p, str) and (p == "SIN" or p.endswith("SIN"))
+            for p in purreds
         )
         if "P" in purreds and has_sin:
-            return group[group["transaction_purred"] == "P"].head(1)
-        return group
+            first_p = group[group["transaction_purred"] == "P"].head(1)
+            kept_indices.extend(first_p.index.tolist())
+        else:
+            kept_indices.extend(group.index.tolist())
 
-    return (
-        df.groupby(["transaction_number", "folio_number"], sort=False, group_keys=False)
-        .apply(_pick, include_groups=False)
-        .reset_index(drop=True)
-    )
+    return df.loc[kept_indices].reset_index(drop=True)
diff --git a/tests/unit/test_cleaner.py b/tests/unit/test_cleaner.py
index 9be12d2..c01ddbf 100644
--- a/tests/unit/test_cleaner.py
+++ b/tests/unit/test_cleaner.py
@@ -3,6 +3,7 @@
 import pandas as pd
 
 from openreversefeed.adapters.cams import CamsAdapter
+from openreversefeed.adapters.kfintech import KFintechFormat1Adapter
 from openreversefeed.core.cleaner import Cleaner
 
 
@@ -32,3 +33,57 @@ def test_cleaner_runs_all_steps_on_minimal_cams_df():
     assert "composite_key" in out.columns
     assert "action" in out.columns
     assert "action_tag" in out.columns
+
+
+def test_cleaner_kfintech_preserves_group_columns_through_conflict_resolution():
+    """Regression: the conflict resolver must not strip transaction_number or
+    folio_number from the cleaned DataFrame, otherwise the composite-key
+    builder fails with KeyError downstream."""
+    cleaner = Cleaner()
+    adapter = KFintechFormat1Adapter()
+    df = pd.DataFrame(
+        [
+            {
+                "transaction_id": "KF001",
+                "transaction_number": "T1",
+                "parent_transaction_number": "0",
+                "folio_number": "F1",
+                "product_code": "P1",
+                "scheme_code": "S1",
+                "units": 100.0,
+                "amount": 10000.0,
+                "transaction_date": date(2025, 1, 1),
+                "transaction_mode": "N",
+                "transaction_purred": "P",
+                "transaction_flag": "",
+                "transaction_type": "",
+                "registrar_row_index": 0,
+                "__source_meta": [{}],
+            },
+            {
+                "transaction_id": "KF002",
+                "transaction_number": "T2",
+                "parent_transaction_number": "0",
+                "folio_number": "F1",
+                "product_code": "P1",
+                "scheme_code": "S1",
+                "units": 50.0,
+                "amount": 5000.0,
+                "transaction_date": date(2025, 1, 2),
+                "transaction_mode": "N",
+                "transaction_purred": "R",
+                "transaction_flag": "TO",
+                "transaction_type": "",
+                "registrar_row_index": 1,
+                "__source_meta": [{}],
+            },
+        ]
+    )
+    out = cleaner.run(df, adapter)
+    assert "transaction_number" in out.columns, (
+        "transaction_number must survive the cleaner pipeline — "
+        "otherwise build_kfintech_key raises KeyError"
+    )
+    assert "folio_number" in out.columns
+    assert "composite_key" in out.columns
+    assert not out["composite_key"].isna().any()