From 2f6ed0060f299b44c322a79aaf6dd0f95f727ff0 Mon Sep 17 00:00:00 2001 From: Saakshi More Date: Tue, 7 Oct 2025 20:59:58 +0000 Subject: [PATCH 1/6] feat(): ignore index logic for df.isin() --- pandas/core/frame.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68ea6795d47dd..67abf956c6473 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -14093,7 +14093,11 @@ def to_period( setattr(new_obj, axis_name, new_ax) return new_obj - def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: + def isin( + self, + values: Series | DataFrame | Sequence | Mapping, + ignore_index: bool = False, + ) -> DataFrame: """ Whether each element in the DataFrame is contained in values. @@ -14101,10 +14105,24 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: ---------- values : iterable, Series, DataFrame or dict The result will only be true at a location if all the - labels match. If `values` is a Series, that's the index. If - `values` is a dict, the keys must be the column names, - which must match. If `values` is a DataFrame, - then both the index and column labels must match. + labels match. + - If `values` is a Series, the index labels must match. + - If `values` is a dict, the keys must be column names, + which must match. + - If `values` is a DataFrame: + * When ``ignore_index=False`` (default), both the index + and column labels must match, and comparison is done + elementwise. + * When ``ignore_index=True``, only column labels must + match. Each element in the DataFrame is compared + against the set of values in the corresponding column + of ``values``, ignoring row index alignment. + + ignore_index : bool, default False + *Only valid when `values` is a DataFrame.* + If True, ignore index alignment and simply check + if each value in each column occurs in the same + column of `values`. Returns ------- @@ -14185,9 +14203,12 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: raise ValueError("cannot compute isin with a duplicate axis.") result = self.eq(values.reindex_like(self), axis="index") elif isinstance(values, DataFrame): - if not (values.columns.is_unique and values.index.is_unique): - raise ValueError("cannot compute isin with a duplicate axis.") - result = self.eq(values.reindex_like(self)) + if ignore_index: + result = self.isin(values.to_dict("list")) + else: + if not (values.columns.is_unique and values.index.is_unique): + raise ValueError("cannot compute isin with a duplicate axis.") + result = self.eq(values.reindex_like(self)) else: if not is_list_like(values): raise TypeError( From d3317dca1d3de07c492ead489fa7cfedae26acad Mon Sep 17 00:00:00 2001 From: Saakshi More Date: Wed, 8 Oct 2025 17:14:33 +0000 Subject: [PATCH 2/6] fix docstrings indentations for isin --- pandas/core/frame.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 67abf956c6473..9dc0437f9c188 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -14110,13 +14110,13 @@ def isin( - If `values` is a dict, the keys must be column names, which must match. - If `values` is a DataFrame: - * When ``ignore_index=False`` (default), both the index - and column labels must match, and comparison is done - elementwise. - * When ``ignore_index=True``, only column labels must - match. Each element in the DataFrame is compared - against the set of values in the corresponding column - of ``values``, ignoring row index alignment. + * When ``ignore_index=False`` (default), both the index + and column labels must match, and comparison is done + elementwise. + * When ``ignore_index=True``, only column labels must + match. Each element in the DataFrame is compared + against the set of values in the corresponding column + of ``values``, ignoring row index alignment. ignore_index : bool, default False *Only valid when `values` is a DataFrame.* @@ -14139,8 +14139,8 @@ def isin( Notes ----- - ``__iter__`` is used (and not ``__contains__``) to iterate over values - when checking if it contains the elements in DataFrame. + ``__iter__`` is used (and not ``__contains__``) to iterate over values + when checking if it contains the elements in DataFrame. Examples -------- From 2e338f09daf2bf7e9379b11ab284de687f55fef5 Mon Sep 17 00:00:00 2001 From: Saakshi More Date: Tue, 14 Oct 2025 19:24:36 +0000 Subject: [PATCH 3/6] fix: indentation fix for build --- pandas/core/frame.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9dc0437f9c188..410e0d7e8aeeb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -14106,17 +14106,19 @@ def isin( values : iterable, Series, DataFrame or dict The result will only be true at a location if all the labels match. + - If `values` is a Series, the index labels must match. - If `values` is a dict, the keys must be column names, - which must match. + which must match. - If `values` is a DataFrame: + * When ``ignore_index=False`` (default), both the index - and column labels must match, and comparison is done - elementwise. + and column labels must match, and comparison is done + elementwise. * When ``ignore_index=True``, only column labels must - match. Each element in the DataFrame is compared - against the set of values in the corresponding column - of ``values``, ignoring row index alignment. + match. Each element in the DataFrame is compared + against the set of values in the corresponding column + of ``values``, ignoring row index alignment. ignore_index : bool, default False *Only valid when `values` is a DataFrame.* From 31e3237589d494991b5d1bd1f9b4ccefadc9640e Mon Sep 17 00:00:00 2001 From: Saakshi More Date: Wed, 15 Oct 2025 16:18:32 +0000 Subject: [PATCH 4/6] fix: shell pr to rerun build --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 410e0d7e8aeeb..47508af801e6e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -14209,7 +14209,7 @@ def isin( result = self.isin(values.to_dict("list")) else: if not (values.columns.is_unique and values.index.is_unique): - raise ValueError("cannot compute isin with a duplicate axis.") + raise ValueError("cannot compute isin with a duplicate axis") result = self.eq(values.reindex_like(self)) else: if not is_list_like(values): From cbe2127993cb95eb4934af3aee9891c8eef9f0a9 Mon Sep 17 00:00:00 2001 From: Saakshi More Date: Wed, 15 Oct 2025 21:02:27 +0000 Subject: [PATCH 5/6] fix: add period to rectify build error --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 47508af801e6e..410e0d7e8aeeb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -14209,7 +14209,7 @@ def isin( result = self.isin(values.to_dict("list")) else: if not (values.columns.is_unique and values.index.is_unique): - raise ValueError("cannot compute isin with a duplicate axis") + raise ValueError("cannot compute isin with a duplicate axis.") result = self.eq(values.reindex_like(self)) else: if not is_list_like(values): From 9cce864ab278b11d78ca8761b85dc0219c2ac76b Mon Sep 17 00:00:00 2001 From: Saakshi More Date: Wed, 22 Oct 2025 16:11:20 +0000 Subject: [PATCH 6/6] test: add unit test for df.isin(... , ignore_index=True) --- pandas/tests/frame/methods/test_isin.py | 62 +++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py index b4511aad27a93..83a1353c9a77e 100644 --- a/pandas/tests/frame/methods/test_isin.py +++ b/pandas/tests/frame/methods/test_isin.py @@ -225,3 +225,65 @@ def test_isin_not_lossy(self): result = df.isin([val]) expected = DataFrame({"a": [True], "b": [False]}) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values_df,expected", + [ + # Case 1: Same values, different order + ( + DataFrame({"A": [2, 1], "B": [0, 2]}), + DataFrame({"A": [True, True], "B": [True, True]}), + ), + # Case 2: Subset of values + ( + DataFrame({"A": [1], "B": [0]}), + DataFrame({"A": [True, False], "B": [True, False]}), + ), + # Case 3: No matching values + ( + DataFrame({"A": [5, 6], "B": [7, 8]}), + DataFrame({"A": [False, False], "B": [False, False]}), + ), + # Case 4: Missing column + pytest.param( + DataFrame({"A": [1, 2]}), + DataFrame({"A": [True, True], "B": [False, False]}), + id="missing_column", + ), + ], + ) + def test_isin_ignore_index(self, values_df, expected): + """ + Test DataFrame.isin() with ignore_index=True for various scenarios. + + GH#62620 + """ + df = DataFrame({"A": [1, 2], "B": [0, 2]}) + result = df.isin(values_df, ignore_index=True) + tm.assert_frame_equal(result, expected) + + def test_isin_ignore_index_with_duplicates(self): + """ + Test that ignore_index=True works correctly with duplicate values. + + GH#62620 + """ + df = DataFrame({"A": [1, 2, 3], "B": [0, 0, 0]}) + values = DataFrame({"A": [1, 1, 2], "B": [0, 0, 0]}) + result = df.isin(values, ignore_index=True) + expected = DataFrame({"A": [True, True, False], "B": [True, True, True]}) + tm.assert_frame_equal(result, expected) + + def test_isin_ignore_index_diff_indexes(self): + """ + Test that ignore_index=True correctly ignores index values. + + GH#62620 + """ + df = DataFrame({"A": [1, 2], "B": [0, 2]}, index=["row1", "row2"]) + values = DataFrame({"A": [2, 1], "B": [2, 0]}, index=["x", "y"]) + result = df.isin(values, ignore_index=True) + expected = DataFrame( + {"A": [True, True], "B": [True, True]}, index=["row1", "row2"] + ) + tm.assert_frame_equal(result, expected)