diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 68ea6795d47dd..410e0d7e8aeeb 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -14093,7 +14093,11 @@ def to_period(
         setattr(new_obj, axis_name, new_ax)
         return new_obj
 
-    def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
+    def isin(
+        self,
+        values: Series | DataFrame | Sequence | Mapping,
+        ignore_index: bool = False,
+    ) -> DataFrame:
         """
         Whether each element in the DataFrame is contained in values.
 
@@ -14101,10 +14105,26 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
         ----------
         values : iterable, Series, DataFrame or dict
             The result will only be true at a location if all the
-            labels match. If `values` is a Series, that's the index. If
-            `values` is a dict, the keys must be the column names,
-            which must match. If `values` is a DataFrame,
-            then both the index and column labels must match.
+            labels match.
+
+            - If `values` is a Series, the index labels must match.
+            - If `values` is a dict, the keys must be column names,
+              which must match.
+            - If `values` is a DataFrame:
+
+              * When ``ignore_index=False`` (default), both the index
+                and column labels must match, and comparison is done
+                elementwise.
+              * When ``ignore_index=True``, only column labels must
+                match. Each element in the DataFrame is compared
+                against the set of values in the corresponding column
+                of ``values``, ignoring row index alignment.
+
+        ignore_index : bool, default False
+            Only used when `values` is a DataFrame; ignored otherwise.
+            If True, ignore index alignment and simply check
+            if each value in each column occurs in the same
+            column of `values`.
 
         Returns
         -------
@@ -14121,8 +14141,8 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
 
         Notes
         -----
-            ``__iter__`` is used (and not ``__contains__``) to iterate over values
-            when checking if it contains the elements in DataFrame.
+        ``__iter__`` is used (and not ``__contains__``) to iterate over values
+        when checking if it contains the elements in DataFrame.
 
         Examples
         --------
@@ -14185,9 +14205,16 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
                 raise ValueError("cannot compute isin with a duplicate axis.")
             result = self.eq(values.reindex_like(self), axis="index")
         elif isinstance(values, DataFrame):
-            if not (values.columns.is_unique and values.index.is_unique):
-                raise ValueError("cannot compute isin with a duplicate axis.")
-            result = self.eq(values.reindex_like(self))
+            if ignore_index:
+                # Row labels are irrelevant here, but column labels must still
+                # be unambiguous: to_dict would otherwise drop duplicates.
+                if not values.columns.is_unique:
+                    raise ValueError("cannot compute isin with a duplicate axis.")
+                result = self.isin(values.to_dict("list"))
+            else:
+                if not (values.columns.is_unique and values.index.is_unique):
+                    raise ValueError("cannot compute isin with a duplicate axis.")
+                result = self.eq(values.reindex_like(self))
         else:
             if not is_list_like(values):
                 raise TypeError(
diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py
index b4511aad27a93..83a1353c9a77e 100644
--- a/pandas/tests/frame/methods/test_isin.py
+++ b/pandas/tests/frame/methods/test_isin.py
@@ -225,3 +225,73 @@ def test_isin_not_lossy(self):
         result = df.isin([val])
         expected = DataFrame({"a": [True], "b": [False]})
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "values_df,expected",
+        [
+            # Case 1: Same values, different order
+            (
+                DataFrame({"A": [2, 1], "B": [0, 2]}),
+                DataFrame({"A": [True, True], "B": [True, True]}),
+            ),
+            # Case 2: Subset of values
+            (
+                DataFrame({"A": [1], "B": [0]}),
+                DataFrame({"A": [True, False], "B": [True, False]}),
+            ),
+            # Case 3: No matching values
+            (
+                DataFrame({"A": [5, 6], "B": [7, 8]}),
+                DataFrame({"A": [False, False], "B": [False, False]}),
+            ),
+            # Case 4: Missing column
+            pytest.param(
+                DataFrame({"A": [1, 2]}),
+                DataFrame({"A": [True, True], "B": [False, False]}),
+                id="missing_column",
+            ),
+        ],
+    )
+    def test_isin_ignore_index(self, values_df, expected):
+        """
+        Test DataFrame.isin() with ignore_index=True for various scenarios.
+
+        GH#62620
+        """
+        df = DataFrame({"A": [1, 2], "B": [0, 2]})
+        result = df.isin(values_df, ignore_index=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_isin_ignore_index_with_duplicates(self):
+        """
+        Test that ignore_index=True works correctly with duplicate values.
+
+        GH#62620
+        """
+        df = DataFrame({"A": [1, 2, 3], "B": [0, 0, 0]})
+        values = DataFrame({"A": [1, 1, 2], "B": [0, 0, 0]})
+        result = df.isin(values, ignore_index=True)
+        expected = DataFrame({"A": [True, True, False], "B": [True, True, True]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_isin_ignore_index_diff_indexes(self):
+        """
+        Test that ignore_index=True correctly ignores index values.
+
+        GH#62620
+        """
+        df = DataFrame({"A": [1, 2], "B": [0, 2]}, index=["row1", "row2"])
+        values = DataFrame({"A": [2, 1], "B": [2, 0]}, index=["x", "y"])
+        result = df.isin(values, ignore_index=True)
+        expected = DataFrame(
+            {"A": [True, True], "B": [True, True]}, index=["row1", "row2"]
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_isin_ignore_index_duplicate_columns_raises(self):
+        # GH#62620 - column labels must stay unambiguous without row alignment
+        df = DataFrame({"A": [1, 2], "B": [0, 2]})
+        values = DataFrame([[1, 2]], columns=["A", "A"])
+        msg = "cannot compute isin with a duplicate axis"
+        with pytest.raises(ValueError, match=msg):
+            df.isin(values, ignore_index=True)