From 8f12e3d6cb347fc6d3acaeaff898e401b2283459 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Jul 2025 13:52:04 -0700 Subject: [PATCH 1/3] BUG/DEPR: logical operation with bool and string (#61995) --- doc/source/whatsnew/v2.3.2.rst | 2 +- pandas/core/arrays/arrow/array.py | 19 +++++++++++++++++++ pandas/core/arrays/string_.py | 21 +++++++++++++++++++++ pandas/tests/strings/test_strings.py | 24 ++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst index 03244c808ad03..53a8d28687518 100644 --- a/doc/source/whatsnew/v2.3.2.rst +++ b/doc/source/whatsnew/v2.3.2.rst @@ -25,7 +25,7 @@ Bug fixes - Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the "string" type in the JSON Table Schema for :class:`StringDtype` columns (:issue:`61889`) - +- Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`) .. --------------------------------------------------------------------------- .. _whatsnew_232.contributors: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 5b2d3b7c065a8..467b7d1a72310 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -829,6 +829,25 @@ def _logical_method(self, other, op): # integer types. Otherwise these are boolean ops. if pa.types.is_integer(self._pa_array.type): return self._evaluate_op_method(other, op, ARROW_BIT_WISE_FUNCS) + elif ( + ( + pa.types.is_string(self._pa_array.type) + or pa.types.is_large_string(self._pa_array.type) + ) + and op in (roperator.ror_, roperator.rand_, roperator.rxor) + and isinstance(other, np.ndarray) + and other.dtype == bool + ): + # GH#60234 backward compatibility for the move to StringDtype in 3.0 + op_name = op.__name__[1:].strip("_") + warnings.warn( + f"'{op_name}' operations between boolean dtype and {self.dtype} are " + "deprecated and will raise in a future version. Explicitly " + "cast the strings to a boolean dtype before operating instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return op(other, self.astype(bool)) else: return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 9c8dc2054106a..0c872d4f34076 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -49,6 +49,7 @@ missing, nanops, ops, + roperator, ) from pandas.core.algorithms import isin from pandas.core.array_algos import masked_reductions @@ -385,6 +386,26 @@ class BaseStringArray(ExtensionArray): dtype: StringDtype + # TODO(4.0): Once the deprecation here is enforced, this method can be + # removed and we use the parent class method instead. + def _logical_method(self, other, op): + if ( + op in (roperator.ror_, roperator.rand_, roperator.rxor) + and isinstance(other, np.ndarray) + and other.dtype == bool + ): + # GH#60234 backward compatibility for the move to StringDtype in 3.0 + op_name = op.__name__[1:].strip("_") + warnings.warn( + f"'{op_name}' operations between boolean dtype and {self.dtype} are " + "deprecated and will raise in a future version. Explicitly " + "cast the strings to a boolean dtype before operating instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return op(other, self.astype(bool)) + return NotImplemented + @doc(ExtensionArray.tolist) def tolist(self): if self.ndim > 1: diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index c729b910d05a7..90ff27c9459ca 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -776,3 +776,27 @@ def test_series_str_decode(): result = Series([b"x", b"y"]).str.decode(encoding="UTF-8", errors="strict") expected = Series(["x", "y"], dtype="str") tm.assert_series_equal(result, expected) + + +def test_reversed_logical_ops(any_string_dtype): + # GH#60234 + dtype = any_string_dtype + warn = None if dtype == object else FutureWarning + left = Series([True, False, False, True]) + right = Series(["", "", "b", "c"], dtype=dtype) + + msg = "operations between boolean dtype and" + with tm.assert_produces_warning(warn, match=msg): + result = left | right + expected = left | right.astype(bool) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(warn, match=msg): + result = left & right + expected = left & right.astype(bool) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(warn, match=msg): + result = left ^ right + expected = left ^ right.astype(bool) + tm.assert_series_equal(result, expected) From 8388109ad2591c3cbbd102bb6bb64c89ee2645b5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 15 Aug 2025 09:14:26 +0200 Subject: [PATCH 2/3] Future -> DeprecationWarning --- pandas/core/arrays/arrow/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 467b7d1a72310..cee31d799a7ac 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -844,7 +844,7 @@ def _logical_method(self, other, op): f"'{op_name}' operations between boolean dtype and {self.dtype} are " "deprecated and will raise in a future version. Explicitly " "cast the strings to a boolean dtype before operating instead.", - FutureWarning, + DeprecationWarning, stacklevel=find_stack_level(), ) return op(other, self.astype(bool)) From 54e6bd152312ffe1e3e71e1734e282c26e00e49c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 15 Aug 2025 10:30:39 +0200 Subject: [PATCH 3/3] fixup --- pandas/core/arrays/string_.py | 2 +- pandas/tests/strings/test_strings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 0c872d4f34076..d497c18cb27d6 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -400,7 +400,7 @@ def _logical_method(self, other, op): f"'{op_name}' operations between boolean dtype and {self.dtype} are " "deprecated and will raise in a future version. Explicitly " "cast the strings to a boolean dtype before operating instead.", - FutureWarning, + DeprecationWarning, stacklevel=find_stack_level(), ) return op(other, self.astype(bool)) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 90ff27c9459ca..dc60f7daf6dd2 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -781,7 +781,7 @@ def test_series_str_decode(): def test_reversed_logical_ops(any_string_dtype): # GH#60234 dtype = any_string_dtype - warn = None if dtype == object else FutureWarning + warn = None if dtype == object else DeprecationWarning left = Series([True, False, False, True]) right = Series(["", "", "b", "c"], dtype=dtype)