Skip to content

Commit 4e77fb7

Browse files
committed
Updated version of 62317
1 parent 99ae672 commit 4e77fb7

File tree

7 files changed

+118
-9
lines changed

7 files changed

+118
-9
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,8 @@ Other enhancements
200200
- :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`)
201201
- :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`)
202202
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
203+
- :class:`StringDtype` now supports addition with :class:`Series` and :class:`DataFrame` objects containing strings (:issue:`61581`)
204+
203205
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
204206
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
205207
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
@@ -998,6 +1000,7 @@ MultiIndex
9981000
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
9991001
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
10001002
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
1003+
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` not working with the ``fill_value`` parameter (:issue:`61581`)
10011004
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
10021005
- Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN`` (:issue:`61841`)
10031006
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN`` (:issue:`60923`)

pandas/core/arrays/arrow/array.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,14 @@ def _op_method_error_message(self, other, op) -> str:
890890
def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
891891
pa_type = self._pa_array.type
892892
other_original = other
893-
other = self._box_pa(other)
893+
try:
894+
other = self._box_pa(other)
895+
except (ValueError, pa.lib.ArrowTypeError) as err:
896+
# Categorical and Interval dtypes raise errors in self._box_pa
897+
# Could be fixed in the future if needed
898+
raise TypeError(
899+
"Incompatible type when converting to PyArrow dtype for operation."
900+
) from err
894901

895902
if (
896903
pa.types.is_string(pa_type)
@@ -899,6 +906,11 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
899906
):
900907
if op in [operator.add, roperator.radd]:
901908
sep = pa.scalar("", type=pa_type)
909+
if not (is_scalar(other) or isinstance(other, pa.Scalar)):
910+
if len(other) == 0 or isna(other).any():
911+
other = other.cast(pa_type)
912+
elif isna(other):
913+
other = other.cast(pa_type)
902914
try:
903915
if op is operator.add:
904916
result = pc.binary_join_element_wise(self._pa_array, other, sep)

pandas/tests/arithmetic/test_period.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,6 @@ def test_period_add_timestamp_raises(self, box_with_array):
13611361
arr + ts
13621362
with pytest.raises(TypeError, match=msg):
13631363
ts + arr
1364-
13651364
msg = "cannot add PeriodArray and DatetimeArray"
13661365
with pytest.raises(TypeError, match=msg):
13671366
arr + Series([ts])

pandas/tests/arrays/boolean/test_arithmetic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
118118
ops(pd.Timestamp("20180101"))
119119

120120
# invalid array-likes
121-
if op not in ("__mul__", "__rmul__"):
121+
if op not in ("__mul__", "__rmul__", "__add__", "__radd__"):
122122
# TODO(extension) numpy's mul with object array sees booleans as numbers
123123
msg = "|".join(
124124
[

pandas/tests/arrays/floating/test_arithmetic.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,42 @@ def test_error_invalid_values(data, all_arithmetic_operators):
152152
ops(pd.Timestamp("20180101"))
153153

154154
# invalid array-likes
155-
with pytest.raises(TypeError, match=msg):
156-
ops(pd.Series("foo", index=s.index))
155+
str_ser = pd.Series("foo", index=s.index)
156+
if (
157+
all_arithmetic_operators
158+
in [
159+
"__add__",
160+
"__radd__",
161+
]
162+
and pd.options.future.infer_string
163+
):
164+
res = ops(str_ser)
165+
if all_arithmetic_operators == "__radd__":
166+
data_expected = []
167+
for i in data:
168+
if pd.isna(i):
169+
data_expected.append(i)
170+
elif i.is_integer():
171+
data_expected.append("foo" + str(int(i)))
172+
else:
173+
data_expected.append("foo" + str(i))
174+
175+
expected = pd.Series(data_expected, index=s.index)
176+
else:
177+
data_expected = []
178+
for i in data:
179+
if pd.isna(i):
180+
data_expected.append(i)
181+
elif i.is_integer():
182+
data_expected.append(str(int(i)) + "foo")
183+
else:
184+
data_expected.append(str(i) + "foo")
185+
186+
expected = pd.Series(data_expected, index=s.index)
187+
tm.assert_series_equal(res, expected)
188+
else:
189+
with pytest.raises(TypeError, match=msg):
190+
ops(str_ser)
157191

158192
msg = "|".join(
159193
[

pandas/tests/arrays/integer/test_arithmetic.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,26 @@ def test_error_invalid_values(data, all_arithmetic_operators):
197197
# assert_almost_equal stricter, but the expected with pd.NA seems
198198
# more-correct than np.nan here.
199199
tm.assert_series_equal(res, expected)
200+
elif (
201+
all_arithmetic_operators
202+
in [
203+
"__add__",
204+
"__radd__",
205+
]
206+
and pd.options.future.infer_string
207+
):
208+
res = ops(str_ser)
209+
if all_arithmetic_operators == "__radd__":
210+
expected = pd.Series(
211+
[np.nan if pd.isna(x) == 1 else "foo" + str(x) for x in data],
212+
index=s.index,
213+
)
214+
else:
215+
expected = pd.Series(
216+
[np.nan if pd.isna(x) == 1 else str(x) + "foo" for x in data],
217+
index=s.index,
218+
)
219+
tm.assert_series_equal(res, expected)
200220
else:
201221
with tm.external_error_raised(TypeError):
202222
ops(str_ser)

pandas/tests/arrays/string_/test_string.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,19 @@ def test_mul(dtype):
249249
tm.assert_extension_array_equal(result, expected)
250250

251251

252+
def test_add_series(dtype):
253+
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
254+
df = pd.Series(["t", "y", "v", "w"], dtype=object)
255+
256+
result = arr + df
257+
expected = pd.Series(["at", "by", "cv", "dw"]).astype(dtype)
258+
tm.assert_series_equal(result, expected)
259+
260+
result = df + arr
261+
expected = pd.Series(["ta", "yb", "vc", "wd"]).astype(dtype)
262+
tm.assert_series_equal(result, expected)
263+
264+
252265
def test_add_strings(dtype):
253266
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
254267
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
@@ -263,20 +276,48 @@ def test_add_strings(dtype):
263276
tm.assert_frame_equal(result, expected)
264277

265278

266-
@pytest.mark.xfail(reason="GH-28527")
267279
def test_add_frame(dtype):
268280
arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
269281
df = pd.DataFrame([["x", np.nan, "y", np.nan]])
270-
271282
assert arr.__add__(df) is NotImplemented
272283

273284
result = arr + df
274285
expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype)
275-
tm.assert_frame_equal(result, expected)
286+
tm.assert_frame_equal(result, expected, check_dtype=False)
276287

277288
result = df + arr
278289
expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype)
279-
tm.assert_frame_equal(result, expected)
290+
tm.assert_frame_equal(result, expected, check_dtype=False)
291+
292+
293+
@pytest.mark.parametrize(
294+
"invalid",
295+
[
296+
10,
297+
1.5,
298+
pd.Timedelta(hours=31),
299+
pd.Timestamp("2021-01-01"),
300+
True,
301+
pd.Period("2025-09"),
302+
pd.Categorical(["test"]),
303+
pd.offsets.Minute(3),
304+
pd.Interval(1, 2, closed="right"),
305+
],
306+
)
307+
def test_add_frame_invalid(dtype, invalid):
308+
arr = pd.array(["a", np.nan], dtype=dtype)
309+
df = pd.DataFrame([[invalid, invalid]])
310+
311+
msg = "|".join(
312+
[
313+
r"can only concatenate str \(not \".+\"\) to str",
314+
r"unsupported operand type\(s\) for \+: '.+' and 'str'",
315+
r"operation 'add' not supported for dtype 'str|string' with dtype '.+'",
316+
"Incompatible type when converting to PyArrow dtype for operation.",
317+
]
318+
)
319+
with pytest.raises(TypeError, match=msg):
320+
arr + df
280321

281322

282323
def test_comparison_methods_scalar(comparison_op, dtype):

0 commit comments

Comments
 (0)