diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 380915b3494a3..434dc4cdb278b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1083,6 +1083,7 @@ MultiIndex - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`) +- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` raising ``NotImplementedError`` when the ``fill_value`` parameter is passed (:issue:`61581`) - Bug in :meth:`MultiIndex.union` raising when indexes have duplicates with differing names (:issue:`62059`) - Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`) - Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN``.(:issue:`61841`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68ea6795d47dd..f677f1e37a016 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8491,7 +8491,10 @@ def to_series(right): # pass dtype to avoid doing inference, which would break consistency # with Index/Series ops dtype = None - if getattr(right, "dtype", None) == object: + if ( + getattr(right, "dtype", None) == "object" + or getattr(right, "dtype", None) == object + ): # can't pass right.dtype unconditionally as that would break on e.g. # datetime64[h] ndarray dtype = object @@ -8595,27 +8598,34 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): blockwise. 
""" rvalues = series._values - if not isinstance(rvalues, np.ndarray): - # TODO(EA2D): no need to special-case with 2D EAs - if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): - # We can losslessly+cheaply cast to ndarray - rvalues = np.asarray(rvalues) + if lib.is_np_dtype(rvalues.dtype): + # We can losslessly+cheaply cast to ndarray + # i.e. ndarray or dt64[naive], td64 + # TODO(EA2D): no need to special case with 2D EAs + rvalues = np.asarray(rvalues) + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) else: - return series + rvalues = rvalues.reshape(1, -1) - if axis == 0: - rvalues = rvalues.reshape(-1, 1) - else: - rvalues = rvalues.reshape(1, -1) + rvalues = np.broadcast_to(rvalues, self.shape) + # pass dtype to avoid doing inference + df = self._constructor(rvalues, dtype=rvalues.dtype) - rvalues = np.broadcast_to(rvalues, self.shape) - # pass dtype to avoid doing inference - return self._constructor( - rvalues, - index=self.index, - columns=self.columns, - dtype=rvalues.dtype, - ).__finalize__(series) + else: + # GH#61581 + if axis == 0: + df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues)) + else: + nrows = self.shape[0] + df = DataFrame( + {i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])}, + dtype=rvalues.dtype, + ) + df.index = self.index + df.columns = self.columns + return df.__finalize__(series) def _flex_arith_method( self, other, op, *, axis: Axis = "columns", level=None, fill_value=None @@ -8625,11 +8635,6 @@ def _flex_arith_method( if self._should_reindex_frame_op(other, op, axis, fill_value, level): return self._arith_method_with_reindex(other, op) - if isinstance(other, Series) and fill_value is not None: - # TODO: We could allow this in cases where we end up going - # through the DataFrame path - raise NotImplementedError(f"fill_value {fill_value} not supported.") - other = ops.maybe_prepare_scalar_for_op(other, self.shape) self, other = self._align_for_op(other, axis, flex=True, level=level) diff --git 
a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 67762e0b89c73..04587243b5b07 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1361,12 +1361,7 @@ def test_period_add_timestamp_raises(self, box_with_array): arr + ts with pytest.raises(TypeError, match=msg): ts + arr - if box_with_array is pd.DataFrame: - # TODO: before implementing resolution-inference we got the same - # message with DataFrame and non-DataFrame. Why did that change? - msg = "cannot add PeriodArray and Timestamp" - else: - msg = "cannot add PeriodArray and DatetimeArray" + msg = "cannot add PeriodArray and DatetimeArray" with pytest.raises(TypeError, match=msg): arr + Series([ts]) with pytest.raises(TypeError, match=msg): @@ -1376,16 +1371,9 @@ def test_period_add_timestamp_raises(self, box_with_array): with pytest.raises(TypeError, match=msg): pd.Index([ts]) + arr - if box_with_array is pd.DataFrame: - msg = "cannot add PeriodArray and DatetimeArray" - else: - msg = r"unsupported operand type\(s\) for \+: 'Period' and 'DatetimeArray" + msg = "cannot add PeriodArray and DatetimeArray" with pytest.raises(TypeError, match=msg): arr + pd.DataFrame([ts]) - if box_with_array is pd.DataFrame: - msg = "cannot add PeriodArray and DatetimeArray" - else: - msg = r"unsupported operand type\(s\) for \+: 'DatetimeArray' and 'Period'" with pytest.raises(TypeError, match=msg): pd.DataFrame([ts]) + arr diff --git a/pandas/tests/arithmetic/test_string.py b/pandas/tests/arithmetic/test_string.py index 66b7a5bd440c3..b046a70b92ce9 100644 --- a/pandas/tests/arithmetic/test_string.py +++ b/pandas/tests/arithmetic/test_string.py @@ -244,9 +244,6 @@ def test_mul(any_string_dtype): def test_add_strings(any_string_dtype, request): dtype = any_string_dtype - if dtype != np.dtype(object): - mark = pytest.mark.xfail(reason="GH-28527") - request.applymarker(mark) arr = pd.array(["a", "b", "c", "d"], dtype=dtype) df = pd.DataFrame([["t", 
"y", "v", "w"]], dtype=object) assert arr.__add__(df) is NotImplemented @@ -260,11 +257,17 @@ def test_add_strings(any_string_dtype, request): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(reason="GH-28527") -def test_add_frame(dtype): +def test_add_frame(any_string_dtype, request): + # Inconsistent behavior between different versions of the python engine. + # Environments without PyArrow correctly return the value for python storage. + # The same does not hold with PyArrow installed or for non-python storage. + dtype = any_string_dtype + if HAS_PYARROW or getattr(dtype, "storage", None) != "python": + marks = pytest.mark.xfail(reason="GH-28527") + request.applymarker(marks) + arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype) df = pd.DataFrame([["x", np.nan, "y", np.nan]]) - assert arr.__add__(df) is NotImplemented result = arr + df diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py index 312dfb72e0950..555e69dc82589 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -118,7 +118,7 @@ def test_error_invalid_values(data, all_arithmetic_operators): ops(pd.Timestamp("20180101")) # invalid array-likes - if op not in ("__mul__", "__rmul__"): + if op not in ("__mul__", "__rmul__", "__add__", "__radd__"): # TODO(extension) numpy's mul with object array sees booleans as numbers msg = "|".join( [ diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index e4e26383ae42c..1531c50b307a2 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -160,8 +160,9 @@ def test_error_invalid_values(data, all_arithmetic_operators): ops(pd.Timestamp("20180101")) # invalid array-likes + str_ser = pd.Series("foo", index=s.index) with pytest.raises(TypeError, match=msg): - ops(pd.Series("foo", index=s.index)) + ops(str_ser) msg = "|".join( [ diff --git 
a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 5e50759d34014..71c8bf1e21cf0 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -629,11 +629,43 @@ def test_arith_flex_frame_corner(self, float_frame): expected = float_frame.sort_index() * np.nan tm.assert_frame_equal(result, expected) - with pytest.raises(NotImplementedError, match="fill_value"): - float_frame.add(float_frame.iloc[0], fill_value=3) + @pytest.mark.parametrize("axis", [0, 1]) + def test_arith_flex_frame_fill_value_series(self, float_frame, axis): + rng = np.random.default_rng(60) + mask = rng.random(float_frame.shape) < 0.2 + left = float_frame.mask(mask) + right = left.iloc[0] + + result = left.add(right, axis=axis, fill_value=3) + + if axis == 0: # axis = index, vertical + pad_num = abs(result.shape[0] - len(right)) + mult_num = result.shape[1] + right_pad = np.pad( + right, (0, pad_num), mode="constant", constant_values=(np.nan) + ) + right_df = DataFrame( + [right_pad] * mult_num, columns=result.index, index=result.columns + ).T + + left = left.reindex_like(result) + + else: # axis = columns, horizontal + pad_num = abs(result.shape[1] - len(right)) + mult_num = result.shape[0] + right_pad = np.pad( + right, (0, pad_num), mode="constant", constant_values=(np.nan) + ) + right_df = DataFrame( + [right_pad] * mult_num, index=result.index, columns=result.columns + ) + + left_filled = left.fillna(3) + right_filled = right_df.fillna(3) + expected = right_filled + left_filled + expected = expected.mask(expected == 6, pd.NA) - with pytest.raises(NotImplementedError, match="fill_value"): - float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"]) def test_arith_flex_series_ops(self, simple_frame, op): @@ -675,11 +707,21 @@ def test_arith_flex_zero_len_raises(self): df_len0 = DataFrame(columns=["A", "B"]) df = 
DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - with pytest.raises(NotImplementedError, match="fill_value"): + msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'" + with pytest.raises(TypeError, match=msg): df.add(ser_len0, fill_value="E") - with pytest.raises(NotImplementedError, match="fill_value"): - df_len0.sub(df["A"], axis=None, fill_value=3) + result = df_len0.sub(df, axis=None, fill_value=3) + expected = DataFrame([[2, 1], [0, -1]], columns=["A", "B"]) + tm.assert_frame_equal(result, expected, check_dtype=False) + + result = df_len0.sub(df["A"], axis=0, fill_value=3) + expected = DataFrame([[2, 2], [0, 0]], columns=["A", "B"]) + tm.assert_frame_equal(result, expected, check_dtype=False) + + result = df_len0.sub(df["A"], axis=1, fill_value=3) + expected = DataFrame([], columns=["A", "B", 0, 1]) + tm.assert_frame_equal(result, expected, check_dtype=False) def test_flex_add_scalar_fill_value(self): # GH#12723 @@ -2201,3 +2243,37 @@ def test_mixed_col_index_dtype(string_dtype_no_object): expected.columns = expected.columns.astype(string_dtype_no_object) tm.assert_frame_equal(result, expected) + + +dt_params = [ + (tm.ALL_INT_NUMPY_DTYPES[0], 10), + (tm.ALL_INT_EA_DTYPES[0], 10), + (tm.FLOAT_NUMPY_DTYPES[0], 4.9), + (tm.FLOAT_EA_DTYPES[0], 4.9), +] + +axes = [0, 1] + + +@pytest.mark.parametrize( + "dtype,fill_val, axis", + [(dt, val, axis) for axis in axes for dt, val in dt_params], +) +def test_df_mul_array_fill_value(dtype, fill_val, axis): + # GH 61581 + if dtype == tm.ALL_INT_NUMPY_DTYPES[0]: + # Numpy int type cannot represent NaN + safe_null = fill_val + else: + safe_null = np.nan + + df = DataFrame([[safe_null, 1, 2], [3, safe_null, 5]], dtype=dtype) + + mult = pd.array([safe_null, 1.0], dtype=dtype) + + result = df.mul(mult, axis=0, fill_value=fill_val) + expected = DataFrame( + [[safe_null * safe_null, fill_val, fill_val * 2], [3.0, fill_val, 5.0]] + ).astype(dtype) + + tm.assert_frame_equal(result, expected)