Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ MultiIndex
- :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`)
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it looks like you accidentally over-wrote a different note?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like I did, will fix

- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` now works with ``fill_value`` parameter (:issue:`61581`)
- Bug in :meth:`MultiIndex.union` raising when indexes have duplicates with differing names (:issue:`62059`)
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
- Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN``.(:issue:`61841`)
Expand Down
53 changes: 29 additions & 24 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8491,7 +8491,10 @@ def to_series(right):
# pass dtype to avoid doing inference, which would break consistency
# with Index/Series ops
dtype = None
if getattr(right, "dtype", None) == object:
if (
getattr(right, "dtype", None) == "object"
or getattr(right, "dtype", None) == object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shouldn't be necessary

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, this is here to keep the output dtype for some operations as objects. Specifically, pandas/tests/arithmetic/test_string.py::test_add_strings[string=object]
returns

E       Attribute "dtype" are different
E       [left]:  <StringDtype(na_value=nan)>
E       [right]: object

When adding an object to another object. I can remove it if you think adding an exception for this test case is the better option.

):
# can't pass right.dtype unconditionally as that would break on e.g.
# datetime64[h] ndarray
dtype = object
Expand Down Expand Up @@ -8595,27 +8598,34 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
blockwise.
"""
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
if lib.is_np_dtype(rvalues.dtype):
# We can losslessly+cheaply cast to ndarray
# i.e. ndarray or dt64[naive], td64
# TODO(EA2D): no need to special case with 2D EAs
rvalues = np.asarray(rvalues)

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
return series
rvalues = rvalues.reshape(1, -1)

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, self.shape)
# pass dtype to avoid doing inference
df = self._constructor(rvalues, dtype=rvalues.dtype)

rvalues = np.broadcast_to(rvalues, self.shape)
# pass dtype to avoid doing inference
return self._constructor(
rvalues,
index=self.index,
columns=self.columns,
dtype=rvalues.dtype,
).__finalize__(series)
else:
# GH#61581
if axis == 0:
df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues))
else:
nrows = self.shape[0]
df = DataFrame(
{i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])},
dtype=rvalues.dtype,
)
df.index = self.index
df.columns = self.columns
return df.__finalize__(series)

def _flex_arith_method(
self, other, op, *, axis: Axis = "columns", level=None, fill_value=None
Expand All @@ -8625,11 +8635,6 @@ def _flex_arith_method(
if self._should_reindex_frame_op(other, op, axis, fill_value, level):
return self._arith_method_with_reindex(other, op)

if isinstance(other, Series) and fill_value is not None:
# TODO: We could allow this in cases where we end up going
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")

other = ops.maybe_prepare_scalar_for_op(other, self.shape)
self, other = self._align_for_op(other, axis, flex=True, level=level)

Expand Down
16 changes: 2 additions & 14 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,12 +1361,7 @@ def test_period_add_timestamp_raises(self, box_with_array):
arr + ts
with pytest.raises(TypeError, match=msg):
ts + arr
if box_with_array is pd.DataFrame:
# TODO: before implementing resolution-inference we got the same
# message with DataFrame and non-DataFrame. Why did that change?
msg = "cannot add PeriodArray and Timestamp"
else:
msg = "cannot add PeriodArray and DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + Series([ts])
with pytest.raises(TypeError, match=msg):
Expand All @@ -1376,16 +1371,9 @@ def test_period_add_timestamp_raises(self, box_with_array):
with pytest.raises(TypeError, match=msg):
pd.Index([ts]) + arr

if box_with_array is pd.DataFrame:
msg = "cannot add PeriodArray and DatetimeArray"
else:
msg = r"unsupported operand type\(s\) for \+: 'Period' and 'DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + pd.DataFrame([ts])
if box_with_array is pd.DataFrame:
msg = "cannot add PeriodArray and DatetimeArray"
else:
msg = r"unsupported operand type\(s\) for \+: 'DatetimeArray' and 'Period'"
with pytest.raises(TypeError, match=msg):
pd.DataFrame([ts]) + arr

Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/arithmetic/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,6 @@ def test_mul(any_string_dtype):

def test_add_strings(any_string_dtype, request):
dtype = any_string_dtype
if dtype != np.dtype(object):
mark = pytest.mark.xfail(reason="GH-28527")
request.applymarker(mark)
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
assert arr.__add__(df) is NotImplemented
Expand All @@ -261,10 +258,15 @@ def test_add_strings(any_string_dtype, request):


@pytest.mark.xfail(reason="GH-28527")
def test_add_frame(dtype):
def test_add_frame(request, dtype):
if dtype.storage == "python":
# Inconsistent behavior between different versions of the python engine.
# Some return correctly, some return but with the wrong dtype
# Others just fail, we are blanket failing all
mark = pytest.mark.xfail(reason="[XPASS(strict)] GH-28527")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is confusing. There is a decorator xfailing the test. Now you're adding a second xfail with a reason being that it passes? do two xfail markers cancel out?

i think the thing to do is remove the decorator and add the marker in only the actually-failing cases

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's valid, I will edit the test

request.node.applymarker(mark)
arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
df = pd.DataFrame([["x", np.nan, "y", np.nan]])

assert arr.__add__(df) is NotImplemented

result = arr + df
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Timestamp("20180101"))

# invalid array-likes
if op not in ("__mul__", "__rmul__"):
if op not in ("__mul__", "__rmul__", "__add__", "__radd__"):
# TODO(extension) numpy's mul with object array sees booleans as numbers
msg = "|".join(
[
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/arrays/floating/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Timestamp("20180101"))

# invalid array-likes
str_ser = pd.Series("foo", index=s.index)
with pytest.raises(TypeError, match=msg):
ops(pd.Series("foo", index=s.index))
ops(str_ser)

msg = "|".join(
[
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,7 @@ def test_astype_float(dtype, any_float_dtype):
# Don't compare arrays (37974)
ser = pd.Series(["1.1", pd.NA, "3.3"], dtype=dtype)
result = ser.astype(any_float_dtype)
item = np.nan if isinstance(result.dtype, np.dtype) else pd.NA
expected = pd.Series([1.1, item, 3.3], dtype=any_float_dtype)
expected = pd.Series([1.1, np.nan, 3.3], dtype=any_float_dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems unrelated to this PR? sure its necessary?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I dont remember why I made this change

tm.assert_series_equal(result, expected)


Expand Down
107 changes: 100 additions & 7 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,11 +629,43 @@ def test_arith_flex_frame_corner(self, float_frame):
expected = float_frame.sort_index() * np.nan
tm.assert_frame_equal(result, expected)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], fill_value=3)
@pytest.mark.parametrize("axis", [0, 1])
def test_arith_flex_frame_fill_value_series(self, float_frame, axis):
rng = np.random.default_rng(60)
mask = rng.random(float_frame.shape) < 0.2
left = float_frame.mask(mask)
right = left.iloc[0]

result = left.add(right, axis=axis, fill_value=3)

if axis == 0: # axis = index, vertical
pad_num = abs(result.shape[0] - len(right))
mult_num = result.shape[1]
right_pad = np.pad(
right, (0, pad_num), mode="constant", constant_values=(np.nan)
)
right_df = DataFrame(
[right_pad] * mult_num, columns=result.index, index=result.columns
).T

left = left.reindex_like(result)

else: # axis = columns, horizontal
pad_num = abs(result.shape[1] - len(right))
mult_num = result.shape[0]
right_pad = np.pad(
right, (0, pad_num), mode="constant", constant_values=(np.nan)
)
right_df = DataFrame(
[right_pad] * mult_num, index=result.index, columns=result.columns
)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], axis="index", fill_value=3)
left_filled = left.fillna(3)
right_filled = right_df.fillna(3)
expected = right_filled + left_filled
expected = expected.mask(expected == 6, pd.NA)

tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"])
def test_arith_flex_series_ops(self, simple_frame, op):
Expand Down Expand Up @@ -675,11 +707,21 @@ def test_arith_flex_zero_len_raises(self):
df_len0 = DataFrame(columns=["A", "B"])
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

with pytest.raises(NotImplementedError, match="fill_value"):
msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'"
with pytest.raises(TypeError, match=msg):
df.add(ser_len0, fill_value="E")

with pytest.raises(NotImplementedError, match="fill_value"):
df_len0.sub(df["A"], axis=None, fill_value=3)
result = df_len0.sub(df, axis=None, fill_value=3)
expected = DataFrame([[2, 1], [0, -1]], columns=["A", "B"])
tm.assert_frame_equal(result, expected, check_dtype=False)

result = df_len0.sub(df["A"], axis=0, fill_value=3)
expected = DataFrame([[2, 2], [0, 0]], columns=["A", "B"])
tm.assert_frame_equal(result, expected, check_dtype=False)

result = df_len0.sub(df["A"], axis=1, fill_value=3)
expected = DataFrame([], columns=["A", "B", 0, 1])
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_flex_add_scalar_fill_value(self):
# GH#12723
Expand Down Expand Up @@ -2201,3 +2243,54 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
expected.columns = expected.columns.astype(string_dtype_no_object)

tm.assert_frame_equal(result, expected)


dt_params = [
(tm.ALL_INT_NUMPY_DTYPES[0], 10),
(tm.ALL_INT_EA_DTYPES[0], 10),
(tm.FLOAT_NUMPY_DTYPES[0], 4.9),
(tm.FLOAT_EA_DTYPES[0], 4.9),
]

axes = [0, 1]


@pytest.mark.parametrize(
"data_type,fill_val, axis",
[(dt, val, axis) for axis in axes for dt, val in dt_params],
)
def test_df_mul_array_fill_value(data_type, fill_val, axis):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i remain super-skeptical that this test (or test_arith_flex_frame_fill_value_series) are minimally-complex for what you are trying to do.

# GH 61581
base_data = np.arange(12).reshape(4, 3)
df = DataFrame(base_data)
mult_list = [np.nan, 1, 5, np.nan]
mult_list = mult_list[: df.shape[axis]]

if data_type in tm.ALL_INT_NUMPY_DTYPES:
# Numpy int type cannot represent NaN
mult_np = np.asarray(mult_list)
mult_list = np.nan_to_num(mult_np, nan=fill_val)

mult_data = pd.array(mult_list, dtype=data_type)

for i in range(df.shape[0]):
try:
df.iat[i, i] = np.nan
df.iat[i + 2, i] = pd.NA
except IndexError:
pass

if axis == 0:
mult_mat = np.broadcast_to(mult_data.reshape(-1, 1), df.shape)
mask = np.isnan(mult_mat)
else:
mult_mat = np.broadcast_to(mult_data.reshape(1, -1), df.shape)
mask = np.isnan(mult_mat)
mask = df.isna().values & mask

df_result = df.mul(mult_data, axis=axis, fill_value=fill_val)
df_expected = (df.fillna(fill_val).mul(mult_data.fillna(fill_val), axis=axis)).mask(
mask, np.nan
)

tm.assert_frame_equal(df_result, df_expected)
Loading