Skip to content

Commit 3ec10fc

Browse files
update typing expectations for DataFrame.combine_first
1 parent a6b461c commit 3ec10fc

File tree

1 file changed

+45
-25
lines changed

1 file changed

+45
-25
lines changed

pandas/tests/frame/methods/test_combine_first.py

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,12 @@ def test_combine_first_mixed(self):
2626
b = Series(range(2), index=range(5, 7))
2727
g = DataFrame({"A": a, "B": b})
2828

29-
exp = DataFrame({"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6])
29+
exp = DataFrame(
30+
{
31+
"A": list("abab"),
32+
"B": Series([0, 1, 0, 1], index=[0, 1, 5, 6], dtype="Int64"),
33+
}
34+
)
3035
combined = f.combine_first(g)
3136
tm.assert_frame_equal(combined, exp)
3237

@@ -52,7 +57,7 @@ def test_combine_first(self, float_frame):
5257
combined = fcopy.combine_first(fcopy2)
5358

5459
assert (combined["A"] == 1).all()
55-
tm.assert_series_equal(combined["B"], fcopy["B"])
60+
tm.assert_series_equal(combined["B"], fcopy["B"].astype("Float64"))
5661
tm.assert_series_equal(combined["C"], fcopy2["C"])
5762
tm.assert_series_equal(combined["D"], fcopy["D"])
5863

@@ -118,12 +123,15 @@ def test_combine_first_same_as_in_update(self):
118123

119124
other = DataFrame([[45, 45]], index=[0], columns=["A", "B"])
120125
result = df.combine_first(other)
121-
tm.assert_frame_equal(result, df)
126+
expected = df.copy()
127+
expected["A"] = expected["A"].astype("Float64")
128+
expected["B"] = expected["B"].astype("Float64")
129+
tm.assert_frame_equal(result, expected)
122130

123131
df.loc[0, "A"] = np.nan
124132
result = df.combine_first(other)
125-
df.loc[0, "A"] = 45
126-
tm.assert_frame_equal(result, df)
133+
expected.loc[0, "A"] = 45
134+
tm.assert_frame_equal(result, expected)
127135

128136
def test_combine_first_doc_example(self):
129137
# doc example
@@ -202,21 +210,25 @@ def test_combine_first_align_nan(self):
202210

203211
res = dfa.combine_first(dfb)
204212
exp = DataFrame(
205-
{"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2, 5]},
213+
{
214+
"a": [pd.Timestamp("2011-01-01"), pd.NaT],
215+
"b": Series([2, 5], dtype="Int64"),
216+
},
206217
columns=["a", "b"],
207218
)
208219
tm.assert_frame_equal(res, exp)
209220
assert res["a"].dtype == "datetime64[s]"
210-
# TODO: this must be int64
211-
assert res["b"].dtype == "int64"
221+
assert res["b"].dtype == "Int64"
212222

213223
res = dfa.iloc[:0].combine_first(dfb)
214-
exp = DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"])
224+
exp = DataFrame(
225+
{"a": [np.nan, np.nan], "b": Series([4, 5], dtype="Int64")},
226+
columns=["a", "b"],
227+
)
215228
tm.assert_frame_equal(res, exp)
216229
# TODO: this must be datetime64
217230
assert res["a"].dtype == "float64"
218-
# TODO: this must be int64
219-
assert res["b"].dtype == "int64"
231+
assert res["b"].dtype == "Int64"
220232

221233
def test_combine_first_timezone(self, unit):
222234
# see gh-7630
@@ -366,21 +378,21 @@ def test_combine_first_int(self):
366378
df2 = DataFrame({"a": [1, 4]}, dtype="int64")
367379

368380
result_12 = df1.combine_first(df2)
369-
expected_12 = DataFrame({"a": [0, 1, 3, 5]})
381+
expected_12 = DataFrame({"a": Series([0, 1, 3, 5], dtype="Int64")})
370382
tm.assert_frame_equal(result_12, expected_12)
371383

372384
result_21 = df2.combine_first(df1)
373-
expected_21 = DataFrame({"a": [1, 4, 3, 5]})
385+
expected_21 = DataFrame({"a": Series([1, 4, 3, 5], dtype="Int64")})
374386
tm.assert_frame_equal(result_21, expected_21)
375387

376-
@pytest.mark.parametrize("val", [1, 1.0])
377-
def test_combine_first_with_asymmetric_other(self, val):
388+
@pytest.mark.parametrize("val,dtype", [(1, "Int64"), (1.0, "float64")])
389+
def test_combine_first_with_asymmetric_other(self, val, dtype):
378390
# see gh-20699
379391
df1 = DataFrame({"isNum": [val]})
380392
df2 = DataFrame({"isBool": [True]})
381393

382394
res = df1.combine_first(df2)
383-
exp = DataFrame({"isNum": [val], "isBool": [True]})
395+
exp = DataFrame({"isNum": Series([val], dtype=dtype), "isBool": [True]})
384396

385397
tm.assert_frame_equal(res, exp)
386398

@@ -472,9 +484,10 @@ def test_combine_first_with_nan_multiindex():
472484
expected = DataFrame(
473485
{
474486
"c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1],
475-
"d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan],
487+
"d": [1, 4, np.nan, 2, 5, np.nan, np.nan, 3, np.nan, 6, np.nan],
476488
},
477489
index=mi_expected,
490+
dtype="Int64",
478491
)
479492
tm.assert_frame_equal(res, expected)
480493

@@ -492,10 +505,9 @@ def test_combine_preserve_dtypes():
492505
expected = DataFrame(
493506
{
494507
"A": ["a", "b", np.nan, np.nan],
495-
"B": [0, 1, -1, 0],
508+
"B": Series([0, 1, -1, 0], index=[0, 1, 5, 6], dtype="Int64"),
496509
"C": [np.nan, np.nan, "a", "b"],
497-
},
498-
index=[0, 1, 5, 6],
510+
}
499511
)
500512
combined = df1.combine_first(df2)
501513
tm.assert_frame_equal(combined, expected)
@@ -515,12 +527,13 @@ def test_combine_first_duplicates_rows_for_nan_index_values():
515527

516528
expected = DataFrame(
517529
{
518-
"x": [9.0, 10.0, 11.0, np.nan],
519-
"y": [12.0, 13.0, np.nan, 14.0],
530+
"x": [9, 10, 11, np.nan],
531+
"y": [12, 13, np.nan, 14],
520532
},
521533
index=MultiIndex.from_arrays(
522534
[[1, 2, 3, 4], [np.nan, 5, 6, 7]], names=["a", "b"]
523535
),
536+
dtype="Int64",
524537
)
525538
combined = df1.combine_first(df2)
526539
tm.assert_frame_equal(combined, expected)
@@ -531,7 +544,9 @@ def test_combine_first_int64_not_cast_to_float64():
531544
df_1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
532545
df_2 = DataFrame({"A": [1, 20, 30], "B": [40, 50, 60], "C": [12, 34, 65]})
533546
result = df_1.combine_first(df_2)
534-
expected = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [12, 34, 65]})
547+
expected = DataFrame(
548+
{"A": [1, 2, 3], "B": [4, 5, 6], "C": [12, 34, 65]}, dtype="Int64"
549+
)
535550
tm.assert_frame_equal(result, expected)
536551

537552

@@ -545,7 +560,7 @@ def test_midx_losing_dtype():
545560
expected_midx = MultiIndex.from_arrays(
546561
[[0, 0, 1, 1], [np.nan, np.nan, np.nan, np.nan]]
547562
)
548-
expected = DataFrame({"a": [np.nan, 4, 3, 3]}, index=expected_midx)
563+
expected = DataFrame({"a": [pd.NA, 4, 3, 3]}, index=expected_midx, dtype="Float64")
549564
tm.assert_frame_equal(result, expected)
550565

551566

@@ -563,5 +578,10 @@ def test_combine_first_preserve_column_order():
563578
df2 = DataFrame({"A": [5]}, index=[1])
564579

565580
result = df1.combine_first(df2)
566-
expected = DataFrame({"B": [1, 2, 3], "A": [4.0, 5.0, 6.0]})
581+
expected = DataFrame(
582+
{
583+
"B": Series([1, 2, 3], dtype="Int64"),
584+
"A": Series([4.0, 5.0, 6.0], dtype="Float64"),
585+
}
586+
)
567587
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)