@@ -26,7 +26,12 @@ def test_combine_first_mixed(self):
2626 b = Series (range (2 ), index = range (5 , 7 ))
2727 g = DataFrame ({"A" : a , "B" : b })
2828
29- exp = DataFrame ({"A" : list ("abab" ), "B" : [0 , 1 , 0 , 1 ]}, index = [0 , 1 , 5 , 6 ])
29+ exp = DataFrame (
30+ {
31+ "A" : list ("abab" ),
32+ "B" : Series ([0 , 1 , 0 , 1 ], index = [0 , 1 , 5 , 6 ], dtype = "Int64" ),
33+ }
34+ )
3035 combined = f .combine_first (g )
3136 tm .assert_frame_equal (combined , exp )
3237
@@ -52,7 +57,7 @@ def test_combine_first(self, float_frame):
5257 combined = fcopy .combine_first (fcopy2 )
5358
5459 assert (combined ["A" ] == 1 ).all ()
55- tm .assert_series_equal (combined ["B" ], fcopy ["B" ])
60+ tm .assert_series_equal (combined ["B" ], fcopy ["B" ]. astype ( "Float64" ) )
5661 tm .assert_series_equal (combined ["C" ], fcopy2 ["C" ])
5762 tm .assert_series_equal (combined ["D" ], fcopy ["D" ])
5863
@@ -118,12 +123,15 @@ def test_combine_first_same_as_in_update(self):
118123
119124 other = DataFrame ([[45 , 45 ]], index = [0 ], columns = ["A" , "B" ])
120125 result = df .combine_first (other )
121- tm .assert_frame_equal (result , df )
126+ expected = df .copy ()
127+ expected ["A" ] = expected ["A" ].astype ("Float64" )
128+ expected ["B" ] = expected ["B" ].astype ("Float64" )
129+ tm .assert_frame_equal (result , expected )
122130
123131 df .loc [0 , "A" ] = np .nan
124132 result = df .combine_first (other )
125- df .loc [0 , "A" ] = 45
126- tm .assert_frame_equal (result , df )
133+ expected .loc [0 , "A" ] = 45
134+ tm .assert_frame_equal (result , expected )
127135
128136 def test_combine_first_doc_example (self ):
129137 # doc example
@@ -202,21 +210,25 @@ def test_combine_first_align_nan(self):
202210
203211 res = dfa .combine_first (dfb )
204212 exp = DataFrame (
205- {"a" : [pd .Timestamp ("2011-01-01" ), pd .NaT ], "b" : [2 , 5 ]},
213+ {
214+ "a" : [pd .Timestamp ("2011-01-01" ), pd .NaT ],
215+ "b" : Series ([2 , 5 ], dtype = "Int64" ),
216+ },
206217 columns = ["a" , "b" ],
207218 )
208219 tm .assert_frame_equal (res , exp )
209220 assert res ["a" ].dtype == "datetime64[s]"
210- # TODO: this must be int64
211- assert res ["b" ].dtype == "int64"
221+ assert res ["b" ].dtype == "Int64"
212222
213223 res = dfa .iloc [:0 ].combine_first (dfb )
214- exp = DataFrame ({"a" : [np .nan , np .nan ], "b" : [4 , 5 ]}, columns = ["a" , "b" ])
224+ exp = DataFrame (
225+ {"a" : [np .nan , np .nan ], "b" : Series ([4 , 5 ], dtype = "Int64" )},
226+ columns = ["a" , "b" ],
227+ )
215228 tm .assert_frame_equal (res , exp )
216229 # TODO: this must be datetime64
217230 assert res ["a" ].dtype == "float64"
218- # TODO: this must be int64
219- assert res ["b" ].dtype == "int64"
231+ assert res ["b" ].dtype == "Int64"
220232
221233 def test_combine_first_timezone (self , unit ):
222234 # see gh-7630
@@ -366,21 +378,21 @@ def test_combine_first_int(self):
366378 df2 = DataFrame ({"a" : [1 , 4 ]}, dtype = "int64" )
367379
368380 result_12 = df1 .combine_first (df2 )
369- expected_12 = DataFrame ({"a" : [0 , 1 , 3 , 5 ]})
381+ expected_12 = DataFrame ({"a" : Series ( [0 , 1 , 3 , 5 ], dtype = "Int64" ) })
370382 tm .assert_frame_equal (result_12 , expected_12 )
371383
372384 result_21 = df2 .combine_first (df1 )
373- expected_21 = DataFrame ({"a" : [1 , 4 , 3 , 5 ]})
385+ expected_21 = DataFrame ({"a" : Series ( [1 , 4 , 3 , 5 ], dtype = "Int64" ) })
374386 tm .assert_frame_equal (result_21 , expected_21 )
375387
376- @pytest .mark .parametrize ("val" , [1 , 1.0 ])
377- def test_combine_first_with_asymmetric_other (self , val ):
388+ @pytest .mark .parametrize ("val,dtype " , [( 1 , "Int64" ), ( 1.0 , "float64" ) ])
389+ def test_combine_first_with_asymmetric_other (self , val , dtype ):
378390 # see gh-20699
379391 df1 = DataFrame ({"isNum" : [val ]})
380392 df2 = DataFrame ({"isBool" : [True ]})
381393
382394 res = df1 .combine_first (df2 )
383- exp = DataFrame ({"isNum" : [val ], "isBool" : [True ]})
395+ exp = DataFrame ({"isNum" : Series ( [val ], dtype = dtype ) , "isBool" : [True ]})
384396
385397 tm .assert_frame_equal (res , exp )
386398
@@ -472,9 +484,10 @@ def test_combine_first_with_nan_multiindex():
472484 expected = DataFrame (
473485 {
474486 "c" : [np .nan , np .nan , 1 , 1 , 1 , 1 , 1 , np .nan , 1 , np .nan , 1 ],
475- "d" : [1.0 , 4.0 , np .nan , 2.0 , 5.0 , np .nan , np .nan , 3.0 , np .nan , 6.0 , np .nan ],
487+ "d" : [1 , 4 , np .nan , 2 , 5 , np .nan , np .nan , 3 , np .nan , 6 , np .nan ],
476488 },
477489 index = mi_expected ,
490+ dtype = "Int64" ,
478491 )
479492 tm .assert_frame_equal (res , expected )
480493
@@ -492,10 +505,9 @@ def test_combine_preserve_dtypes():
492505 expected = DataFrame (
493506 {
494507 "A" : ["a" , "b" , np .nan , np .nan ],
495- "B" : [0 , 1 , - 1 , 0 ],
508+ "B" : Series ( [0 , 1 , - 1 , 0 ], index = [ 0 , 1 , 5 , 6 ], dtype = "Int64" ) ,
496509 "C" : [np .nan , np .nan , "a" , "b" ],
497- },
498- index = [0 , 1 , 5 , 6 ],
510+ }
499511 )
500512 combined = df1 .combine_first (df2 )
501513 tm .assert_frame_equal (combined , expected )
@@ -515,12 +527,13 @@ def test_combine_first_duplicates_rows_for_nan_index_values():
515527
516528 expected = DataFrame (
517529 {
518- "x" : [9.0 , 10.0 , 11.0 , np .nan ],
519- "y" : [12.0 , 13.0 , np .nan , 14.0 ],
530+ "x" : [9 , 10 , 11 , np .nan ],
531+ "y" : [12 , 13 , np .nan , 14 ],
520532 },
521533 index = MultiIndex .from_arrays (
522534 [[1 , 2 , 3 , 4 ], [np .nan , 5 , 6 , 7 ]], names = ["a" , "b" ]
523535 ),
536+ dtype = "Int64" ,
524537 )
525538 combined = df1 .combine_first (df2 )
526539 tm .assert_frame_equal (combined , expected )
@@ -531,7 +544,9 @@ def test_combine_first_int64_not_cast_to_float64():
531544 df_1 = DataFrame ({"A" : [1 , 2 , 3 ], "B" : [4 , 5 , 6 ]})
532545 df_2 = DataFrame ({"A" : [1 , 20 , 30 ], "B" : [40 , 50 , 60 ], "C" : [12 , 34 , 65 ]})
533546 result = df_1 .combine_first (df_2 )
534- expected = DataFrame ({"A" : [1 , 2 , 3 ], "B" : [4 , 5 , 6 ], "C" : [12 , 34 , 65 ]})
547+ expected = DataFrame (
548+ {"A" : [1 , 2 , 3 ], "B" : [4 , 5 , 6 ], "C" : [12 , 34 , 65 ]}, dtype = "Int64"
549+ )
535550 tm .assert_frame_equal (result , expected )
536551
537552
@@ -545,7 +560,7 @@ def test_midx_losing_dtype():
545560 expected_midx = MultiIndex .from_arrays (
546561 [[0 , 0 , 1 , 1 ], [np .nan , np .nan , np .nan , np .nan ]]
547562 )
548- expected = DataFrame ({"a" : [np . nan , 4 , 3 , 3 ]}, index = expected_midx )
563+ expected = DataFrame ({"a" : [pd . NA , 4 , 3 , 3 ]}, index = expected_midx , dtype = "Float64" )
549564 tm .assert_frame_equal (result , expected )
550565
551566
@@ -563,5 +578,10 @@ def test_combine_first_preserve_column_order():
563578 df2 = DataFrame ({"A" : [5 ]}, index = [1 ])
564579
565580 result = df1 .combine_first (df2 )
566- expected = DataFrame ({"B" : [1 , 2 , 3 ], "A" : [4.0 , 5.0 , 6.0 ]})
581+ expected = DataFrame (
582+ {
583+ "B" : Series ([1 , 2 , 3 ], dtype = "Int64" ),
584+ "A" : Series ([4.0 , 5.0 , 6.0 ], dtype = "Float64" ),
585+ }
586+ )
567587 tm .assert_frame_equal (result , expected )
0 commit comments