@@ -1462,13 +1462,14 @@ def test_merge_readonly(self):
14621462
14631463def _check_merge (x , y ):
14641464 for how in ["inner" , "left" , "outer" ]:
1465- result = x .join (y , how = how )
1465+ for sort in [True , False ]:
1466+ result = x .join (y , how = how , sort = sort )
14661467
1467- expected = merge (x .reset_index (), y .reset_index (), how = how , sort = True )
1468- expected = expected .set_index ("index" )
1468+ expected = merge (x .reset_index (), y .reset_index (), how = how , sort = sort )
1469+ expected = expected .set_index ("index" )
14691470
1470- # TODO check_names on merge?
1471- tm .assert_frame_equal (result , expected , check_names = False )
1471+ # TODO check_names on merge?
1472+ tm .assert_frame_equal (result , expected , check_names = False )
14721473
14731474
14741475class TestMergeDtypes :
@@ -1751,7 +1752,7 @@ def test_merge_string_dtype(self, how, expected_data, any_string_dtype):
17511752 "how, expected_data" ,
17521753 [
17531754 ("inner" , [[True , 1 , 4 ], [False , 5 , 3 ]]),
1754- ("outer" , [[True , 1 , 4 ], [False , 5 , 3 ]]),
1755+ ("outer" , [[False , 5 , 3 ], [True , 1 , 4 ]]),
17551756 ("left" , [[True , 1 , 4 ], [False , 5 , 3 ]]),
17561757 ("right" , [[False , 5 , 3 ], [True , 1 , 4 ]]),
17571758 ],
@@ -2331,9 +2332,9 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols):
23312332 "outer" ,
23322333 DataFrame (
23332334 {
2334- "A" : [100 , 200 , 1 , 300 ],
2335- "B1" : [60 , 70 , 80 , np .nan ],
2336- "B2" : [600 , 700 , np . nan , 800 ],
2335+ "A" : [1 , 100 , 200 , 300 ],
2336+ "B1" : [80 , 60 , 70 , np .nan ],
2337+ "B2" : [np . nan , 600 , 700 , 800 ],
23372338 }
23382339 ),
23392340 ),
@@ -2752,9 +2753,9 @@ def test_merge_outer_with_NaN(dtype):
27522753 result = merge (right , left , on = "key" , how = "outer" )
27532754 expected = DataFrame (
27542755 {
2755- "key" : [np .nan , np .nan , 1 , 2 ],
2756- "col2" : [3 , 4 , np .nan , np .nan ],
2757- "col1" : [np .nan , np .nan , 1 , 2 ],
2756+ "key" : [1 , 2 , np .nan , np .nan ],
2757+ "col2" : [np .nan , np .nan , 3 , 4 ],
2758+ "col1" : [1 , 2 , np .nan , np .nan ],
27582759 },
27592760 dtype = dtype ,
27602761 )
@@ -2847,3 +2848,79 @@ def test_merge_multiindex_single_level():
28472848
28482849 result = df .merge (df2 , left_on = ["col" ], right_index = True , how = "left" )
28492850 tm .assert_frame_equal (result , expected )
2851+
2852+
@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize("on_index", [True, False])
@pytest.mark.parametrize("left_unique", [True, False])
@pytest.mark.parametrize("left_monotonic", [True, False])
@pytest.mark.parametrize("right_unique", [True, False])
@pytest.mark.parametrize("right_monotonic", [True, False])
def test_merge_combinations(
    how, sort, on_index, left_unique, left_monotonic, right_unique, right_monotonic
):
    """Check ``merge`` against an independently built expectation.

    Every combination of join type, ``sort`` flag, index-vs-column join and
    uniqueness/monotonicity of the left and right keys is exercised. The
    expected frame is built from the input keys with plain pandas operations
    (``isin``, ``sort_values``, ``value_counts``, ``repeat``) rather than with
    ``merge`` itself.
    """
    # GH 54611
    # Both sides start from keys [2, 3]; the third key selects the scenario:
    # 4 -> unique & monotonic, 1 -> unique & non-monotonic,
    # 3 -> duplicated & monotonic, 2 -> duplicated & non-monotonic.
    left = [2, 3]
    if left_unique:
        left.append(4 if left_monotonic else 1)
    else:
        left.append(3 if left_monotonic else 2)

    right = [2, 3]
    if right_unique:
        right.append(4 if right_monotonic else 1)
    else:
        right.append(3 if right_monotonic else 2)

    left = DataFrame({"key": left})
    right = DataFrame({"key": right})

    if on_index:
        left = left.set_index("key")
        right = right.set_index("key")
        on_kwargs = {"left_index": True, "right_index": True}
    else:
        on_kwargs = {"on": "key"}

    result = merge(left, right, how=how, sort=sort, **on_kwargs)

    if on_index:
        # Bring "key" back as a column so the expectation logic below can use
        # left["key"] / right["key"] in both the index and column modes.
        left = left.reset_index()
        right = right.reset_index()

    if how in ["left", "right", "inner"]:
        # ``expected`` starts as the side whose rows are kept; ``other`` is
        # the side whose duplicates multiply those rows.
        if how in ["left", "inner"]:
            expected, other, other_unique = left, right, right_unique
        else:
            expected, other, other_unique = right, left, left_unique
        if how == "inner":
            # Keep only rows whose key occurs on both sides.
            keep_values = set(left["key"].values).intersection(right["key"].values)
            keep_mask = expected["key"].isin(keep_values)
            expected = expected[keep_mask]
        if sort:
            expected = expected.sort_values("key")
        if not other_unique:
            # Repeat each kept row once per occurrence of its key in ``other``;
            # keys missing from ``other`` get a repeat count of 1.
            other_value_counts = other["key"].value_counts()
            repeats = other_value_counts.reindex(expected["key"].values, fill_value=1)
            repeats = repeats.astype(np.intp)
            expected = expected["key"].repeat(repeats.values)
            expected = expected.to_frame()
    elif how == "outer":
        if on_index and left_unique and left["key"].equals(right["key"]):
            # Identical unique keys on both sides: the expectation is the left
            # keys in their original order (not sorted).
            # NOTE(review): presumably merge returns the shared index
            # unchanged in this case -- confirm.
            expected = DataFrame({"key": left["key"]})
        else:
            # Rows per key = left count * right count, with a key that is
            # absent from one side counted as 1 there (fill_value=1).
            left_counts = left["key"].value_counts()
            right_counts = right["key"].value_counts()
            expected_counts = left_counts.mul(right_counts, fill_value=1)
            expected_counts = expected_counts.astype(np.intp)
            expected = expected_counts.index.values.repeat(expected_counts.values)
            expected = DataFrame({"key": expected})
            # This branch sorts by key regardless of the ``sort`` argument.
            expected = expected.sort_values("key")

    if on_index:
        expected = expected.set_index("key")
    else:
        expected = expected.reset_index(drop=True)

    tm.assert_frame_equal(result, expected)
0 commit comments