@@ -9025,6 +9025,52 @@ def combine(
90259025 1 0.0 3.0 1.0
90269026 2 NaN 3.0 1.0
90279027 """
9028+
9029+ # GH#62691 Avoid precision loss for int64/uint64 columns: an outer
9030+ # align would otherwise force them through a float64 round trip, so
9031+ # promote them to the nullable Int64/UInt64 dtypes up front.
def _promote_wide_ints(df: DataFrame) -> DataFrame:
    """
    Promote int64/uint64 columns to their nullable Int64/UInt64 versions.

    Parameters
    ----------
    df : DataFrame
        Frame whose column dtypes are inspected.

    Returns
    -------
    DataFrame
        ``df`` itself when no column is int64/uint64; otherwise the result
        of ``df.astype`` with only those columns recast.
    """
    # Matched with ``==`` rather than by hashing, so the comparison is the
    # same one an explicit if/elif chain on the dtype would perform.
    nullable_names = (
        (np.dtype("int64"), "Int64"),
        (np.dtype("uint64"), "UInt64"),
    )

    # Keys are column labels (any hashable, not only str); values are the
    # names of the nullable dtypes to cast to.
    cast_map = {
        col: nullable_name
        for col, dt in df.dtypes.items()
        for numpy_dt, nullable_name in nullable_names
        if dt == numpy_dt
    }

    # Skip the astype call entirely when there is nothing to promote.
    return df.astype(cast_map) if cast_map else df
9044+
9045+ # store originals before promotion
9046+ self_original = self
9047+ other_original = other
9048+ self = _promote_wide_ints (self )
9049+ other = _promote_wide_ints (other )
9050+
def _restore_wide_ints(df: DataFrame) -> DataFrame:
    """
    Cast NA-free columns that were originally int64/uint64 back.

    Reads ``self_original`` and ``other_original`` from the enclosing
    scope to learn each column's dtype before promotion.

    Parameters
    ----------
    df : DataFrame
        Combined frame whose columns may need restoring.

    Returns
    -------
    DataFrame
        ``df`` itself when no column qualifies; otherwise the result of
        ``df.astype`` where each qualifying column is cast to the common
        type of its original dtype(s).
    """
    wide_ints = (np.int64, np.uint64)

    # Keys are column labels; values are dtype objects (not strings).
    cast_map = {}
    for col in df.columns:
        # A column may be absent from one of the inputs, in which case
        # ``.get`` yields None and that side is ignored.
        original_dtypes = [
            dt
            for dt in (
                self_original.dtypes.get(col),
                other_original.dtypes.get(col),
            )
            if dt is not None
        ]

        # Only columns that were int64/uint64 in at least one input were
        # touched by the promotion step.
        if not any(dt in wide_ints for dt in original_dtypes):
            continue

        # A column holding NAs stays as it is.
        if isna(df[col]).any():
            continue

        # NOTE(review): the cast is applied regardless of the column's
        # current dtype; if the combining function produced non-integral
        # values this may be lossy -- confirm against callers.
        cast_map[col] = find_common_type(original_dtypes)

    return df.astype(cast_map) if cast_map else df
9073+
90289074 other_idxlen = len (other .index ) # save for compare
90299075 other_columns = other .columns
90309076
@@ -9092,6 +9138,7 @@ def combine(
90929138
90939139 # convert_objects just in case
90949140 frame_result = self ._constructor (result , index = new_index , columns = new_columns )
9141+ frame_result = _restore_wide_ints (frame_result )
90959142 return frame_result .__finalize__ (self , method = "combine" )
90969143
90979144 def combine_first (self , other : DataFrame ) -> DataFrame :
@@ -9141,35 +9188,27 @@ def combine_first(self, other: DataFrame) -> DataFrame:
91419188 1 0.0 3.0 1.0
91429189 2 NaN 3.0 1.0
91439190 """
9191+ from pandas .core .computation import expressions
91449192
def combiner(x: Series, y: Series):
    """
    Fill the missing entries of ``x`` with the matching entries of ``y``.

    Parameters
    ----------
    x : Series
        Column taken from the calling frame.
    y : Series
        Column taken from the other frame.

    Returns
    -------
    array-like
        The values of ``y`` unchanged when ``y.name`` is not a column of
        the calling frame; otherwise the values of ``x`` with each missing
        position replaced by the value of ``y`` at that position.
    """
    # If the column y in the other DataFrame is not in the first
    # DataFrame, just return its values; checking this first avoids
    # building a mask that would be thrown away.
    if y.name not in self.columns:
        return y._values

    # Take y wherever x is missing, otherwise keep x.
    return expressions.where(x.isna()._values, y._values, x._values)
91519205
91529206 if len (other ) == 0 :
91539207 combined = self .reindex (
91549208 self .columns .append (other .columns .difference (self .columns )), axis = 1
91559209 )
91569210 combined = combined .astype (other .dtypes )
91579211 else :
9158- # GH#60128 Avoid precision loss from int64/uint64 <-> float64 round-trip.
9159- def _promote_ints_to_nullable (df : DataFrame ) -> DataFrame :
9160- cast_map : dict [str , str ] = {}
9161-
9162- for col , dt in df .dtypes .items ():
9163- if dt == np .dtype ("uint64" ):
9164- cast_map [col ] = "UInt64"
9165- elif dt == np .dtype ("int64" ):
9166- cast_map [col ] = "Int64"
9167-
9168- return df .astype (cast_map ) if cast_map else df
9169-
9170- self = _promote_ints_to_nullable (self )
9171- other = _promote_ints_to_nullable (other )
9172-
91739212 combined = self .combine (other , combiner , overwrite = False )
91749213
91759214 dtypes = {
0 commit comments