Skip to content

Commit c8ba339

Browse files
add targeted casting to combine_first
1 parent c4449f0 commit c8ba339

File tree

1 file changed

+37
-8
lines changed

1 file changed

+37
-8
lines changed

pandas/core/frame.py

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9141,27 +9141,56 @@ def combine_first(self, other: DataFrame) -> DataFrame:
91419141
1 0.0 3.0 1.0
91429142
2 NaN 3.0 1.0
91439143
"""
9144-
from pandas.core.computation import expressions
9144+
from pandas.core.dtypes.common import (
9145+
is_extension_array_dtype,
9146+
is_integer_dtype,
9147+
is_unsigned_integer_dtype,
9148+
)
91459149

91469150
def combiner(x: Series, y: Series):
9147-
mask = x.isna()._values
9148-
9149-
x_values = x._values
9150-
y_values = y._values
9151+
mask = x.isna()
91519152

91529153
# If the column y in other DataFrame is not in first DataFrame,
9153-
# just return y_values.
9154+
# just return y.
91549155
if y.name not in self.columns:
9155-
return y_values
9156+
return y
91569157

9157-
return expressions.where(mask, y_values, x_values)
9158+
return y.where(mask, x)
91589159

91599160
if len(other) == 0:
91609161
combined = self.reindex(
91619162
self.columns.append(other.columns.difference(self.columns)), axis=1
91629163
)
91639164
combined = combined.astype(other.dtypes)
91649165
else:
9166+
# GH #60128
9167+
# Promote large 64-bit integers to their nullable types.
9168+
# Without this, precision will be lost in a float64 round-trip.
9169+
def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame:
9170+
BOUND = 2**53
9171+
cast_map: dict[str, str] = {}
9172+
for col, dt in df.dtypes.items():
9173+
if is_integer_dtype(dt) and not is_extension_array_dtype(dt):
9174+
ser = df[col]
9175+
if ser.size == 0:
9176+
continue
9177+
if is_unsigned_integer_dtype(dt):
9178+
if ser.max() >= BOUND:
9179+
# promote large uint64 to nullable UInt64
9180+
cast_map[col] = "UInt64"
9181+
else:
9182+
if ser.max() >= BOUND or ser.min() <= -BOUND:
9183+
# promote large int64 to nullable Int64
9184+
cast_map[col] = "Int64"
9185+
return df.astype(cast_map) if cast_map else df
9186+
9187+
# Only cast frames whose index expands to the union (i.e., that get <NA> on align)
9188+
union_index = self.index.union(other.index)
9189+
if not self.index.equals(union_index):
9190+
self = _cast_large_numpy_ints_to_nullable(self)
9191+
if not other.index.equals(union_index):
9192+
other = _cast_large_numpy_ints_to_nullable(other)
9193+
91659194
combined = self.combine(other, combiner, overwrite=False)
91669195

91679196
dtypes = {

0 commit comments

Comments
 (0)