Skip to content

Commit 6e77fc9

Browse files
refactor
1 parent ae99b3d commit 6e77fc9

File tree

1 file changed

+19
-27
lines changed

1 file changed

+19
-27
lines changed

pandas/core/frame.py

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9141,45 +9141,37 @@ def combine_first(self, other: DataFrame) -> DataFrame:
91419141
1 0.0 3.0 1.0
91429142
2 NaN 3.0 1.0
91439143
"""
9144-
from pandas.core.dtypes.common import (
9145-
is_extension_array_dtype,
9146-
is_integer_dtype,
9147-
is_unsigned_integer_dtype,
9148-
)
91499144

91509145
def combiner(x: Series, y: Series):
9151-
mask = x.isna()
9152-
9153-
# If the column y in other DataFrame is not in first DataFrame,
9154-
# just return y.
9155-
if y.name not in self.columns:
9156-
return y
9157-
9158-
return y.where(mask, x)
9146+
# GH#60128 Preserve EA dtypes by operating at the Series level.
9147+
# If 'y' is a new column, return it as-is; otherwise fill <NA> in 'x'
9148+
# from 'y'. Avoids dropping to NumPy arrays (which would lose
9149+
# Int64/UInt64 and reintroduce float64 paths).
9150+
return y if y.name not in self.columns else y.where(x.isna(), x)
91599151

91609152
if len(other) == 0:
91619153
combined = self.reindex(
91629154
self.columns.append(other.columns.difference(self.columns)), axis=1
91639155
)
91649156
combined = combined.astype(other.dtypes)
91659157
else:
9166-
# GH#60128 Avoid lossy conversion to float64
9158+
# GH#60128 Avoid precision loss from int64/uint64 -> float64 round-trip.
9159+
# Promote NumPy int64/uint64 to nullable Int64/UInt64 only when values
9160+
# exceed float64's exact range (|x| >= 2**53). This keeps alignment that
9161+
# introduces <NA> from forcing a lossy cast.
91679162
def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame:
9168-
BOUND = 2**53
9163+
BOUND = 2**53 # integers beyond +/-2**53 are not all exactly representable in float64
91699164
cast_map: dict[str, str] = {}
9165+
91709166
for col, dt in df.dtypes.items():
9171-
if is_integer_dtype(dt) and not is_extension_array_dtype(dt):
9172-
ser = df[col]
9173-
if ser.size == 0:
9174-
continue
9175-
if is_unsigned_integer_dtype(dt):
9176-
if ser.max() >= BOUND:
9177-
# promote large uint64 to nullable UInt64
9178-
cast_map[col] = "UInt64"
9179-
else:
9180-
if ser.max() >= BOUND or ser.min() <= -BOUND:
9181-
# promote large int64 to nullable Int64
9182-
cast_map[col] = "Int64"
9167+
ser = df[col]
9168+
if dt == np.dtype("uint64"):
9169+
if ser.size and ser.max() >= BOUND:
9170+
cast_map[col] = "UInt64"
9171+
elif dt == np.dtype("int64"):
9172+
if ser.size and (ser.max() >= BOUND or ser.min() <= -BOUND):
9173+
cast_map[col] = "Int64"
9174+
91839175
return df.astype(cast_map) if cast_map else df
91849176

91859177
# Cast any side that will gain rows on outer align (introduces <NA>).

0 commit comments

Comments
 (0)