Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,7 @@ Numeric

Conversion
^^^^^^^^^^
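- Bug in :func:`maybe_convert_objects` where large ``int`` values mixed with ``None`` were converted to ``float64`` even when too large to be represented by it; a ``ValueError`` is now raised (:issue:`58485`)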
- Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`)
- Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
Expand Down
13 changes: 13 additions & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ cdef:
object oUINT64_MAX = <uint64_t>UINT64_MAX

float64_t NaN = <float64_t>np.nan
# the largest integer magnitude that a 64-bit IEEE floating point number can
# represent unambiguously is 2**53 - 1: its significand/mantissa carries 53 bits
# of precision (52 stored bits plus the implicit leading bit), all set to 1
# see: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
# related concept in JavaScript:
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
float64_t F64_SAFE_INT64_MAX = <float64_t>(2**53 - 1)

# python-visible
i8max = <int64_t>INT64_MAX
Expand Down Expand Up @@ -2868,6 +2874,13 @@ def maybe_convert_objects(ndarray[object] objects,
result = uints
else:
result = ints
elif (np.absolute(floats) > F64_SAFE_INT64_MAX).any():
# GH 58485
raise ValueError(
"integer values with non-nullable dtype too large "
"to be represented by float64"
", specify an integer dtype explicitly"
)
else:
result = floats
elif seen.nan_:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ def convert_dtypes(
if len(arr) < len(input_array) and not is_nan_na():
# In the presence of NaNs, we cannot convert to IntegerDtype
pass
elif (arr.astype(int) == arr).all():
elif np.array_equal(arr, np.trunc(arr), equal_nan=True):
inferred_dtype = target_int_dtype
else:
inferred_dtype = input_array.dtype
Expand All @@ -987,7 +987,7 @@ def convert_dtypes(
if len(arr) < len(input_array) and not is_nan_na():
# In the presence of NaNs, we can't convert to IntegerDtype
inferred_dtype = inferred_float_dtype
elif (arr.astype(int) == arr).all():
elif np.array_equal(arr, np.trunc(arr)):
inferred_dtype = pandas_dtype_func("Int64")
else:
inferred_dtype = inferred_float_dtype
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -1747,6 +1747,20 @@ def test_boolean_dtype(self, data, skipna, index_or_series_or_array):
inferred = lib.infer_dtype(val, skipna=skipna)
assert inferred == "boolean"

def test_large_non_nullable_integer_objects(self):
    """maybe_convert_objects raises for huge ints mixed with ``None``."""
    # GH 58485
    values = [
        -9223372036854775808,
        4611686018427387904,
        9223372036854775807,
        None,
    ]
    objects = np.array(values, dtype="object")
    expected_msg = "too large to be represented by float64"
    with pytest.raises(ValueError, match=expected_msg):
        lib.maybe_convert_objects(objects)


class TestNumberScalar:
def test_is_number(self):
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2793,6 +2793,31 @@ def __new__(cls, input_array):
expected = DataFrame(np.eye(3))
tm.assert_frame_equal(df, expected)

def test_large_non_nullable_integer_objects(self):
    """Building a DataFrame from huge ints mixed with ``None`` raises."""
    # GH 58485
    column_values = [
        -9223372036854775808,
        4611686018427387904,
        9223372036854775807,
        None,
    ]
    # every column holds the same values; each one gets its own list object
    data = {label: list(column_values) for label in ("a", "b", "c")}
    expected_msg = "too large to be represented by float64"
    with pytest.raises(ValueError, match=expected_msg):
        DataFrame(data)


class TestDataFrameConstructorIndexInference:
def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):
Expand Down
Loading