From aaf1143533c9fed0497683e7c898e921a8125b83 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Fri, 17 Oct 2025 00:00:12 -0400 Subject: [PATCH 1/3] update whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..be8ba7af2e5e2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1001,6 +1001,7 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :func:`maybe_convert_objects` casting large ``int`` values mixed with ``None`` to ``float64``; a ``ValueError`` is now raised when the values are too large to be represented exactly (:issue:`58485`) - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) From f1e7ac5a11690a40b81b18af36c421336d253296 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Fri, 17 Oct 2025 00:00:57 -0400 Subject: [PATCH 2/3] fix numpy warnings when int64 does not fit into float64 --- pandas/_libs/lib.pyx | 8 ++++++++ pandas/core/dtypes/cast.py | 4 ++-- pandas/tests/dtypes/test_inference.py | 14 ++++++++++++++ pandas/tests/frame/test_constructors.py | 25 +++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 83a1b09f00a11..2652c04f9d2bb 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -116,6 +116,11 @@ cdef: object oUINT64_MAX = UINT64_MAX float64_t NaN = np.nan + # the maximum absolute integer value that a 64-bit IEEE floating point number can store is when all 52 bits of its significand/mantissa are 1 + # see: https://en.wikipedia.org/wiki/Double-precision_floating-point_format + # related concept in JavaScript: + # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER + float64_t 
F64_SAFE_INT64_MAX = (2**53 - 1) # python-visible i8max = INT64_MAX @@ -2868,6 +2873,9 @@ def maybe_convert_objects(ndarray[object] objects, result = uints else: result = ints + elif (np.absolute(floats) > F64_SAFE_INT64_MAX).any(): + # GH 58485 + raise ValueError("integer values with non-nullable dtype too large to be represented by float64, specify an integer dtype explicitly") else: result = floats elif seen.nan_: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3b615c70ebea2..2c4221b52bcd3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -960,7 +960,7 @@ def convert_dtypes( if len(arr) < len(input_array) and not is_nan_na(): # In the presence of NaNs, we cannot convert to IntegerDtype pass - elif (arr.astype(int) == arr).all(): + elif np.array_equal(arr, np.trunc(arr), equal_nan=True): inferred_dtype = target_int_dtype else: inferred_dtype = input_array.dtype @@ -987,7 +987,7 @@ def convert_dtypes( if len(arr) < len(input_array) and not is_nan_na(): # In the presence of NaNs, we can't convert to IntegerDtype inferred_dtype = inferred_float_dtype - elif (arr.astype(int) == arr).all(): + elif np.array_equal(arr, np.trunc(arr)): inferred_dtype = pandas_dtype_func("Int64") else: inferred_dtype = inferred_float_dtype diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index d0955912e12c8..363928e3e611c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1747,6 +1747,20 @@ def test_boolean_dtype(self, data, skipna, index_or_series_or_array): inferred = lib.infer_dtype(val, skipna=skipna) assert inferred == "boolean" + def test_large_non_nullable_integer_objects(self): + # GH 58485 + arr = np.array( + [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + dtype="object", + ) + with pytest.raises(ValueError, match="too large to be represented by float64"): + lib.maybe_convert_objects(arr) + class 
TestNumberScalar: def test_is_number(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 264011edb65b5..cae5fb25c1f20 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2793,6 +2793,31 @@ def __new__(cls, input_array): expected = DataFrame(np.eye(3)) tm.assert_frame_equal(df, expected) + def test_large_non_nullable_integer_objects(self): + # GH 58485 + data = { + "a": [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + "b": [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + "c": [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + } + with pytest.raises(ValueError, match="too large to be represented by float64"): + pd.DataFrame(data) + class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): From b029eb88c5ec1480d2b26c1e876c77c9032844ac Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Fri, 17 Oct 2025 00:03:57 -0400 Subject: [PATCH 3/3] fix precommit --- pandas/_libs/lib.pyx | 9 +++++++-- pandas/tests/frame/test_constructors.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 2652c04f9d2bb..b07980fd2b4bb 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -116,7 +116,8 @@ cdef: object oUINT64_MAX = UINT64_MAX float64_t NaN = np.nan - # the maximum absolute integer value that a 64-bit IEEE floating point number can store is when all 52 bits of its significand/mantissa are 1 + # float64 has a 53-bit significand (52 stored bits), so 2**53 - 1 is the largest + # integer magnitude that a float64 represents exactly and unambiguously # see: https://en.wikipedia.org/wiki/Double-precision_floating-point_format # related concept in JavaScript: # 
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER @@ -2875,7 +2876,11 @@ def maybe_convert_objects(ndarray[object] objects, result = ints elif (np.absolute(floats) > F64_SAFE_INT64_MAX).any(): # GH 58485 - raise ValueError("integer values with non-nullable dtype too large to be represented by float64, specify an integer dtype explicitly") + raise ValueError( + "integer values with non-nullable dtype too large " + "to be represented by float64" + ", specify an integer dtype explicitly" + ) else: result = floats elif seen.nan_: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index cae5fb25c1f20..6b87ae5e447dd 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2816,7 +2816,7 @@ def test_large_non_nullable_integer_objects(self): ], } with pytest.raises(ValueError, match="too large to be represented by float64"): - pd.DataFrame(data) + DataFrame(data) class TestDataFrameConstructorIndexInference: