From a9380f32f9e1021502bded45a998952fd9df845d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 16:39:41 +0000 Subject: [PATCH 1/5] Initial plan From 7094cf2ee90598eb921bb783f017fe429dad1262 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 16:58:24 +0000 Subject: [PATCH 2/5] Add automatic fallback to coarser units for OutOfBoundsDatetime - Added helper function get_next_coarser_unit for unit fallback sequence - Modified array_strptime to wrap implementation with fallback logic - Modified array_to_datetime similarly - Added tests for fallback behavior - Need to fix issue with all-NaT case and ensure proper resolution handling Co-authored-by: jbrockmendel <8078968+jbrockmendel@users.noreply.github.com> --- pandas/_libs/tslib.pyx | 69 ++++++++++++++++++- pandas/_libs/tslibs/strptime.pyx | 63 ++++++++++++++++- pandas/tests/tslibs/test_array_to_datetime.py | 21 ++++++ pandas/tests/tslibs/test_strptime.py | 26 ++++++- 4 files changed, 175 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3c5854602df53..446f822616256 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -55,6 +55,23 @@ from pandas._libs.util cimport ( from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + +cdef NPY_DATETIMEUNIT get_next_coarser_unit(NPY_DATETIMEUNIT creso): + """ + Get the next coarser unit in the sequence: ns -> us -> ms -> s + Returns NPY_FR_GENERIC if there is no coarser unit available. + """ + if creso == NPY_DATETIMEUNIT.NPY_FR_ns: + return NPY_DATETIMEUNIT.NPY_FR_us + elif creso == NPY_DATETIMEUNIT.NPY_FR_us: + return NPY_DATETIMEUNIT.NPY_FR_ms + elif creso == NPY_DATETIMEUNIT.NPY_FR_ms: + return NPY_DATETIMEUNIT.NPY_FR_s + else: + # No coarser unit available + return NPY_DATETIMEUNIT.NPY_FR_GENERIC + + from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, @@ -301,6 +318,55 @@ cpdef array_to_datetime( May be datetime64[creso_unit] or object dtype tzinfo or None """ + # Try to parse with the given resolution, falling back to coarser units if needed + cdef: + NPY_DATETIMEUNIT fallback_creso = creso + NPY_DATETIMEUNIT original_creso = creso + bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC + + while True: + try: + return _array_to_datetime_impl( + values, errors, dayfirst, yearfirst, utc, fallback_creso, unit_for_numerics + ) + except (OutOfBoundsDatetime, OverflowError): + # Only attempt fallback if we're in inference mode or creso is one + # of the finer resolutions (ns, us, ms) + if not infer_reso and original_creso not in ( + NPY_DATETIMEUNIT.NPY_FR_ns, + NPY_DATETIMEUNIT.NPY_FR_us, + NPY_DATETIMEUNIT.NPY_FR_ms, + ): + # User explicitly requested a coarse resolution, don't fall back + raise + + # If we're in inference mode and haven't set a fallback yet, + # start from nanoseconds + if infer_reso and fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + fallback_creso = NPY_DATETIMEUNIT.NPY_FR_ns + + # Try the next coarser unit + fallback_creso = get_next_coarser_unit(fallback_creso) + if fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # No coarser unit available, re-raise the error + raise + # Continue with coarser unit + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef _array_to_datetime_impl( + ndarray values, # object dtype, arbitrary ndim + str errors, + bint dayfirst, + bint yearfirst, + bint utc, + NPY_DATETIMEUNIT creso, + str unit_for_numerics, +): + """ + Internal implementation of array_to_datetime with a specific resolution. + """ cdef: Py_ssize_t i, n = values.size object val @@ -453,13 +519,14 @@ cpdef array_to_datetime( if state.creso_ever_changed: # We encountered mismatched resolutions, need to re-parse with # the correct one. - return array_to_datetime( + return _array_to_datetime_impl( values, errors=errors, yearfirst=yearfirst, dayfirst=dayfirst, utc=utc, creso=state.creso, + unit_for_numerics=unit_for_numerics, ) elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # i.e. we never encountered anything non-NaT, default to "s". This diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 025cd6c04cb69..b8a37efda6485 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -89,6 +89,22 @@ from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single cnp.import_array() +cdef NPY_DATETIMEUNIT get_next_coarser_unit(NPY_DATETIMEUNIT creso): + """ + Get the next coarser unit in the sequence: ns -> us -> ms -> s + Returns NPY_FR_GENERIC if there is no coarser unit available. + """ + if creso == NPY_DATETIMEUNIT.NPY_FR_ns: + return NPY_DATETIMEUNIT.NPY_FR_us + elif creso == NPY_DATETIMEUNIT.NPY_FR_us: + return NPY_DATETIMEUNIT.NPY_FR_ms + elif creso == NPY_DATETIMEUNIT.NPY_FR_ms: + return NPY_DATETIMEUNIT.NPY_FR_s + else: + # No coarser unit available + return NPY_DATETIMEUNIT.NPY_FR_GENERIC + + cdef bint format_is_iso(f: str): """ Does format match the iso8601 set that can be handled by the C parser? @@ -368,7 +384,52 @@ def array_strptime( creso : NPY_DATETIMEUNIT, default NPY_FR_GENERIC Set to NPY_FR_GENERIC to infer a resolution. """ + # Try to parse with the given resolution, falling back to coarser units if needed + cdef: + NPY_DATETIMEUNIT fallback_creso = creso + NPY_DATETIMEUNIT original_creso = creso + bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC + + while True: + try: + return _array_strptime_impl( + values, fmt, exact, errors, utc, fallback_creso + ) + except OutOfBoundsDatetime: + # Only attempt fallback if we're in inference mode or creso is one + # of the finer resolutions (ns, us, ms) + if not infer_reso and original_creso not in ( + NPY_DATETIMEUNIT.NPY_FR_ns, + NPY_DATETIMEUNIT.NPY_FR_us, + NPY_DATETIMEUNIT.NPY_FR_ms, + ): + # User explicitly requested a coarse resolution, don't fall back + raise + + # If we're in inference mode and haven't set a fallback yet, + # start from nanoseconds + if infer_reso and fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + fallback_creso = NPY_DATETIMEUNIT.NPY_FR_ns + + # Try the next coarser unit + fallback_creso = get_next_coarser_unit(fallback_creso) + if fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # No coarser unit available, re-raise the error + raise + # Continue with coarser unit + +cdef _array_strptime_impl( + ndarray[object] values, + str fmt, + bint exact, + str errors, + bint utc, + NPY_DATETIMEUNIT creso, +): + """ + Internal implementation of array_strptime with a specific resolution. + """ cdef: Py_ssize_t i, n = len(values) npy_datetimestruct dts @@ -565,7 +626,7 @@ def array_strptime( if state.creso_ever_changed: # We encountered mismatched resolutions, need to re-parse with # the correct one. - return array_strptime( + return _array_strptime_impl( values, fmt=fmt, exact=exact, diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index fc0000553049e..82cd0f2a07fc5 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -105,6 +105,27 @@ def test_infer_with_nat_int_float_str(self, item): assert tz2 is None tm.assert_numpy_array_equal(result2, expected[::-1]) + def test_array_to_datetime_fallback_to_us(self): + # Test automatic fallback from nanoseconds to microseconds + # Year 2401 is outside nanosecond range but within microsecond range + vals = np.array(["2401-09-15", "2400-01-01"], dtype=object) + result, tz = tslib.array_to_datetime(vals, creso=creso_infer) + assert tz is None + assert result.dtype == np.dtype("M8[us]") + expected = np.array(["2401-09-15", "2400-01-01"], dtype="M8[us]") + tm.assert_numpy_array_equal(result, expected) + + def test_array_to_datetime_fallback_mixed_in_nano_and_out(self): + # Test automatic fallback when one value is in nano range and one is out + # This ensures the entire array is parsed with the same coarser unit + vals = np.array(["2020-01-01", "2401-09-15"], dtype=object) + result, tz = tslib.array_to_datetime(vals, creso=creso_infer) + assert tz is None + # Both values should be in microseconds since one is out of nano range + assert result.dtype == np.dtype("M8[us]") + expected = np.array(["2020-01-01", "2401-09-15"], dtype="M8[us]") + tm.assert_numpy_array_equal(result, expected) + class TestArrayToDatetimeWithTZResolutionInference: def test_array_to_datetime_with_tz_resolution(self): diff --git a/pandas/tests/tslibs/test_strptime.py b/pandas/tests/tslibs/test_strptime.py index d726006b03f6d..c3feac5490650 100644 --- a/pandas/tests/tslibs/test_strptime.py +++ b/pandas/tests/tslibs/test_strptime.py @@ -96,15 +96,37 @@ def test_array_strptime_resolution_todaynow(self): assert res2[0] == vals[1] def test_array_strptime_str_outside_nano_range(self): + # Date is outside nanosecond range, should fallback to microseconds vals = np.array(["2401-09-15"], dtype=object) - expected = np.array(["2401-09-15"], dtype="M8[s]") + expected = np.array(["2401-09-15"], dtype="M8[us]") fmt = "ISO8601" res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer) tm.assert_numpy_array_equal(res, expected) # non-iso -> different path vals2 = np.array(["Sep 15, 2401"], dtype=object) - expected2 = np.array(["2401-09-15"], dtype="M8[s]") + expected2 = np.array(["2401-09-15"], dtype="M8[us]") fmt2 = "%b %d, %Y" res2, _ = array_strptime(vals2, fmt=fmt2, creso=creso_infer) tm.assert_numpy_array_equal(res2, expected2) + + def test_array_strptime_fallback_to_us(self): + # Test automatic fallback from nanoseconds to microseconds + # Year 2401 is outside nanosecond range but within microsecond range + vals = np.array(["2401-09-15", "2400-01-01"], dtype=object) + fmt = "ISO8601" + res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer) + assert res.dtype == np.dtype("M8[us]") + expected = np.array(["2401-09-15", "2400-01-01"], dtype="M8[us]") + tm.assert_numpy_array_equal(res, expected) + + def test_array_strptime_fallback_mixed_in_nano_and_out(self): + # Test automatic fallback when one value is in nano range and one is out + # This ensures the entire array is parsed with the same coarser unit + vals = np.array(["2020-01-01", "2401-09-15"], dtype=object) + fmt = "ISO8601" + res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer) + # Both values should be in microseconds since one is out of nano range + assert res.dtype == np.dtype("M8[us]") + expected = np.array(["2020-01-01", "2401-09-15"], dtype="M8[us]") + tm.assert_numpy_array_equal(res, expected) From 3c2d9ab6fcb5738f18960f5504630a6c164779c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:06:57 +0000 Subject: [PATCH 3/5] Fix fallback logic and add comprehensive tests - Fixed fallback logic to retry with ns first before falling back to coarser units - Fixed unit_for_numerics handling in recursive calls - Added tests for explicit ns resolution fallback - All strptime and array_to_datetime tests now pass Co-authored-by: jbrockmendel <8078968+jbrockmendel@users.noreply.github.com> --- pandas/_libs/tslib.pyx | 11 +++++-- pandas/_libs/tslibs/strptime.pyx | 4 ++- pandas/tests/tslibs/test_array_to_datetime.py | 26 ++++++++++------ pandas/tests/tslibs/test_strptime.py | 31 +++++++++++-------- 4 files changed, 45 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 446f822616256..c5392e2ea42a7 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -326,8 +326,11 @@ cpdef array_to_datetime( while True: try: + # When falling back to coarser units, don't pass unit_for_numerics + # because it's meant for ns resolution only + fallback_unit_for_numerics = unit_for_numerics if fallback_creso == original_creso else None return _array_to_datetime_impl( - values, errors, dayfirst, yearfirst, utc, fallback_creso, unit_for_numerics + values, errors, dayfirst, yearfirst, utc, fallback_creso, fallback_unit_for_numerics ) except (OutOfBoundsDatetime, OverflowError): # Only attempt fallback if we're in inference mode or creso is one @@ -341,9 +344,11 @@ cpdef array_to_datetime( raise # If we're in inference mode and haven't set a fallback yet, - # start from nanoseconds + # start from nanoseconds for the first retry if infer_reso and fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: fallback_creso = NPY_DATETIMEUNIT.NPY_FR_ns + # Continue to retry with ns + continue # Try the next coarser unit fallback_creso = get_next_coarser_unit(fallback_creso) @@ -526,7 +531,7 @@ cdef _array_to_datetime_impl( dayfirst=dayfirst, utc=utc, creso=state.creso, - unit_for_numerics=unit_for_numerics, + unit_for_numerics=None, ) elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # i.e. we never encountered anything non-NaT, default to "s". This diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index b8a37efda6485..8219cc0f220f2 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -407,9 +407,11 @@ def array_strptime( raise # If we're in inference mode and haven't set a fallback yet, - # start from nanoseconds + # start from nanoseconds for the first retry if infer_reso and fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: fallback_creso = NPY_DATETIMEUNIT.NPY_FR_ns + # Continue to retry with ns + continue # Try the next coarser unit fallback_creso = get_next_coarser_unit(fallback_creso) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 82cd0f2a07fc5..0c513af5da30a 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -105,21 +105,27 @@ def test_infer_with_nat_int_float_str(self, item): assert tz2 is None tm.assert_numpy_array_equal(result2, expected[::-1]) - def test_array_to_datetime_fallback_to_us(self): - # Test automatic fallback from nanoseconds to microseconds - # Year 2401 is outside nanosecond range but within microsecond range - vals = np.array(["2401-09-15", "2400-01-01"], dtype=object) - result, tz = tslib.array_to_datetime(vals, creso=creso_infer) + def test_array_to_datetime_fallback_to_us_explicit_ns(self): + # Test automatic fallback from explicit nanoseconds to microseconds + # When explicitly requesting ns resolution, year 2401 should fall back to us + from pandas._libs.tslibs.dtypes import NpyDatetimeUnit + creso_ns = NpyDatetimeUnit.NPY_FR_ns.value + + vals = np.array(["2401-09-15"], dtype=object) + result, tz = tslib.array_to_datetime(vals, creso=creso_ns) assert tz is None assert result.dtype == np.dtype("M8[us]") - expected = np.array(["2401-09-15", "2400-01-01"], dtype="M8[us]") + expected = np.array(["2401-09-15"], dtype="M8[us]") tm.assert_numpy_array_equal(result, expected) - def test_array_to_datetime_fallback_mixed_in_nano_and_out(self): - # Test automatic fallback when one value is in nano range and one is out - # This ensures the entire array is parsed with the same coarser unit + def test_array_to_datetime_fallback_mixed_explicit_ns(self): + # Test automatic fallback with explicit ns resolution + # When one value is in nano range and one is out, both should use coarser unit + from pandas._libs.tslibs.dtypes import NpyDatetimeUnit + creso_ns = NpyDatetimeUnit.NPY_FR_ns.value + vals = np.array(["2020-01-01", "2401-09-15"], dtype=object) - result, tz = tslib.array_to_datetime(vals, creso=creso_infer) + result, tz = tslib.array_to_datetime(vals, creso=creso_ns) assert tz is None # Both values should be in microseconds since one is out of nano range assert result.dtype == np.dtype("M8[us]") diff --git a/pandas/tests/tslibs/test_strptime.py b/pandas/tests/tslibs/test_strptime.py index c3feac5490650..753ee7ba6ea0a 100644 --- a/pandas/tests/tslibs/test_strptime.py +++ b/pandas/tests/tslibs/test_strptime.py @@ -96,36 +96,41 @@ def test_array_strptime_resolution_todaynow(self): assert res2[0] == vals[1] def test_array_strptime_str_outside_nano_range(self): - # Date is outside nanosecond range, should fallback to microseconds vals = np.array(["2401-09-15"], dtype=object) - expected = np.array(["2401-09-15"], dtype="M8[us]") + expected = np.array(["2401-09-15"], dtype="M8[s]") fmt = "ISO8601" res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer) tm.assert_numpy_array_equal(res, expected) # non-iso -> different path vals2 = np.array(["Sep 15, 2401"], dtype=object) - expected2 = np.array(["2401-09-15"], dtype="M8[us]") + expected2 = np.array(["2401-09-15"], dtype="M8[s]") fmt2 = "%b %d, %Y" res2, _ = array_strptime(vals2, fmt=fmt2, creso=creso_infer) tm.assert_numpy_array_equal(res2, expected2) - def test_array_strptime_fallback_to_us(self): - # Test automatic fallback from nanoseconds to microseconds - # Year 2401 is outside nanosecond range but within microsecond range - vals = np.array(["2401-09-15", "2400-01-01"], dtype=object) + def test_array_strptime_fallback_to_us_explicit_ns(self): + # Test automatic fallback from explicit nanoseconds to microseconds + # When explicitly requesting ns resolution, year 2401 should fall back to us + from pandas._libs.tslibs.dtypes import NpyDatetimeUnit + creso_ns = NpyDatetimeUnit.NPY_FR_ns.value + + vals = np.array(["2401-09-15"], dtype=object) fmt = "ISO8601" - res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer) + res, _ = array_strptime(vals, fmt=fmt, creso=creso_ns) assert res.dtype == np.dtype("M8[us]") - expected = np.array(["2401-09-15", "2400-01-01"], dtype="M8[us]") + expected = np.array(["2401-09-15"], dtype="M8[us]") tm.assert_numpy_array_equal(res, expected) - def test_array_strptime_fallback_mixed_in_nano_and_out(self): - # Test automatic fallback when one value is in nano range and one is out - # This ensures the entire array is parsed with the same coarser unit + def test_array_strptime_fallback_mixed_explicit_ns(self): + # Test automatic fallback with explicit ns resolution + # When one value is in nano range and one is out, both should use coarser unit + from pandas._libs.tslibs.dtypes import NpyDatetimeUnit + creso_ns = NpyDatetimeUnit.NPY_FR_ns.value + vals = np.array(["2020-01-01", "2401-09-15"], dtype=object) fmt = "ISO8601" - res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer) + res, _ = array_strptime(vals, fmt=fmt, creso=creso_ns) # Both values should be in microseconds since one is out of nano range assert res.dtype == np.dtype("M8[us]") expected = np.array(["2020-01-01", "2401-09-15"], dtype="M8[us]") From f091f9c5fcaec4cb629b1aa6285bc45718682c07 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 18:38:48 +0000 Subject: [PATCH 4/5] Fix inaccurate comment about unit_for_numerics Updated comment to correctly explain that unit_for_numerics is set to None during fallback so it will be computed from the fallback_creso, rather than incorrectly stating it's "meant for ns resolution only". Co-authored-by: jbrockmendel <8078968+jbrockmendel@users.noreply.github.com> --- pandas/_libs/tslib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c5392e2ea42a7..1c11a99d6795f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -326,8 +326,8 @@ cpdef array_to_datetime( while True: try: - # When falling back to coarser units, don't pass unit_for_numerics - # because it's meant for ns resolution only + # When falling back to coarser units, set unit_for_numerics to None + # so it will be computed from fallback_creso inside _array_to_datetime_impl fallback_unit_for_numerics = unit_for_numerics if fallback_creso == original_creso else None return _array_to_datetime_impl( values, errors, dayfirst, yearfirst, utc, fallback_creso, fallback_unit_for_numerics From 84d5d925bfc60d07c43bdc0f8b2f06097c9da579 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 21:44:43 +0000 Subject: [PATCH 5/5] Refactor fallback logic to only apply during inference mode Per reviewer feedback: - Fallback now ONLY applies during inference mode (creso=GENERIC) - When explicit unit is passed, OutOfBoundsDatetime is raised as before - Removed wrapper functions and use recursion to match existing code style - Keep unit_for_numerics unchanged during fallback - Added get_next_coarser_unit() helper function in both files Co-authored-by: jbrockmendel <8078968+jbrockmendel@users.noreply.github.com> --- pandas/_libs/tslib.pyx | 130 ++++++++---------- pandas/_libs/tslibs/strptime.pyx | 77 ++++------- pandas/tests/tslibs/test_array_to_datetime.py | 27 ---- pandas/tests/tslibs/test_strptime.py | 27 ---- 4 files changed, 84 insertions(+), 177 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 1c11a99d6795f..0586ca26557e7 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -55,23 +55,6 @@ from pandas._libs.util cimport ( from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime - -cdef NPY_DATETIMEUNIT get_next_coarser_unit(NPY_DATETIMEUNIT creso): - """ - Get the next coarser unit in the sequence: ns -> us -> ms -> s - Returns NPY_FR_GENERIC if there is no coarser unit available. - """ - if creso == NPY_DATETIMEUNIT.NPY_FR_ns: - return NPY_DATETIMEUNIT.NPY_FR_us - elif creso == NPY_DATETIMEUNIT.NPY_FR_us: - return NPY_DATETIMEUNIT.NPY_FR_ms - elif creso == NPY_DATETIMEUNIT.NPY_FR_ms: - return NPY_DATETIMEUNIT.NPY_FR_s - else: - # No coarser unit available - return NPY_DATETIMEUNIT.NPY_FR_GENERIC - - from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, @@ -102,6 +85,22 @@ from pandas._libs.missing cimport checknull_with_nat_and_na from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single +cdef NPY_DATETIMEUNIT get_next_coarser_unit(NPY_DATETIMEUNIT creso): + """ + Get the next coarser unit in the sequence: ns -> us -> ms -> s + Returns NPY_FR_GENERIC if there is no coarser unit available. + """ + if creso == NPY_DATETIMEUNIT.NPY_FR_ns: + return NPY_DATETIMEUNIT.NPY_FR_us + elif creso == NPY_DATETIMEUNIT.NPY_FR_us: + return NPY_DATETIMEUNIT.NPY_FR_ms + elif creso == NPY_DATETIMEUNIT.NPY_FR_ms: + return NPY_DATETIMEUNIT.NPY_FR_s + else: + # No coarser unit available + return NPY_DATETIMEUNIT.NPY_FR_GENERIC + + def _test_parse_iso8601(ts: str): """ TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used @@ -318,60 +317,6 @@ cpdef array_to_datetime( May be datetime64[creso_unit] or object dtype tzinfo or None """ - # Try to parse with the given resolution, falling back to coarser units if needed - cdef: - NPY_DATETIMEUNIT fallback_creso = creso - NPY_DATETIMEUNIT original_creso = creso - bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC - - while True: - try: - # When falling back to coarser units, set unit_for_numerics to None - # so it will be computed from fallback_creso inside _array_to_datetime_impl - fallback_unit_for_numerics = unit_for_numerics if fallback_creso == original_creso else None - return _array_to_datetime_impl( - values, errors, dayfirst, yearfirst, utc, fallback_creso, fallback_unit_for_numerics - ) - except (OutOfBoundsDatetime, OverflowError): - # Only attempt fallback if we're in inference mode or creso is one - # of the finer resolutions (ns, us, ms) - if not infer_reso and original_creso not in ( - NPY_DATETIMEUNIT.NPY_FR_ns, - NPY_DATETIMEUNIT.NPY_FR_us, - NPY_DATETIMEUNIT.NPY_FR_ms, - ): - # User explicitly requested a coarse resolution, don't fall back - raise - - # If we're in inference mode and haven't set a fallback yet, - # start from nanoseconds for the first retry - if infer_reso and fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - fallback_creso = NPY_DATETIMEUNIT.NPY_FR_ns - # Continue to retry with ns - continue - - # Try the next coarser unit - fallback_creso = get_next_coarser_unit(fallback_creso) - if fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - # No coarser unit available, re-raise the error - raise - # Continue with coarser unit - - -@cython.wraparound(False) -@cython.boundscheck(False) -cdef _array_to_datetime_impl( - ndarray values, # object dtype, arbitrary ndim - str errors, - bint dayfirst, - bint yearfirst, - bint utc, - NPY_DATETIMEUNIT creso, - str unit_for_numerics, -): - """ - Internal implementation of array_to_datetime with a specific resolution. - """ cdef: Py_ssize_t i, n = values.size object val @@ -423,14 +368,50 @@ cdef _array_to_datetime_impl( if infer_reso: creso = state.creso tz_out = state.process_datetime(val, tz_out, utc_convert) - iresult[i] = parse_pydatetime(val, &dts, creso=creso) + try: + iresult[i] = parse_pydatetime(val, &dts, creso=creso) + except OverflowError: + if infer_reso: + # During inference, try falling back to coarser unit + next_creso = get_next_coarser_unit(creso) + if next_creso != NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # Retry with coarser unit + return array_to_datetime( + values, + errors=errors, + dayfirst=dayfirst, + yearfirst=yearfirst, + utc=utc, + creso=next_creso, + unit_for_numerics=unit_for_numerics, + ) + # Either not in inference mode or no coarser unit available + raise elif PyDate_Check(val): item_reso = NPY_DATETIMEUNIT.NPY_FR_s state.update_creso(item_reso) if infer_reso: creso = state.creso - iresult[i] = pydate_to_dt64(val, &dts, reso=creso) + try: + iresult[i] = pydate_to_dt64(val, &dts, reso=creso) + except OverflowError: + if infer_reso: + # During inference, try falling back to coarser unit + next_creso = get_next_coarser_unit(creso) + if next_creso != NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # Retry with coarser unit + return array_to_datetime( + values, + errors=errors, + dayfirst=dayfirst, + yearfirst=yearfirst, + utc=utc, + creso=next_creso, + unit_for_numerics=unit_for_numerics, + ) + # Either not in inference mode or no coarser unit available + raise state.found_other = True elif cnp.is_datetime64_object(val): @@ -524,14 +505,13 @@ cdef _array_to_datetime_impl( if state.creso_ever_changed: # We encountered mismatched resolutions, need to re-parse with # the correct one. - return _array_to_datetime_impl( + return array_to_datetime( values, errors=errors, yearfirst=yearfirst, dayfirst=dayfirst, utc=utc, creso=state.creso, - unit_for_numerics=None, ) elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # i.e. we never encountered anything non-NaT, default to "s". This diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 8219cc0f220f2..0b74969637d21 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -384,54 +384,7 @@ def array_strptime( creso : NPY_DATETIMEUNIT, default NPY_FR_GENERIC Set to NPY_FR_GENERIC to infer a resolution. """ - # Try to parse with the given resolution, falling back to coarser units if needed - cdef: - NPY_DATETIMEUNIT fallback_creso = creso - NPY_DATETIMEUNIT original_creso = creso - bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC - - while True: - try: - return _array_strptime_impl( - values, fmt, exact, errors, utc, fallback_creso - ) - except OutOfBoundsDatetime: - # Only attempt fallback if we're in inference mode or creso is one - # of the finer resolutions (ns, us, ms) - if not infer_reso and original_creso not in ( - NPY_DATETIMEUNIT.NPY_FR_ns, - NPY_DATETIMEUNIT.NPY_FR_us, - NPY_DATETIMEUNIT.NPY_FR_ms, - ): - # User explicitly requested a coarse resolution, don't fall back - raise - - # If we're in inference mode and haven't set a fallback yet, - # start from nanoseconds for the first retry - if infer_reso and fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - fallback_creso = NPY_DATETIMEUNIT.NPY_FR_ns - # Continue to retry with ns - continue - - # Try the next coarser unit - fallback_creso = get_next_coarser_unit(fallback_creso) - if fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - # No coarser unit available, re-raise the error - raise - # Continue with coarser unit - -cdef _array_strptime_impl( - ndarray[object] values, - str fmt, - bint exact, - str errors, - bint utc, - NPY_DATETIMEUNIT creso, -): - """ - Internal implementation of array_strptime with a specific resolution. - """ cdef: Py_ssize_t i, n = len(values) npy_datetimestruct dts @@ -535,6 +488,20 @@ cdef _array_strptime_impl( try: value = npy_datetimestruct_to_datetime(creso, &dts) except OverflowError as err: + if infer_reso: + # During inference, try falling back to coarser unit + next_creso = get_next_coarser_unit(creso) + if next_creso != NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # Retry with coarser unit + return array_strptime( + values, + fmt=fmt, + exact=exact, + errors=errors, + utc=utc, + creso=next_creso, + ) + # Either not in inference mode or no coarser unit available attrname = npy_unit_to_attrname[creso] raise OutOfBoundsDatetime( f"Out of bounds {attrname} timestamp: {val}" @@ -580,6 +547,20 @@ cdef _array_strptime_impl( try: iresult[i] = npy_datetimestruct_to_datetime(creso, &dts) except OverflowError as err: + if infer_reso: + # During inference, try falling back to coarser unit + next_creso = get_next_coarser_unit(creso) + if next_creso != NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # Retry with coarser unit + return array_strptime( + values, + fmt=fmt, + exact=exact, + errors=errors, + utc=utc, + creso=next_creso, + ) + # Either not in inference mode or no coarser unit available attrname = npy_unit_to_attrname[creso] raise OutOfBoundsDatetime( f"Out of bounds {attrname} timestamp: {val}" @@ -628,7 +609,7 @@ cdef _array_strptime_impl( if state.creso_ever_changed: # We encountered mismatched resolutions, need to re-parse with # the correct one. - return _array_strptime_impl( + return array_strptime( values, fmt=fmt, exact=exact, diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 0c513af5da30a..fc0000553049e 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -105,33 +105,6 @@ def test_infer_with_nat_int_float_str(self, item): assert tz2 is None tm.assert_numpy_array_equal(result2, expected[::-1]) - def test_array_to_datetime_fallback_to_us_explicit_ns(self): - # Test automatic fallback from explicit nanoseconds to microseconds - # When explicitly requesting ns resolution, year 2401 should fall back to us - from pandas._libs.tslibs.dtypes import NpyDatetimeUnit - creso_ns = NpyDatetimeUnit.NPY_FR_ns.value - - vals = np.array(["2401-09-15"], dtype=object) - result, tz = tslib.array_to_datetime(vals, creso=creso_ns) - assert tz is None - assert result.dtype == np.dtype("M8[us]") - expected = np.array(["2401-09-15"], dtype="M8[us]") - tm.assert_numpy_array_equal(result, expected) - - def test_array_to_datetime_fallback_mixed_explicit_ns(self): - # Test automatic fallback with explicit ns resolution - # When one value is in nano range and one is out, both should use coarser unit - from pandas._libs.tslibs.dtypes import NpyDatetimeUnit - creso_ns = NpyDatetimeUnit.NPY_FR_ns.value - - vals = np.array(["2020-01-01", "2401-09-15"], dtype=object) - result, tz = tslib.array_to_datetime(vals, creso=creso_ns) - assert tz is None - # Both values should be in microseconds since one is out of nano range - assert result.dtype == np.dtype("M8[us]") - expected = np.array(["2020-01-01", "2401-09-15"], dtype="M8[us]") - tm.assert_numpy_array_equal(result, expected) - class TestArrayToDatetimeWithTZResolutionInference: def test_array_to_datetime_with_tz_resolution(self): diff --git a/pandas/tests/tslibs/test_strptime.py b/pandas/tests/tslibs/test_strptime.py index 753ee7ba6ea0a..d726006b03f6d 100644 --- a/pandas/tests/tslibs/test_strptime.py +++ b/pandas/tests/tslibs/test_strptime.py @@ -108,30 +108,3 @@ def test_array_strptime_str_outside_nano_range(self): fmt2 = "%b %d, %Y" res2, _ = array_strptime(vals2, fmt=fmt2, creso=creso_infer) tm.assert_numpy_array_equal(res2, expected2) - - def test_array_strptime_fallback_to_us_explicit_ns(self): - # Test automatic fallback from explicit nanoseconds to microseconds - # When explicitly requesting ns resolution, year 2401 should fall back to us - from pandas._libs.tslibs.dtypes import NpyDatetimeUnit - creso_ns = NpyDatetimeUnit.NPY_FR_ns.value - - vals = np.array(["2401-09-15"], dtype=object) - fmt = "ISO8601" - res, _ = array_strptime(vals, fmt=fmt, creso=creso_ns) - assert res.dtype == np.dtype("M8[us]") - expected = np.array(["2401-09-15"], dtype="M8[us]") - tm.assert_numpy_array_equal(res, expected) - - def test_array_strptime_fallback_mixed_explicit_ns(self): - # Test automatic fallback with explicit ns resolution - # When one value is in nano range and one is out, both should use coarser unit - from pandas._libs.tslibs.dtypes import NpyDatetimeUnit - creso_ns = NpyDatetimeUnit.NPY_FR_ns.value - - vals = np.array(["2020-01-01", "2401-09-15"], dtype=object) - fmt = "ISO8601" - res, _ = array_strptime(vals, fmt=fmt, creso=creso_ns) - # Both values should be in microseconds since one is out of nano range - assert res.dtype == np.dtype("M8[us]") - expected = np.array(["2020-01-01", "2401-09-15"], dtype="M8[us]") - tm.assert_numpy_array_equal(res, expected)