4 changes: 3 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -355,7 +355,7 @@ When passing strings, the resolution will depend on the precision of the string,
In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype
Out[5]: dtype('<M8[ns]')

The inferred resolution now matches that of the input strings:
For nanosecond-precision strings, the inferred resolution now matches that of the input; otherwise it defaults to microseconds:

.. ipython:: python

@@ -364,6 +364,8 @@ The inferred resolution now matches that of the input strings:
In [4]: pd.to_datetime(["2024-03-22 11:43:01.002003"]).dtype
In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype

This also changes the :class:`Timestamp` constructor with a string input, which in version 2.x could give a second or millisecond unit, a behavior that users generally disliked (:issue:`52653`).
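
A minimal sketch of the expected constructor behavior under this change (the outputs below are illustrative, not captured from a released build):

.. code-block:: ipython

    In [1]: pd.Timestamp("2024-03-22 11:43:01.002").unit
    Out[1]: 'us'

    In [2]: pd.Timestamp("2024-03-22 11:43:01.002003004").unit
    Out[2]: 'ns'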

In cases with mixed-resolution inputs, the highest resolution is used:

.. code-block:: ipython
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/conversion.pyx
@@ -623,6 +623,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
)
if not string_to_dts_failed:
reso = get_supported_reso(out_bestunit)
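# GH#52653: do not infer a second or millisecond unit from the string; use microseconds as the floor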
if reso < NPY_FR_us:
reso = NPY_FR_us
check_dts_bounds(&dts, reso)
obj = _TSObject()
obj.dts = dts
@@ -661,6 +663,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
nanos=&nanos,
)
reso = get_supported_reso(out_bestunit)
if reso < NPY_FR_us:
reso = NPY_FR_us
return convert_datetime_to_tsobject(dt, tz, nanos=nanos, reso=reso)


4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/strptime.pyx
@@ -466,6 +466,8 @@ def array_strptime(
# No error reported by string_to_dts, pick back up
# where we left off
item_reso = get_supported_reso(out_bestunit)
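# GH#52653: floor the inferred per-element resolution at microseconds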
if item_reso < NPY_DATETIMEUNIT.NPY_FR_us:
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
@@ -510,6 +512,8 @@ def array_strptime(
val, fmt, exact, format_regex, locale_time, &dts, &item_reso
)

if item_reso < NPY_DATETIMEUNIT.NPY_FR_us:
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
2 changes: 1 addition & 1 deletion pandas/conftest.py
@@ -936,7 +936,7 @@ def rand_series_with_duplicate_datetimeindex() -> Series:
(Period("2012-01", freq="M"), "period[M]"),
(Period("2012-02-01", freq="D"), "period[D]"),
(
Timestamp("2011-01-01", tz="US/Eastern"),
Timestamp("2011-01-01", tz="US/Eastern").as_unit("s"),
DatetimeTZDtype(unit="s", tz="US/Eastern"),
),
(Timedelta(seconds=500), "timedelta64[ns]"),
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
@@ -370,7 +370,7 @@ def unique(values):
array([2, 1])

>>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
array(['2016-01-01T00:00:00'], dtype='datetime64[s]')
array(['2016-01-01T00:00:00.000000'], dtype='datetime64[us]')

>>> pd.unique(
... pd.Series(
12 changes: 6 additions & 6 deletions pandas/core/arrays/datetimelike.py
@@ -1912,11 +1912,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:

>>> rng_tz.floor("2h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)

>>> rng_tz.floor("2h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)
"""

_floor_example = """>>> rng.floor('h')
@@ -1939,11 +1939,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:

>>> rng_tz.floor("2h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)

>>> rng_tz.floor("2h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)
"""

_ceil_example = """>>> rng.ceil('h')
@@ -1966,11 +1966,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:

>>> rng_tz.ceil("h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)

>>> rng_tz.ceil("h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[s, Europe/Amsterdam]', freq=None)
dtype='datetime64[us, Europe/Amsterdam]', freq=None)
"""


16 changes: 8 additions & 8 deletions pandas/core/arrays/datetimes.py
@@ -220,7 +220,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
... )
<DatetimeArray>
['2023-01-01 00:00:00', '2023-01-02 00:00:00']
Length: 2, dtype: datetime64[s]
Length: 2, dtype: datetime64[us]
"""

__module__ = "pandas.arrays"
@@ -612,7 +612,7 @@ def tz(self) -> tzinfo | None:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.tz
datetime.timezone.utc

@@ -1441,7 +1441,7 @@ def time(self) -> npt.NDArray[np.object_]:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.time
0 10:00:00
1 11:00:00
@@ -1484,7 +1484,7 @@ def timetz(self) -> npt.NDArray[np.object_]:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.timetz
0 10:00:00+00:00
1 11:00:00+00:00
@@ -1526,7 +1526,7 @@ def date(self) -> npt.NDArray[np.object_]:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.date
0 2020-01-01
1 2020-02-01
@@ -1875,7 +1875,7 @@ def isocalendar(self) -> DataFrame:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.dayofyear
0 1
1 32
@@ -1911,7 +1911,7 @@ def isocalendar(self) -> DataFrame:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-04-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.quarter
0 1
1 2
@@ -1947,7 +1947,7 @@ def isocalendar(self) -> DataFrame:
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 11:00:00+00:00
dtype: datetime64[s, UTC]
dtype: datetime64[us, UTC]
>>> s.dt.daysinmonth
0 31
1 29
2 changes: 1 addition & 1 deletion pandas/core/base.py
@@ -1380,7 +1380,7 @@ def factorize(
0 2000-03-11
1 2000-03-12
2 2000-03-13
dtype: datetime64[s]
dtype: datetime64[us]

>>> ser.searchsorted('3/14/2000')
np.int64(3)
4 changes: 2 additions & 2 deletions pandas/core/dtypes/missing.py
@@ -150,7 +150,7 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
>>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"])
>>> index
DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
dtype='datetime64[s]', freq=None)
dtype='datetime64[us]', freq=None)
>>> pd.isna(index)
array([False, False, True, False])

@@ -365,7 +365,7 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
>>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"])
>>> index
DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
dtype='datetime64[s]', freq=None)
dtype='datetime64[us]', freq=None)
>>> pd.notna(index)
array([ True, True, False, True])

2 changes: 1 addition & 1 deletion pandas/core/generic.py
@@ -6319,7 +6319,7 @@ def dtypes(self):
>>> df.dtypes
float float64
int int64
datetime datetime64[s]
datetime datetime64[us]
string str
dtype: object
"""
4 changes: 2 additions & 2 deletions pandas/core/groupby/generic.py
@@ -1444,7 +1444,7 @@ def idxmin(self, skipna: bool = True) -> Series:
>>> ser.groupby(["a", "a", "b", "b"]).idxmin()
a 2023-01-01
b 2023-02-01
dtype: datetime64[s]
dtype: datetime64[us]
"""
return self._idxmax_idxmin("idxmin", skipna=skipna)

@@ -1505,7 +1505,7 @@ def idxmax(self, skipna: bool = True) -> Series:
>>> ser.groupby(["a", "a", "b", "b"]).idxmax()
a 2023-01-15
b 2023-02-15
dtype: datetime64[s]
dtype: datetime64[us]
"""
return self._idxmax_idxmin("idxmax", skipna=skipna)

2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
@@ -252,7 +252,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
>>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
>>> idx
DatetimeIndex(['2020-01-01 10:00:00+00:00', '2020-02-01 11:00:00+00:00'],
dtype='datetime64[s, UTC]', freq=None)
dtype='datetime64[us, UTC]', freq=None)
"""

_typ = "datetimeindex"
4 changes: 2 additions & 2 deletions pandas/core/series.py
@@ -2133,14 +2133,14 @@ def unique(self) -> ArrayLike:
>>> pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique()
<DatetimeArray>
['2016-01-01 00:00:00']
Length: 1, dtype: datetime64[s]
Length: 1, dtype: datetime64[us]

>>> pd.Series(
... [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)]
... ).unique()
<DatetimeArray>
['2016-01-01 00:00:00-05:00']
Length: 1, dtype: datetime64[s, US/Eastern]
Length: 1, dtype: datetime64[us, US/Eastern]

A Categorical will return categories in the order of
appearance and with the same dtype.
10 changes: 5 additions & 5 deletions pandas/core/tools/datetimes.py
@@ -881,7 +881,7 @@ def to_datetime(
>>> pd.to_datetime(df)
0 2015-02-04
1 2016-03-05
dtype: datetime64[s]
dtype: datetime64[us]

Using a unix epoch time

@@ -924,14 +924,14 @@ def to_datetime(

>>> pd.to_datetime(["2018-10-26 12:00:00", "2018-10-26 13:00:15"])
DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'],
dtype='datetime64[s]', freq=None)
dtype='datetime64[us]', freq=None)

- Timezone-aware inputs *with constant time offset* are converted to
timezone-aware :class:`DatetimeIndex`:

>>> pd.to_datetime(["2018-10-26 12:00 -0500", "2018-10-26 13:00 -0500"])
DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'],
dtype='datetime64[s, UTC-05:00]', freq=None)
dtype='datetime64[us, UTC-05:00]', freq=None)

- However, timezone-aware inputs *with mixed time offsets* (for example
issued from a timezone with daylight savings, such as Europe/Paris)
@@ -973,14 +973,14 @@ def to_datetime(

>>> pd.to_datetime(["2018-10-26 12:00", "2018-10-26 13:00"], utc=True)
DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'],
dtype='datetime64[s, UTC]', freq=None)
dtype='datetime64[us, UTC]', freq=None)

- Timezone-aware inputs are *converted* to UTC (the output represents the
exact same datetime, but viewed from the UTC time offset `+00:00`).

>>> pd.to_datetime(["2018-10-26 12:00 -0530", "2018-10-26 12:00 -0500"], utc=True)
DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'],
dtype='datetime64[s, UTC]', freq=None)
dtype='datetime64[us, UTC]', freq=None)

- Inputs can contain both strings and datetimes; the above
rules still apply
9 changes: 6 additions & 3 deletions pandas/tests/arrays/test_array.py
@@ -127,7 +127,7 @@ def test_dt64_array(dtype_unit):
(
pd.DatetimeIndex(["2000", "2001"]),
None,
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"),
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[us]"),
),
(
["2000", "2001"],
@@ -323,7 +323,7 @@ def test_array_copy():
([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])),
# datetime
(
[pd.Timestamp("2000"), pd.Timestamp("2001")],
[pd.Timestamp("2000").as_unit("s"), pd.Timestamp("2001").as_unit("s")],
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"),
),
(
@@ -342,7 +342,10 @@
),
# datetimetz
(
[pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
[
pd.Timestamp("2000", tz="CET").as_unit("s"),
pd.Timestamp("2001", tz="CET").as_unit("s"),
],
DatetimeArray._from_sequence(
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="s")
),
14 changes: 9 additions & 5 deletions pandas/tests/base/test_conversion.py
@@ -445,9 +445,9 @@ def test_to_numpy_dtype(as_series):
[
([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]),
(
[Timestamp("2000"), Timestamp("2000"), pd.NaT],
[Timestamp("2000").as_unit("s"), Timestamp("2000").as_unit("s"), pd.NaT],
None,
Timestamp("2000"),
Timestamp("2000").as_unit("s"),
[np.datetime64("2000-01-01T00:00:00", "s")] * 3,
),
],
@@ -486,10 +486,14 @@ def test_to_numpy_na_value_numpy_dtype(
[1, 2, 0, 4],
),
(
[Timestamp("2000"), Timestamp("2000"), pd.NaT],
[(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))],
[Timestamp("2000").as_unit("s"), Timestamp("2000").as_unit("s"), pd.NaT],
[
(0, Timestamp("2021").as_unit("s")),
(0, Timestamp("2022").as_unit("s")),
(1, Timestamp("2000").as_unit("s")),
],
None,
Timestamp("2000"),
Timestamp("2000").as_unit("s"),
[np.datetime64("2000-01-01T00:00:00", "s")] * 3,
),
],
4 changes: 2 additions & 2 deletions pandas/tests/dtypes/cast/test_infer_dtype.py
@@ -155,8 +155,8 @@ def test_infer_dtype_from_scalar_errors():
(1, np.int64),
(1.5, np.float64),
(np.datetime64("2016-01-01"), np.dtype("M8[s]")),
(Timestamp("20160101"), np.dtype("M8[s]")),
(Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"),
(Timestamp("20160101").as_unit("s"), np.dtype("M8[s]")),
(Timestamp("20160101", tz="UTC").as_unit("s"), "datetime64[s, UTC]"),
],
)
def test_infer_dtype_from_scalar(value, expected, using_infer_string):
6 changes: 3 additions & 3 deletions pandas/tests/extension/test_arrow.py
@@ -3470,9 +3470,9 @@ def test_string_to_datetime_parsing_cast():
# GH 56266
string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]
result = pd.Series(string_dates, dtype="timestamp[s][pyarrow]")
expected = pd.Series(
ArrowExtensionArray(pa.array(pd.to_datetime(string_dates), from_pandas=True))
)

pd_res = pd.to_datetime(string_dates).as_unit("s")
expected = pd.Series(ArrowExtensionArray(pa.array(pd_res, from_pandas=True)))
tm.assert_series_equal(result, expected)

