pandas-dev · rhshadrach · Oct 19, 2025 · Oct 25, 2025 · Oct 25, 2025 · jbrockmendel
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -371,6 +371,63 @@ In cases with mixed-resolution inputs, the highest resolution is used:
     In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype
     Out[2]: dtype('<M8[ns]')
 
+.. _whatsnew_300.api_breaking.concat_datetime_sorting:
+
+:func:`concat` no longer ignores ``sort`` when all objects have a :class:`DatetimeIndex`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When all objects passed to :func:`concat` have a :class:`DatetimeIndex`,
+passing ``sort=False`` will now result in the non-concatenation axis not
+being sorted. Previously, the result would always be sorted along
+the non-concatenation axis even when ``sort=False`` was passed.
+
+If you do not specify the ``sort`` argument, pandas will continue to return a
+sorted result, but this behavior is deprecated and you will receive a warning.
+In order to make this less noisy for users, pandas checks if not sorting would
+impact the result and only warns when it would. This check can be expensive,
+and users can skip the check by explicitly specifying ``sort=True`` or
+``sort=False``.
+
+This deprecation can also impact pandas' internal usage of :func:`concat`.
+While we have investigated uses of :func:`concat` to determine if this could lead
+to a change in behavior of other functions and methods in the API, it is
+possible some have been missed. In order to be cautious here, pandas has *not*
+added ``sort=False`` to any internal calls where we believe behavior should not change.
+If we have missed something, users will not experience a behavior change but they
+will receive a warning about :func:`concat` even though they are not directly
+calling this function. If this does occur, we ask users to open an issue so that
+we may address any potential behavior changes.
+
+.. ipython:: python
+
+    idx1 = pd.date_range("2025-01-02", periods=3, freq="h")
+    df1 = pd.DataFrame({"a": [1, 2, 3]}, index=idx1)
+    df1
+
+    idx2 = pd.date_range("2025-01-01", periods=3, freq="h")
+    df2 = pd.DataFrame({"b": [1, 2, 3]}, index=idx2)
+    df2
+
+*Old behavior*
+
+.. code-block:: ipython
+
+    In [3]: pd.concat([df1, df2], axis=1, sort=False)
+    Out[3]:
+                           a    b
+    2025-01-01 00:00:00  NaN  1.0
+    2025-01-01 01:00:00  NaN  2.0
+    2025-01-01 02:00:00  NaN  3.0
+    2025-01-02 00:00:00  1.0  NaN
+    2025-01-02 01:00:00  2.0  NaN
+    2025-01-02 02:00:00  3.0  NaN
+
+*New behavior*
+
+.. ipython:: python
+
+    pd.concat([df1, df2], axis=1, sort=False)
+
 .. _whatsnew_300.api_breaking.value_counts_sorting:
 
 Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False``
@@ -1165,6 +1222,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
+- Bug in :meth:`DataFrameGroupBy.shift` where the resulting index would be sorted if the input's index is a :class:`DatetimeIndex` and multiple periods are specified. (:issue:`62843`)
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)

@@ -5236,7 +5236,7 @@ def shift(
         return (
             shifted_dataframes[0]
             if len(shifted_dataframes) == 1
-            else concat(shifted_dataframes, axis=1)
+            else concat(shifted_dataframes, axis=1, sort=False)
         )
 
     @final

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
@@ -64,7 +64,7 @@ def get_objs_combined_axis(
     objs,
     intersect: bool = False,
     axis: Axis = 0,
-    sort: bool = True,
+    sort: bool | lib.NoDefault = True,
 ) -> Index:
     """
     Extract combined index: return intersection or union (depending on the
@@ -81,7 +81,8 @@ def get_objs_combined_axis(
     axis : {0 or 'index', 1 or 'outer'}, default 0
         The axis to extract indexes from.
     sort : bool, default True
-        Whether the result index should come out sorted or not.
+        Whether the result index should come out sorted or not. NoDefault
+        is used for the deprecation in GH#57335.
 
     Returns
     -------
@@ -108,7 +109,7 @@ def _get_distinct_objs(objs: list[Index]) -> list[Index]:
 def _get_combined_index(
     indexes: list[Index],
     intersect: bool = False,
-    sort: bool = False,
+    sort: bool | lib.NoDefault = False,
 ) -> Index:
     """
     Return the union or intersection of indexes.
@@ -121,7 +122,8 @@ def _get_combined_index(
         If True, calculate the intersection between indexes. Otherwise,
         calculate the union.
     sort : bool, default False
-        Whether the result index should come out sorted or not.
+        Whether the result index should come out sorted or not. NoDefault
+        is used for the deprecation in GH#57335.
 
     Returns
     -------
@@ -138,10 +140,10 @@ def _get_combined_index(
         for other in indexes[1:]:
             index = index.intersection(other)
     else:
-        index = union_indexes(indexes, sort=False)
+        index = union_indexes(indexes, sort=sort if sort is lib.no_default else False)
         index = ensure_index(index)
 
-    if sort:
+    if sort and sort is not lib.no_default:
         index = safe_sort_index(index)
     return index
 
@@ -180,7 +182,7 @@ def safe_sort_index(index: Index) -> Index:
     return index
 
 
-def union_indexes(indexes, sort: bool | None = True) -> Index:
+def union_indexes(indexes, sort: bool | None | lib.NoDefault = True) -> Index:
     """
     Return the union of indexes.
 
@@ -190,7 +192,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
     ----------
     indexes : list of Index or list objects
     sort : bool, default True
-        Whether the result index should come out sorted or not.
+        Whether the result index should come out sorted or not. NoDefault
+        used for deprecation of GH#57335.
 
     Returns
     -------
@@ -201,7 +204,7 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
     if len(indexes) == 1:
         result = indexes[0]
         if isinstance(result, list):
-            if not sort:
+            if not sort or sort is lib.no_default:
                 result = Index(result)
             else:
                 result = Index(sorted(result))
@@ -227,7 +230,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
             raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
 
         if num_dtis == len(indexes):
-            sort = True
+            if sort is lib.no_default:
+                sort = True
             result = indexes[0]
 
         elif num_dtis > 1:

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
@@ -45,7 +45,9 @@
     ensure_index,
     get_objs_combined_axis,
     get_unanimous_names,
+    union_indexes,
 )
+from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.core.internals import concatenate_managers
 
 if TYPE_CHECKING:
@@ -162,7 +164,7 @@ def concat(
     levels=None,
     names: list[HashableT] | None = None,
     verify_integrity: bool = False,
-    sort: bool = False,
+    sort: bool | lib.NoDefault = lib.no_default,
     copy: bool | lib.NoDefault = lib.no_default,
 ) -> DataFrame | Series:
     """
@@ -405,13 +407,41 @@ def concat(
             "Only can inner (intersect) or outer (union) join the other axis"
         )
 
-    if not is_bool(sort):
+    objs, keys, ndims = _clean_keys_and_objs(objs, keys)
+
+    if sort is lib.no_default:
+        if axis == 0:
+            non_concat_axis = [
+                obj.columns if isinstance(obj, ABCDataFrame) else Index([obj.name])
+                for obj in objs
+            ]
+        else:
+            non_concat_axis = [obj.index for obj in objs]
+
+        if (
+            intersect
+            or any(not isinstance(index, DatetimeIndex) for index in non_concat_axis)
+            or all(
+                id(prev) == id(curr)
+                for prev, curr in zip(non_concat_axis, non_concat_axis[1:])
+            )
+            or (
+                all(
+                    prev[-1] <= curr[0] and prev.is_monotonic_increasing
+                    for prev, curr in zip(non_concat_axis, non_concat_axis[1:])
+                    if not prev.empty and not curr.empty
+                )
+                and non_concat_axis[-1].is_monotonic_increasing
+            )
+        ):
+            # Sorting or not will not impact the result.
+            sort = False
+    elif not is_bool(sort):
         raise ValueError(
             f"The 'sort' keyword only accepts boolean values; {sort} was passed."
         )
-    sort = bool(sort)
-
-    objs, keys, ndims = _clean_keys_and_objs(objs, keys)
+    else:
+        sort = bool(sort)
 
     # select an object to be our result reference
     sample, objs = _get_sample_object(objs, ndims, keys, names, levels, intersect)
@@ -436,9 +466,10 @@ def concat(
     if len(ndims) > 1:
         objs = _sanitize_mixed_ndim(objs, sample, ignore_index, bm_axis)
 
+    orig_axis = axis
     axis = 1 - bm_axis if is_frame else 0
     names = names or getattr(keys, "names", None)
-    return _get_result(
+    result = _get_result(
         objs,
         is_series,
         bm_axis,
@@ -452,6 +483,28 @@ def concat(
         axis,
     )
 
+    if sort is lib.no_default:
+        if orig_axis == 0:
+            non_concat_axis = [
+                obj.columns if isinstance(obj, ABCDataFrame) else Index([obj.name])
+                for obj in objs
+            ]
+        else:
+            non_concat_axis = [obj.index for obj in objs]
+        no_sort_result_index = union_indexes(non_concat_axis, sort=False)
+        orig = result.index if orig_axis == 1 else result.columns
+        if not no_sort_result_index.equals(orig):
+            msg = (
+                "Sorting by default when concatenating all DatetimeIndex is "
+                "deprecated.  In the future, pandas will respect the default "
+                "of `sort=False`. Specify `sort=True` or `sort=False` to "
+                "silence this message. If you see this warnings when not "
+                "directly calling concat, report a bug to pandas."
+            )
+            warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
+
+    return result
+
 
 def _sanitize_mixed_ndim(
     objs: list[Series | DataFrame],
@@ -510,7 +563,7 @@ def _get_result(
     bm_axis: AxisInt,
     ignore_index: bool,
     intersect: bool,
-    sort: bool,
+    sort: bool | lib.NoDefault,
     keys: Iterable[Hashable] | None,
     levels,
     verify_integrity: bool,
@@ -620,7 +673,7 @@ def new_axes(
     objs: list[Series | DataFrame],
     bm_axis: AxisInt,
     intersect: bool,
-    sort: bool,
+    sort: bool | lib.NoDefault,
     keys: Iterable[Hashable] | None,
     names: list[HashableT] | None,
     axis: AxisInt,

diff --git a/pandas/tests/groupby/methods/test_groupby_shift_diff.py b/pandas/tests/groupby/methods/test_groupby_shift_diff.py
@@ -248,3 +248,21 @@ def test_group_shift_with_multiple_periods_and_both_fill_and_freq_deprecated():
     msg = "Passing a 'freq' together with a 'fill_value'"
     with pytest.raises(ValueError, match=msg):
         df.groupby("b")[["a"]].shift([1, 2], fill_value=1, freq="h")
+
+
+def test_groupby_shift_multiple_periods_unsorted_index():
+    # https://github.com/pandas-dev/pandas/pull/62843
+    idx = date_range("1/1/2000", periods=4, freq="h")
+    df = DataFrame(
+        {"a": [1, 2, 3], "b": [True, True, False]},
+        index=[idx[2], idx[0], idx[1]],
+    )
+    result = df.groupby("b")[["a"]].shift([0, 1], freq="h")
+    expected = DataFrame(
+        {
+            "a_0": [1.0, 2.0, 3.0, np.nan],
+            "a_1": [3.0, np.nan, 2.0, 1.0],
+        },
+        index=[idx[2], idx[0], idx[1], idx[3]],
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
@@ -10,7 +10,10 @@
 import numpy as np
 import pytest
 
-from pandas.errors import InvalidIndexError
+from pandas.errors import (
+    InvalidIndexError,
+    Pandas4Warning,
+)
 
 import pandas as pd
 from pandas import (
@@ -434,7 +437,9 @@ def test_concat_bug_1719(self):
         # to join with union
         # these two are of different length!
         left = concat([ts1, ts2], join="outer", axis=1)
-        right = concat([ts2, ts1], join="outer", axis=1)
+        msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
+        with tm.assert_produces_warning(Pandas4Warning, match=msg):
+            right = concat([ts2, ts1], join="outer", axis=1)
 
         assert len(left) == len(right)
 

diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import Pandas4Warning
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -69,7 +71,9 @@ def test_concat_datetime_timezone(self):
 
         idx3 = date_range("2011-01-01", periods=3, freq="h", tz="Asia/Tokyo")
         df3 = DataFrame({"b": [1, 2, 3]}, index=idx3)
-        result = concat([df1, df3], axis=1)
+        msg = "Sorting by default when concatenating all DatetimeIndex"
+        with tm.assert_produces_warning(Pandas4Warning, match=msg):
+            result = concat([df1, df3], axis=1)
 
         exp_idx = DatetimeIndex(
             [