-
-
Couldn't load subscription status.
- Fork 19.2k
DEPR/BUG: Do not ignore sort in concat for DatetimeIndex #62843
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,7 +64,7 @@ def get_objs_combined_axis( | |
| objs, | ||
| intersect: bool = False, | ||
| axis: Axis = 0, | ||
| sort: bool = True, | ||
| sort: bool | lib.NoDefault = True, | ||
| ) -> Index: | ||
| """ | ||
| Extract combined index: return intersection or union (depending on the | ||
|
|
@@ -81,7 +81,8 @@ def get_objs_combined_axis( | |
| axis : {0 or 'index', 1 or 'outer'}, default 0 | ||
| The axis to extract indexes from. | ||
| sort : bool, default True | ||
| Whether the result index should come out sorted or not. | ||
| Whether the result index should come out sorted or not. NoDefault | ||
| use for deprecation in GH#57335. | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -108,7 +109,7 @@ def _get_distinct_objs(objs: list[Index]) -> list[Index]: | |
| def _get_combined_index( | ||
| indexes: list[Index], | ||
| intersect: bool = False, | ||
| sort: bool = False, | ||
| sort: bool | lib.NoDefault = False, | ||
| ) -> Index: | ||
| """ | ||
| Return the union or intersection of indexes. | ||
|
|
@@ -121,7 +122,8 @@ def _get_combined_index( | |
| If True, calculate the intersection between indexes. Otherwise, | ||
| calculate the union. | ||
| sort : bool, default False | ||
| Whether the result index should come out sorted or not. | ||
| Whether the result index should come out sorted or not. NoDefault | ||
| used for deprecation of GH#57335 | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -138,10 +140,10 @@ def _get_combined_index( | |
| for other in indexes[1:]: | ||
| index = index.intersection(other) | ||
| else: | ||
| index = union_indexes(indexes, sort=False) | ||
| index = union_indexes(indexes, sort=sort if sort is lib.no_default else False) | ||
| index = ensure_index(index) | ||
|
|
||
| if sort: | ||
| if sort and sort is not lib.no_default: | ||
| index = safe_sort_index(index) | ||
| return index | ||
|
|
||
|
|
@@ -180,7 +182,7 @@ def safe_sort_index(index: Index) -> Index: | |
| return index | ||
|
|
||
|
|
||
| def union_indexes(indexes, sort: bool | None = True) -> Index: | ||
| def union_indexes(indexes, sort: bool | None | lib.NoDefault = True) -> Index: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is None still needed here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By both type checking and test coverage it looks like no, will remove. |
||
| """ | ||
| Return the union of indexes. | ||
|
|
||
|
|
@@ -190,7 +192,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: | |
| ---------- | ||
| indexes : list of Index or list objects | ||
| sort : bool, default True | ||
| Whether the result index should come out sorted or not. | ||
| Whether the result index should come out sorted or not. NoDefault | ||
| used for deprecation of GH#57335. | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -201,7 +204,7 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: | |
| if len(indexes) == 1: | ||
| result = indexes[0] | ||
| if isinstance(result, list): | ||
| if not sort: | ||
| if not sort or sort is lib.no_default: | ||
| result = Index(result) | ||
| else: | ||
| result = Index(sorted(result)) | ||
|
|
@@ -227,7 +230,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: | |
| raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") | ||
|
|
||
| if num_dtis == len(indexes): | ||
| sort = True | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if sort is lib.no_default: | ||
| sort = True | ||
| result = indexes[0] | ||
|
|
||
| elif num_dtis > 1: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,7 +45,9 @@ | |
| ensure_index, | ||
| get_objs_combined_axis, | ||
| get_unanimous_names, | ||
| union_indexes, | ||
| ) | ||
| from pandas.core.indexes.datetimes import DatetimeIndex | ||
| from pandas.core.internals import concatenate_managers | ||
|
|
||
| if TYPE_CHECKING: | ||
|
|
@@ -162,7 +164,7 @@ def concat( | |
| levels=None, | ||
| names: list[HashableT] | None = None, | ||
| verify_integrity: bool = False, | ||
| sort: bool = False, | ||
| sort: bool | lib.NoDefault = lib.no_default, | ||
| copy: bool | lib.NoDefault = lib.no_default, | ||
| ) -> DataFrame | Series: | ||
| """ | ||
|
|
@@ -405,13 +407,41 @@ def concat( | |
| "Only can inner (intersect) or outer (union) join the other axis" | ||
| ) | ||
|
|
||
| if not is_bool(sort): | ||
| objs, keys, ndims = _clean_keys_and_objs(objs, keys) | ||
|
|
||
| if sort is lib.no_default: | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if axis == 0: | ||
| non_concat_axis = [ | ||
| obj.columns if isinstance(obj, ABCDataFrame) else Index([obj.name]) | ||
| for obj in objs | ||
| ] | ||
| else: | ||
| non_concat_axis = [obj.index for obj in objs] | ||
|
|
||
| if ( | ||
| intersect | ||
| or any(not isinstance(index, DatetimeIndex) for index in non_concat_axis) | ||
| or all( | ||
| id(prev) == id(curr) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) | ||
| ) | ||
| or ( | ||
| all( | ||
| prev[-1] <= curr[0] and prev.is_monotonic_increasing | ||
| for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) | ||
| if not prev.empty and not curr.empty | ||
| ) | ||
| and non_concat_axis[-1].is_monotonic_increasing | ||
| ) | ||
| ): | ||
| # Sorting or not will not impact the result. | ||
| sort = False | ||
| elif not is_bool(sort): | ||
| raise ValueError( | ||
| f"The 'sort' keyword only accepts boolean values; {sort} was passed." | ||
| ) | ||
| sort = bool(sort) | ||
|
|
||
| objs, keys, ndims = _clean_keys_and_objs(objs, keys) | ||
| else: | ||
| sort = bool(sort) | ||
|
|
||
| # select an object to be our result reference | ||
| sample, objs = _get_sample_object(objs, ndims, keys, names, levels, intersect) | ||
|
|
@@ -436,9 +466,10 @@ def concat( | |
| if len(ndims) > 1: | ||
| objs = _sanitize_mixed_ndim(objs, sample, ignore_index, bm_axis) | ||
|
|
||
| orig_axis = axis | ||
| axis = 1 - bm_axis if is_frame else 0 | ||
| names = names or getattr(keys, "names", None) | ||
| return _get_result( | ||
| result = _get_result( | ||
| objs, | ||
| is_series, | ||
| bm_axis, | ||
|
|
@@ -452,6 +483,28 @@ def concat( | |
| axis, | ||
| ) | ||
|
|
||
| if sort is lib.no_default: | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if orig_axis == 0: | ||
| non_concat_axis = [ | ||
| obj.columns if isinstance(obj, ABCDataFrame) else Index([obj.name]) | ||
| for obj in objs | ||
| ] | ||
| else: | ||
| non_concat_axis = [obj.index for obj in objs] | ||
| no_sort_result_index = union_indexes(non_concat_axis, sort=False) | ||
| orig = result.index if orig_axis == 1 else result.columns | ||
| if not no_sort_result_index.equals(orig): | ||
| msg = ( | ||
| "Sorting by default when concatenating all DatetimeIndex is " | ||
| "deprecated. In the future, pandas will respect the default " | ||
| "of `sort=False`. Specify `sort=True` or `sort=False` to " | ||
| "silence this message. If you see this warnings when not " | ||
| "directly calling concat, report a bug to pandas." | ||
| ) | ||
| warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) | ||
|
|
||
| return result | ||
|
|
||
|
|
||
| def _sanitize_mixed_ndim( | ||
| objs: list[Series | DataFrame], | ||
|
|
@@ -510,7 +563,7 @@ def _get_result( | |
| bm_axis: AxisInt, | ||
| ignore_index: bool, | ||
| intersect: bool, | ||
| sort: bool, | ||
| sort: bool | lib.NoDefault, | ||
| keys: Iterable[Hashable] | None, | ||
| levels, | ||
| verify_integrity: bool, | ||
|
|
@@ -620,7 +673,7 @@ def new_axes( | |
| objs: list[Series | DataFrame], | ||
| bm_axis: AxisInt, | ||
| intersect: bool, | ||
| sort: bool, | ||
| sort: bool | lib.NoDefault, | ||
| keys: Iterable[Hashable] | None, | ||
| names: list[HashableT] | None, | ||
| axis: AxisInt, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GH ref here to the issue?