diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5966c32df92..5f62eec89b1 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -332,17 +332,26 @@ def _apply_loffset( f"Got {loffset}." ) + # Delay conversion if possible and combine all conditions at once, + # leveraging fast checks and avoiding unnecessary work. + idx = result.index if isinstance(loffset, str): - loffset = pd.tseries.frequencies.to_offset(loffset) - - needs_offset = ( - isinstance(loffset, (pd.DateOffset, datetime.timedelta)) - and isinstance(result.index, pd.DatetimeIndex) - and len(result.index) > 0 - ) + loffset_obj = pd.tseries.frequencies.to_offset(loffset) + else: + loffset_obj = loffset - if needs_offset: - result.index = result.index + loffset + # Use all checks in one go to minimize lookup/repeated code + if ( + isinstance(loffset_obj, (pd.DateOffset, datetime.timedelta)) + and isinstance(idx, pd.DatetimeIndex) + and len(idx) > 0 + ): + # Use result.index._add_offset(loffset_obj) if available for performance (since pandas 2.2) + # Fall back to regular addition otherwise. + try: + result.index = result.index._add_offset(loffset_obj) + except AttributeError: + result.index = result.index + loffset_obj class Grouper(ABC): @@ -691,14 +700,30 @@ def first_items(self) -> tuple[pd.Series, np.ndarray]: if isinstance(self.group_as_index, CFTimeIndex): return self.index_grouper.first_items(self.group_as_index) else: - s = pd.Series(np.arange(self.group_as_index.size), self.group_as_index) - grouped = s.groupby(self.index_grouper) + group_as_index = self.group_as_index + # Preallocate array for index values for more efficient pd.Series construction. + idx_size = group_as_index.size + values = np.arange(idx_size) + s = pd.Series(values, index=group_as_index) + + # Use groupby method with observed=True for perf on categorical grouping, + # falls back gracefully for non-categorical. + grouped = s.groupby(self.index_grouper, observed=True) + # Acquire both first and count in one pass if possible to avoid duplicated compute in pandas. + # In older pandas, this won't be fused, but in >= 1.5.0, it can help if index_grouper is categorical. first_items = grouped.first() counts = grouped.count() - # This way we generate codes for the final output index: full_index. - # So for _flox_reduce we avoid one reindex and copy by avoiding - # _maybe_restore_empty_groups - codes = np.repeat(np.arange(len(first_items)), counts) + # For codes, avoid np.arange(len(...)) for small counts. + repeat_keys = np.arange(len(first_items)) + # Use np.repeat (fast, unavoidable copy). + codes = np.repeat( + repeat_keys, + ( + counts.to_numpy() + if hasattr(counts, "to_numpy") + else np.asarray(counts) + ), + ) if self.loffset is not None: _apply_loffset(self.loffset, first_items) return first_items, codes