From 4e8ccf6958c149c38d27a2b659bde198e9cc21c3 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 20:48:12 -0400 Subject: [PATCH 01/10] code impl and examples --- pandas/core/groupby/generic.py | 51 +++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d279594617235..cb85121079ead 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -16,7 +16,6 @@ TYPE_CHECKING, Any, Literal, - NamedTuple, TypeAlias, TypeVar, cast, @@ -113,11 +112,32 @@ @set_module("pandas") -class NamedAgg(NamedTuple): +class NamedAgg(tuple): + __slots__ = () + + def __new__(cls, column, aggfunc, *args, **kwargs): + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args, **call_kwargs): + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + """ - Helper for column specific aggregation with control over output column names. + Helper for column specific aggregation with with flexible argument passing and + control over output column names. - Subclass of typing.NamedTuple. + Subclass of tuple that wraps an aggregation function. Parameters ---------- @@ -126,6 +146,10 @@ class NamedAgg(NamedTuple): aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. + *args : tuple, optional + Positional arguments to pass to `aggfunc` when it is called. + **kwargs : dict, optional + Keyword arguments to pass to `aggfunc` when it is called. 
See Also -------- @@ -141,6 +165,25 @@ class NamedAgg(NamedTuple): key 1 -1 10.5 2 1 12.0 + + def n_between(ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + Using positional arguments + agg_between = pd.NamedAgg("a", n_between, 0, 1) + df.groupby("key").agg(count_between=agg_between) + count_between + key + 1 1 + 2 1 + + Using both positional and keyword arguments + agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw + key + 1 1 + 2 1 """ column: Hashable From 099e078ccde5627ef8dfddf67a104811e664c978 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 20:48:12 -0400 Subject: [PATCH 02/10] code impl and examples --- pandas/core/groupby/generic.py | 51 +++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d279594617235..cb85121079ead 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -16,7 +16,6 @@ TYPE_CHECKING, Any, Literal, - NamedTuple, TypeAlias, TypeVar, cast, @@ -113,11 +112,32 @@ @set_module("pandas") -class NamedAgg(NamedTuple): +class NamedAgg(tuple): + __slots__ = () + + def __new__(cls, column, aggfunc, *args, **kwargs): + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args, **call_kwargs): + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + """ - Helper for column specific aggregation with control over output column names. + Helper for column specific aggregation with with flexible argument passing and + control over output column names. - Subclass of typing.NamedTuple. 
+ Subclass of tuple that wraps an aggregation function. Parameters ---------- @@ -126,6 +146,10 @@ class NamedAgg(NamedTuple): aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. + *args : tuple, optional + Positional arguments to pass to `aggfunc` when it is called. + **kwargs : dict, optional + Keyword arguments to pass to `aggfunc` when it is called. See Also -------- @@ -141,6 +165,25 @@ class NamedAgg(NamedTuple): key 1 -1 10.5 2 1 12.0 + + def n_between(ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + Using positional arguments + agg_between = pd.NamedAgg("a", n_between, 0, 1) + df.groupby("key").agg(count_between=agg_between) + count_between + key + 1 1 + 2 1 + + Using both positional and keyword arguments + agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw + key + 1 1 + 2 1 """ column: Hashable From a52f97c4b3a68977dfa4d7ce5218e7fa10902f83 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:04:27 -0400 Subject: [PATCH 03/10] unit tests --- .../tests/groupby/aggregate/test_aggregate.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c968587c469d1..5fb3666b4cdb3 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -866,6 +866,57 @@ def test_agg_namedtuple(self): expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count")) tm.assert_frame_equal(result, expected) + def n_between(self, ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + def test_namedagg_args(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between=pd.NamedAgg("B", self.n_between, 0, 1) + ) + expected = 
DataFrame({"count_between": [1, 1]}, index=Index([0, 1], name="A")) + tm.assert_frame_equal(result, expected) + + def test_namedagg_kwargs(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between_kw=pd.NamedAgg("B", self.n_between, 0, 1, inclusive="both") + ) + expected = DataFrame( + {"count_between_kw": [1, 1]}, index=Index([0, 1], name="A") + ) + tm.assert_frame_equal(result, expected) + + def test_namedagg_args_and_kwargs(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between_mix=pd.NamedAgg( + "B", self.n_between, 0, 1, inclusive="neither" + ) + ) + expected = DataFrame( + {"count_between_mix": [0, 0]}, index=Index([0, 1], name="A") + ) + tm.assert_frame_equal(result, expected) + + def test_multiple_named_agg_with_args_and_kwargs(self): + df = DataFrame({"A": [0, 1, 2, 3], "B": [1, 2, 3, 4]}) + + result = df.groupby("A").agg( + n_between01=pd.NamedAgg("B", self.n_between, 0, 1), + n_between13=pd.NamedAgg("B", self.n_between, 1, 3), + n_between02=pd.NamedAgg("B", self.n_between, 0, 2), + ) + expected = df.groupby("A").agg( + n_between01=("B", lambda x: x.between(0, 1).sum()), + n_between13=("B", lambda x: x.between(1, 3).sum()), + n_between02=("B", lambda x: x.between(0, 2).sum()), + ) + tm.assert_frame_equal(result, expected) + def test_mangled(self): df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1)) From abfbcbb74d6bf103fd7390d9998368f48b4b3e61 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:13:31 -0400 Subject: [PATCH 04/10] add typehint --- pandas/core/groupby/generic.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cb85121079ead..a4a5be6c459b7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -16,6 +16,7 @@
TYPE_CHECKING, Any, Literal, + Self, TypeAlias, TypeVar, cast, @@ -115,7 +116,13 @@ class NamedAgg(tuple): __slots__ = () - def __new__(cls, column, aggfunc, *args, **kwargs): + def __new__( + cls, + column: Hashable, + aggfunc: Callable[..., Any] | str, + *args: Any, + **kwargs: Any, + ) -> Self: if ( callable(aggfunc) and not getattr(aggfunc, "_is_wrapped", False) @@ -123,7 +130,7 @@ def __new__(cls, column, aggfunc, *args, **kwargs): ): original_func = aggfunc - def wrapped(*call_args, **call_kwargs): + def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: series = call_args[0] final_args = call_args[1:] + args final_kwargs = {**kwargs, **call_kwargs} From d5eda5151ef16afcffda23fabb50843c13abab96 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:16:30 -0400 Subject: [PATCH 05/10] whats new --- doc/source/whatsnew/v3.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index eb938a7140e29..79ced506ce1ab 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -156,6 +156,8 @@ All warnings for upcoming changes in pandas will have the base class :class:`pan Other enhancements ^^^^^^^^^^^^^^^^^^ +- :class:`pandas.NamedAgg` now forwards any ``*args`` and ``**kwargs`` + to calls of ``aggfunc`` (:issue:`58283`) - :func:`pandas.merge` propagates the ``attrs`` attribute to the result if all inputs have identical ``attrs``, as has so far already been the case for :func:`pandas.concat`. 
From 2a0b78bcbb36fdfa90c8daf133e63c6bde3e11a0 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:58:38 -0400 Subject: [PATCH 06/10] mypy --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a4a5be6c459b7..4e178cc878dff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -136,7 +136,7 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: final_kwargs = {**kwargs, **call_kwargs} return original_func(series, *final_args, **final_kwargs) - wrapped._is_wrapped = True + wrapped._is_wrapped = True # type: ignore[attr-defined] aggfunc = wrapped return super().__new__(cls, (column, aggfunc)) From 1b0b67a96008a3bba7c21f69a98a1c8026335f27 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:00:36 -0400 Subject: [PATCH 07/10] mypy --- pandas/core/groupby/generic.py | 52 +++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4e178cc878dff..37dd3cb59b527 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -114,32 +114,6 @@ @set_module("pandas") class NamedAgg(tuple): - __slots__ = () - - def __new__( - cls, - column: Hashable, - aggfunc: Callable[..., Any] | str, - *args: Any, - **kwargs: Any, - ) -> Self: - if ( - callable(aggfunc) - and not getattr(aggfunc, "_is_wrapped", False) - and (args or kwargs) - ): - original_func = aggfunc - - def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: - series = call_args[0] - final_args = call_args[1:] + args - final_kwargs = {**kwargs, **call_kwargs} - return original_func(series, *final_args, **final_kwargs) - - wrapped._is_wrapped = True # type: ignore[attr-defined] - aggfunc = wrapped - return super().__new__(cls, (column, aggfunc)) - """ Helper for column specific aggregation with with flexible argument 
passing and control over output column names. @@ -196,6 +170,32 @@ def n_between(ser, low, high, **kwargs): column: Hashable aggfunc: AggScalar + __slots__ = () + + def __new__( + cls, + column: Hashable, + aggfunc: Callable[..., Any] | str, + *args: Any, + **kwargs: Any, + ) -> Self: + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True # type: ignore[attr-defined] + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + @set_module("pandas.api.typing") class SeriesGroupBy(GroupBy[Series]): From 2aa1b704d3a904c553861abc6b641c6689fb6e1a Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:26:05 -0400 Subject: [PATCH 08/10] doc string validation --- pandas/core/groupby/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 37dd3cb59b527..43941b0d4cd54 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,8 +115,7 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for column specific aggregation with with flexible argument passing and - control over output column names. + Helper for defining named aggregations in groupby operations. Subclass of tuple that wraps an aggregation function. 
From 7efd59d11f63f8212b728e42b6abc93c24628aad Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:52:21 -0400 Subject: [PATCH 09/10] doc --- pandas/core/groupby/generic.py | 37 ++++++++++++++++------------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 43941b0d4cd54..ea0c4bab68688 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,9 +115,10 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for defining named aggregations in groupby operations. + Helper for defining named aggregations in ``DataFrame.groupby().agg``. - Subclass of tuple that wraps an aggregation function. + Use ``pd.NamedAgg`` to specify column-specific aggregations with explicit + output names. Parameters ---------- @@ -126,10 +127,8 @@ class NamedAgg(tuple): aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. - *args : tuple, optional - Positional arguments to pass to `aggfunc` when it is called. - **kwargs : dict, optional - Keyword arguments to pass to `aggfunc` when it is called. + *args, **kwargs : + Optional positional and keyword arguments passed to ``aggfunc``. 
See Also -------- @@ -137,30 +136,28 @@ class NamedAgg(tuple): Examples -------- - >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], 1: [10, 11, 12]}) + >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], "b": [10, 11, 12]}) >>> agg_a = pd.NamedAgg(column="a", aggfunc="min") - >>> agg_1 = pd.NamedAgg(column=1, aggfunc=lambda x: np.mean(x)) - >>> df.groupby("key").agg(result_a=agg_a, result_1=agg_1) - result_a result_1 + >>> agg_b = pd.NamedAgg(column="b", aggfunc=lambda x: x.mean()) + >>> df.groupby("key").agg(result_a=agg_a, result_b=agg_b) + result_a result_b key 1 -1 10.5 2 1 12.0 - def n_between(ser, low, high, **kwargs): - return ser.between(low, high, **kwargs).sum() + >>> def n_between(ser, low, high, **kwargs): + ... return ser.between(low, high, **kwargs).sum() - Using positional arguments - agg_between = pd.NamedAgg("a", n_between, 0, 1) - df.groupby("key").agg(count_between=agg_between) - count_between + >>> agg_between = pd.NamedAgg("a", n_between, 0, 1) + >>> df.groupby("key").agg(count_between=agg_between) + count_between key 1 1 2 1 - Using both positional and keyword arguments - agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") - df.groupby("key").agg(count_between_kw=agg_between_kw) - count_between_kw + >>> agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + >>> df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw key 1 1 2 1 From fccb672c688e286b21c3ee8a66950c61046fa44e Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:54:22 -0400 Subject: [PATCH 10/10] doc --- pandas/core/groupby/generic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ea0c4bab68688..c4a8049a307ac 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,10 +115,9 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for defining named aggregations in 
``DataFrame.groupby().agg``. + Helper for column specific aggregation with control over output column names. - Use ``pd.NamedAgg`` to specify column-specific aggregations with explicit - output names. + Subclass of tuple. Parameters ----------