From 52e07b59ed9f4bb17245f4d305057238659360fc Mon Sep 17 00:00:00 2001
From: Zrahay <shubhangsinha44@gmail.com>
Date: Sat, 25 Oct 2025 22:04:23 +0530
Subject: [PATCH 1/8] BUG: Validate numeric_only parameter in groupby
 aggregations

---
 pandas/core/groupby/groupby.py | 72 ++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index fe7bf5bbc4c2c..5714e0e5d5879 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1754,44 +1754,48 @@ def _cython_agg_general(
         **kwargs,
     ):
         # Note: we never get here with how="ohlc" for DataFrameGroupBy;
-        #  that goes through SeriesGroupBy
+        # that goes through SeriesGroupBy
 
-        data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
+        # Check to confirm numeric_only is fed either True or False and no other data type
+        if(isinstance(numeric_only, bool)):
+            data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
 
-        def array_func(values: ArrayLike) -> ArrayLike:
-            try:
-                result = self._grouper._cython_operation(
-                    "aggregate",
-                    values,
-                    how,
-                    axis=data.ndim - 1,
-                    min_count=min_count,
-                    **kwargs,
-                )
-            except NotImplementedError:
-                # generally if we have numeric_only=False
-                # and non-applicable functions
-                # try to python agg
-                # TODO: shouldn't min_count matter?
-                # TODO: avoid special casing SparseArray here
-                if how in ["any", "all"] and isinstance(values, SparseArray):
-                    pass
-                elif alt is None or how in ["any", "all", "std", "sem"]:
-                    raise  # TODO: re-raise as TypeError?  should not be reached
-            else:
-                return result
+            def array_func(values: ArrayLike) -> ArrayLike:
+                try:
+                    result = self._grouper._cython_operation(
+                        "aggregate",
+                        values,
+                        how,
+                        axis=data.ndim - 1,
+                        min_count=min_count,
+                        **kwargs,
+                    )
+                except NotImplementedError:
+                    # generally if we have numeric_only=False
+                    # and non-applicable functions
+                    # try to python agg
+                    # TODO: shouldn't min_count matter?
+                    # TODO: avoid special casing SparseArray here
+                    if how in ["any", "all"] and isinstance(values, SparseArray):
+                        pass
+                    elif alt is None or how in ["any", "all", "std", "sem"]:
+                        raise  # TODO: re-raise as TypeError?  should not be reached
+                else:
+                    return result
 
-            assert alt is not None
-            result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
-            return result
+                assert alt is not None
+                result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
+                return result
 
-        new_mgr = data.grouped_reduce(array_func)
-        res = self._wrap_agged_manager(new_mgr)
-        if how in ["idxmin", "idxmax"]:
-            # mypy expects how to be Literal["idxmin", "idxmax"].
-            res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"])  # type: ignore[arg-type]
-        out = self._wrap_aggregated_output(res)
-        return out
+            new_mgr = data.grouped_reduce(array_func)
+            res = self._wrap_agged_manager(new_mgr)
+            if how in ["idxmin", "idxmax"]:
+                # mypy expects how to be Literal["idxmin", "idxmax"].
+                res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"])  # type: ignore[arg-type]
+            out = self._wrap_aggregated_output(res)
+            return out
+        else:
+            raise ValueError("numeric_only accepts only Boolean values")
 
     def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs):
         raise AbstractMethodError(self)

From 9655a9a06440a25b8e9e400536637891dda5a88d Mon Sep 17 00:00:00 2001
From: Zrahay <shubhangsinha44@gmail.com>
Date: Sun, 26 Oct 2025 03:18:44 +0530
Subject: [PATCH 2/8] Removed the if condition and added is_bool. Removed the
 unecessary comment on line 1759 and reverted the comment change on line 1757

---
 pandas/core/groupby/groupby.py | 72 +++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 5714e0e5d5879..0ca9c1a72d6ea 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -86,6 +86,7 @@ class providing the base-class of operations.
     is_object_dtype,
     is_scalar,
     is_string_dtype,
+    is_bool,
     needs_i8_conversion,
     pandas_dtype,
 )
@@ -1756,46 +1757,45 @@ def _cython_agg_general(
         # Note: we never get here with how="ohlc" for DataFrameGroupBy;
         # that goes through SeriesGroupBy
 
-        # Check to confirm numeric_only is fed either True or False and no other data type
-        if(isinstance(numeric_only, bool)):
-            data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
+        if not is_bool(numeric_only):
+            raise ValueError("numeric_only accepts only Boolean values")
 
-            def array_func(values: ArrayLike) -> ArrayLike:
-                try:
-                    result = self._grouper._cython_operation(
-                        "aggregate",
-                        values,
-                        how,
-                        axis=data.ndim - 1,
-                        min_count=min_count,
-                        **kwargs,
-                    )
-                except NotImplementedError:
-                    # generally if we have numeric_only=False
-                    # and non-applicable functions
-                    # try to python agg
-                    # TODO: shouldn't min_count matter?
-                    # TODO: avoid special casing SparseArray here
-                    if how in ["any", "all"] and isinstance(values, SparseArray):
-                        pass
-                    elif alt is None or how in ["any", "all", "std", "sem"]:
-                        raise  # TODO: re-raise as TypeError?  should not be reached
-                else:
-                    return result
+        data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
 
-                assert alt is not None
-                result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
+        def array_func(values: ArrayLike) -> ArrayLike:
+            try:
+                result = self._grouper._cython_operation(
+                    "aggregate",
+                    values,
+                    how,
+                    axis=data.ndim - 1,
+                    min_count=min_count,
+                    **kwargs,
+                )
+            except NotImplementedError:
+                # generally if we have numeric_only=False
+                # and non-applicable functions
+                # try to python agg
+                # TODO: shouldn't min_count matter?
+                # TODO: avoid special casing SparseArray here
+                if how in ["any", "all"] and isinstance(values, SparseArray):
+                    pass
+                elif alt is None or how in ["any", "all", "std", "sem"]:
+                    raise  # TODO: re-raise as TypeError?  should not be reached
+            else:
                 return result
 
-            new_mgr = data.grouped_reduce(array_func)
-            res = self._wrap_agged_manager(new_mgr)
-            if how in ["idxmin", "idxmax"]:
-                # mypy expects how to be Literal["idxmin", "idxmax"].
-                res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"])  # type: ignore[arg-type]
-            out = self._wrap_aggregated_output(res)
-            return out
-        else:
-            raise ValueError("numeric_only accepts only Boolean values")
+            assert alt is not None
+            result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
+            return result
+
+        new_mgr = data.grouped_reduce(array_func)
+        res = self._wrap_agged_manager(new_mgr)
+        if how in ["idxmin", "idxmax"]:
+            # mypy expects how to be Literal["idxmin", "idxmax"].
+            res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"])  # type: ignore[arg-type]
+        out = self._wrap_aggregated_output(res)
+        return out
 
     def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs):
         raise AbstractMethodError(self)

From 6ef5696520b37c86d3331635b2c555dad09f88c3 Mon Sep 17 00:00:00 2001
From: Zrahay <shubhangsinha44@gmail.com>
Date: Sun, 26 Oct 2025 04:15:30 +0530
Subject: [PATCH 3/8] Added a note to the whatsnew 3.0 with respect to my issue
 62778

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 75b4c5c0fe14d..c5c9e9d589456 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1150,6 +1150,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` would fail when the groups were :class:`Categorical` with an NA value (:issue:`61356`)
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
 - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
+- Bug in :meth:`.DataFrameGroupBy` reductions where boolean-valued inputs were mishandled in the Cython aggregation path (``_cython_agg_general``); adding an ``is_bool`` check fixes incorrect results for some bool inputs. (:issue:`62778`)
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`.Series.rolling` when used with a :class:`.BaseIndexer` subclass and computing min/max (:issue:`46726`)

From 5a72c79ddc45dbf5b2fcf0feb27c9b7dfa9599e3 Mon Sep 17 00:00:00 2001
From: Zrahay <shubhangsinha44@gmail.com>
Date: Sun, 26 Oct 2025 19:03:59 +0530
Subject: [PATCH 4/8] Added the suggested changes. Removed the redundant
 comments and tests from test_reductions.py

---
 pandas/core/groupby/groupby.py          |  2 +-
 pandas/tests/groupby/test_reductions.py | 13 ++-----------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 0ca9c1a72d6ea..c227e96bb206f 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1755,7 +1755,7 @@ def _cython_agg_general(
         **kwargs,
     ):
         # Note: we never get here with how="ohlc" for DataFrameGroupBy;
-        # that goes through SeriesGroupBy
+        #  that goes through SeriesGroupBy
 
         if not is_bool(numeric_only):
             raise ValueError("numeric_only accepts only Boolean values")
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 519e9ca974b5c..7212f4411d7b3 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -1522,13 +1522,10 @@ def test_groupby_std_datetimelike():
     tm.assert_frame_equal(result, expected)
 
 def test_mean_numeric_only_validates_bool():
-    """
-    Test that numeric_only parameter only accepts boolean values.
-    See GH#62778
-    """
+    # GH#62778
+
     df = pd.DataFrame({"A": range(5), "B": range(5)})
 
-    # These test cases should raise a ValueError
     msg = "numeric_only accepts only Boolean values"
     with pytest.raises(ValueError, match=msg):
         df.groupby(["A"]).mean(["B"])
@@ -1538,9 +1535,3 @@ def test_mean_numeric_only_validates_bool():
 
     with pytest.raises(ValueError, match=msg):
         df.groupby(["A"]).mean(numeric_only=1)
-
-    # These test cases should work absolutely fine
-    df.groupby(["A"]).mean()
-    df.groupby(["A"]).mean(numeric_only=True)
-    df.groupby(["A"]).mean(numeric_only=False)
-

From 8056bb863ec9a202854ebcfd1582d0e0e41c572a Mon Sep 17 00:00:00 2001
From: Zrahay <shubhangsinha44@gmail.com>
Date: Sun, 26 Oct 2025 22:02:04 +0530
Subject: [PATCH 5/8] Change made to the comments in whatsnew 3.0

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index c5c9e9d589456..7e02a20452b5c 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1150,7 +1150,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` would fail when the groups were :class:`Categorical` with an NA value (:issue:`61356`)
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
 - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
-- Bug in :meth:`.DataFrameGroupBy` reductions where boolean-valued inputs were mishandled in the Cython aggregation path (``_cython_agg_general``); adding an ``is_bool`` check fixes incorrect results for some bool inputs. (:issue:`62778`)
+- Bug in :meth:`.DataFrameGroupBy` reductions where non-Boolean values were allowed for the ``numeric_only`` argument; passing a non-Boolean value will now raise (:issue:`62778`)
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`.Series.rolling` when used with a :class:`.BaseIndexer` subclass and computing min/max (:issue:`46726`)

From 63f84433274fc640e104caf005b142b60c452a59 Mon Sep 17 00:00:00 2001
From: Zrahay <shubhangsinha4@gmail.com>
Date: Sun, 26 Oct 2025 20:46:41 +0000
Subject: [PATCH 6/8] Apply pre-commit formatting fixes

---
 pandas/core/groupby/groupby.py          | 6 ++----
 pandas/tests/groupby/test_reductions.py | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c227e96bb206f..910268ebdfb8a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -76,6 +76,7 @@ class providing the base-class of operations.
     ensure_dtype_can_hold_na,
 )
 from pandas.core.dtypes.common import (
+    is_bool,
     is_bool_dtype,
     is_float_dtype,
     is_hashable,
@@ -86,7 +87,6 @@ class providing the base-class of operations.
     is_object_dtype,
     is_scalar,
     is_string_dtype,
-    is_bool,
     needs_i8_conversion,
     pandas_dtype,
 )
@@ -110,9 +110,7 @@ class providing the base-class of operations.
     SparseArray,
 )
 from pandas.core.arrays.string_ import StringDtype
-from pandas.core.arrays.string_arrow import (
-    ArrowStringArray,
-)
+from pandas.core.arrays.string_arrow import ArrowStringArray
 from pandas.core.base import (
     PandasObject,
     SelectionMixin,
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 7212f4411d7b3..9c13ceec49e35 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -1521,10 +1521,11 @@ def test_groupby_std_datetimelike():
     expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser})
     tm.assert_frame_equal(result, expected)
 
+
 def test_mean_numeric_only_validates_bool():
     # GH#62778
 
-    df = pd.DataFrame({"A": range(5), "B": range(5)})
+    df = DataFrame({"A": range(5), "B": range(5)})
 
     msg = "numeric_only accepts only Boolean values"
     with pytest.raises(ValueError, match=msg):

From bb9b0ab9f27cf362d38bc541b9d206261c762d7b Mon Sep 17 00:00:00 2001
From: Zrahay <shubhangsinha4@gmail.com>
Date: Sun, 26 Oct 2025 21:02:54 +0000
Subject: [PATCH 7/8] Sort whatsnew entries alphabetically

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 7e02a20452b5c..457ab6eb965ca 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1150,8 +1150,8 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` would fail when the groups were :class:`Categorical` with an NA value (:issue:`61356`)
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
 - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
-- Bug in :meth:`.DataFrameGroupBy` reductions where non-Boolean values were allowed for the ``numeric_only`` argument; passing a non-Boolean value will now raise (:issue:`62778`)
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
+- Bug in :meth:`.DataFrameGroupBy` reductions where non-Boolean values were allowed for the ``numeric_only`` argument; passing a non-Boolean value will now raise (:issue:`62778`)
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`.Series.rolling` when used with a :class:`.BaseIndexer` subclass and computing min/max (:issue:`46726`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)

From 8a7918fcc5e1c0f69123727b2cfada89a3a13fd2 Mon Sep 17 00:00:00 2001
From: Zrahay <shubhangsinha44@gmail.com>
Date: Tue, 28 Oct 2025 22:54:35 +0530
Subject: [PATCH 8/8] Add .editorconfig for consistent editor settings

Define formatting rules for .py, .pyx, .c, meson.build, YAML, JSON, .sh, and .rst files to complement existing formatters and improve developer experience.

Closes #62736
---
 .editorconfig | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000000000..287395138d517
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,47 @@
+
+root = true
+
+[*]
+charset = UTF-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+spelling_language = en-us
+
+[*.py]
+max_line_length = 88
+indent_style = space
+indent_size = 4
+
+[*.pyx]
+max_line_length = 88
+indent_style = space
+indent_size = 4
+
+
+[*.c]
+max_line_length = 80
+indent_style = space
+indent_size = 2
+
+[meson.build]
+max_line_length = 80
+indent_style = space
+indent_size = 4
+
+[*.{yml,yaml}]
+indent_style = space
+indent_size = 4
+
+[*.json]
+indent_style = space
+indent_size = 4
+
+
+[*.sh]
+indent_style = space
+indent_size = 4
+
+[*.rst]
+indent_style = space
+indent_size = 4