From dbec9f31a706b766c9a474ca3187d4d99b4da9d1 Mon Sep 17 00:00:00 2001
From: Eric Brown <ericbr@block.xyz>
Date: Fri, 14 Nov 2025 08:37:11 -0700
Subject: [PATCH 1/3] Deprecate weak and return_df args

---
 boruta/boruta_py.py        | 94 +++++++++++++++++---------------------
 boruta/test/test_boruta.py | 52 ++++++++++++++++++++-
 2 files changed, 92 insertions(+), 54 deletions(-)

diff --git a/boruta/boruta_py.py b/boruta/boruta_py.py
index d2a2d0a..296d0cd 100644
--- a/boruta/boruta_py.py
+++ b/boruta/boruta_py.py
@@ -142,6 +142,10 @@ class BorutaPy(BaseEstimator, SelectorMixin):
         The mask of selected tentative features, which haven't gained enough
         support during the max_iter number of iterations.
 
+    weak : bool, default=False
+
+        If set to true, the tentative features are also used to reduce X.
+
     ranking_ : array of shape [n_features]
 
         The feature ranking, such that ``ranking_[i]`` corresponds to the
@@ -194,7 +198,7 @@ class BorutaPy(BaseEstimator, SelectorMixin):
 
     def __init__(self, estimator, n_estimators=1000, perc=100, alpha=0.05,
                  two_step=True, max_iter=100, random_state=None, verbose=0,
-                 early_stopping=False, n_iter_no_change=20):
+                 early_stopping=False, n_iter_no_change=20, weak: bool = False):
         self.estimator = estimator
         self.n_estimators = n_estimators
         self.perc = perc
@@ -207,8 +211,9 @@ def __init__(self, estimator, n_estimators=1000, perc=100, alpha=0.05,
         self.n_iter_no_change = n_iter_no_change
         self.__version__ = '0.3'
         self._is_lightgbm = 'lightgbm' in str(type(self.estimator))
+        self.weak = weak
 
-    def fit(self, X, y):
+    def fit(self, X, y, **fit_params):
         """
         Fits the Boruta feature selection with the provided estimator.
 
@@ -223,7 +228,7 @@ def fit(self, X, y):
 
         return self._fit(X, y)
 
-    def transform(self, X, weak=False, return_df=False):
+    def transform(self, X, weak=None, return_df=None):
         """
         Reduces the input X to the features selected by Boruta.
 
@@ -232,23 +237,37 @@ def transform(self, X, weak=False, return_df=False):
         X : array-like, shape = [n_samples, n_features]
             The training input samples.
 
-        weak: boolean, default = False
-            If set to true, the tentative features are also used to reduce X.
-        
-        return_df : boolean, default = False
-            If ``X`` if a pandas dataframe and this parameter is set to True,
-            the transformed data will also be a dataframe.
+        weak : boolean, optional
+            Deprecated. Set ``weak`` in the constructor instead.
 
-        Returns
-        -------
-        X : array-like, shape = [n_samples, n_features_]
-            The input matrix X's columns are reduced to the features which were
-            selected by Boruta.
+        return_df : bool, optional
+            Deprecated. Output type now follows scikit-learn's standard
+            ``set_output``/``set_config`` mechanism.
         """
+        prev_weak = self.weak
+        if weak is not None:
+            warnings.warn(
+                "`weak` is deprecated and will be removed in a future release. "
+                "Set `weak` in the constructor instead.",
+                FutureWarning,
+                stacklevel=2,
+            )
+            self.weak = weak
+        if return_df is not None:
+            warnings.warn(
+                "`return_df` is deprecated and will be removed in a future "
+                "release. Use scikit-learn's `set_output(transform='pandas')` "
+                "or `set_config(transform_output='pandas')` instead.",
+                FutureWarning,
+                stacklevel=2,
+            )
+        try:
+            return super().transform(X)
+        finally:
+            if weak is not None:
+                self.weak = prev_weak
 
-        return self._transform(X, weak, return_df)
-
-    def fit_transform(self, X, y, weak=False, return_df=False):
+    def fit_transform(self, X, y=None, **fit_params):
         """
         Fits Boruta, then reduces the input X to the selected features.
 
@@ -259,23 +278,10 @@ def fit_transform(self, X, y, weak=False, return_df=False):
 
         y : array-like, shape = [n_samples]
             The target values.
-
-        weak: boolean, default = False
-            If set to true, the tentative features are also used to reduce X.
-
-        return_df : boolean, default = False
-            If ``X`` if a pandas dataframe and this parameter is set to True,
-            the transformed data will also be a dataframe.
-
-        Returns
-        -------
-        X : array-like, shape = [n_samples, n_features_]
-            The input matrix X's columns are reduced to the features which were
-            selected by Boruta.
         """
-
-        self._fit(X, y)
-        return self._transform(X, weak, return_df)
+        weak = fit_params.pop("weak", None)
+        return_df = fit_params.pop("return_df", None)
+        return self.fit(X, y, **fit_params).transform(X, weak=weak, return_df=return_df)
 
     def _validate_pandas_input(self, arg):
         try:
@@ -446,24 +452,6 @@ def _fit(self, X, y):
             self._print_results(dec_reg, _iter, 1)
         return self
 
-    def _transform(self, X, weak=False, return_df=False):
-        # sanity check
-        try:
-            self.ranking_
-        except AttributeError:
-            raise ValueError('You need to call the fit(X, y) method first.')
-
-        if weak:
-            indices = self.support_ + self.support_weak_
-        else:
-            indices = self.support_
-
-        if return_df:
-            X = X.iloc[:, indices]
-        else:
-            X = X[:, indices]
-        return X
-
     def _set_n_estimators(self, n_estimators):
         try:
             self.estimator.set_params(n_estimators=n_estimators)
@@ -476,7 +464,9 @@ def _set_n_estimators(self, n_estimators):
         return self
 
     def _get_support_mask(self):
-        check_is_fitted(self, 'support_')
+        check_is_fitted(self, ['support_', 'support_weak_'])
+        if self.weak:
+            return np.logical_or(self.support_, self.support_weak_)
         return self.support_
 
     def _get_tree_num(self, n_feat):
diff --git a/boruta/test/test_boruta.py b/boruta/test/test_boruta.py
index 64a3691..0ff7536 100644
--- a/boruta/test/test_boruta.py
+++ b/boruta/test/test_boruta.py
@@ -1,6 +1,9 @@
+import re
+
 import numpy as np
 import pandas as pd
 import pytest
+from sklearn import config_context
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.exceptions import NotFittedError
 from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
@@ -65,8 +68,53 @@ def test_dataframe_is_returned(Xy):
     X_df, y_df = pd.DataFrame(X), pd.Series(y)
     rfc = RandomForestClassifier()
     bt = BorutaPy(rfc)
-    bt.fit(X_df, y_df)
-    assert isinstance(bt.transform(X_df, return_df=True), pd.DataFrame)
+    with config_context(transform_output="pandas"):
+        bt.fit(X_df, y_df)
+        transformed = bt.transform(X_df)
+    assert isinstance(transformed, pd.DataFrame)
+
+
+def test_return_df_parameter_emits_warning(Xy):
+    X, y = Xy
+    X_df, y_df = pd.DataFrame(X), pd.Series(y)
+    bt = BorutaPy(RandomForestClassifier())
+    with config_context(transform_output="pandas"):
+        bt.fit(X_df, y_df)
+        with pytest.warns(FutureWarning, match=re.escape("`set_output(transform='pandas')`")):
+            transformed = bt.transform(X_df, return_df=True)
+    assert isinstance(transformed, pd.DataFrame)
+
+
+def test_weak_attribute_controls_support_mask(Xy):
+    X, y = Xy
+    bt = BorutaPy(RandomForestClassifier(), weak=True)
+    bt.fit(X, y)
+
+    union_mask = bt.support_ | bt.support_weak_
+    assert np.array_equal(bt.get_support(), union_mask)
+
+
+def test_transform_with_weak_parameter_is_deprecated(Xy):
+    X, y = Xy
+    bt = BorutaPy(RandomForestClassifier())
+    bt.fit(X, y)
+    bt.support_[5] = False
+    bt.support_weak_[5] = True
+
+    with pytest.warns(FutureWarning, match=re.escape("`weak` is deprecated")):
+        transformed = bt.transform(X, weak=True)
+
+    expected_features = np.count_nonzero(bt.support_ | bt.support_weak_)
+    assert transformed.shape[1] == expected_features
+
+
+def test_fit_transform_with_weak_parameter_is_deprecated(Xy):
+    X, y = Xy
+    bt = BorutaPy(RandomForestClassifier())
+    with pytest.warns(FutureWarning, match=re.escape("`weak` is deprecated")):
+        transformed = bt.fit_transform(X, y, weak=True)
+    expected_features = np.count_nonzero(bt.support_ | bt.support_weak_)
+    assert transformed.shape[1] == expected_features
 
 
 def test_selector_mixin_get_support_requires_fit():

From 72cd436c0c40c1d73b01c127a1e0fd05235c0fc4 Mon Sep 17 00:00:00 2001
From: Eric Brown <ericbr@block.xyz>
Date: Fri, 14 Nov 2025 08:42:58 -0700
Subject: [PATCH 2/3] Remove _validate_pandas_input

---
 boruta/boruta_py.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/boruta/boruta_py.py b/boruta/boruta_py.py
index 296d0cd..92ff275 100644
--- a/boruta/boruta_py.py
+++ b/boruta/boruta_py.py
@@ -283,14 +283,6 @@ def fit_transform(self, X, y=None, **fit_params):
         return_df = fit_params.pop("return_df", None)
         return self.fit(X, y, **fit_params).transform(X, weak=weak, return_df=return_df)
 
-    def _validate_pandas_input(self, arg):
-        try:
-            return arg.values
-        except AttributeError:
-            raise ValueError(
-                "input needs to be a numpy array or pandas data frame."
-            )
-
     def _fit(self, X, y):
         # check input params
         self._check_params(X, y)
@@ -301,10 +293,7 @@ def _fit(self, X, y):
         else:
             self.feature_names_in_ = None
 
-        if not isinstance(X, np.ndarray):
-            X = self._validate_pandas_input(X) 
-        if not isinstance(y, np.ndarray):
-            y = self._validate_pandas_input(y)
+        X, y = check_X_y(X, y, accept_sparse=False, ensure_2d=True, dtype=None, estimator=self)
 
         self.n_features_in_ = X.shape[1]
 

From d55fa6aa2b0caac0bec66d128f9835f846c48dc2 Mon Sep 17 00:00:00 2001
From: Eric Brown <ericbr@block.xyz>
Date: Fri, 14 Nov 2025 09:45:33 -0700
Subject: [PATCH 3/3] Preserve functionality for return_df arg

---
 boruta/boruta_py.py        | 17 +++++++++++++++--
 boruta/test/test_boruta.py | 27 +++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/boruta/boruta_py.py b/boruta/boruta_py.py
index 92ff275..014d448 100644
--- a/boruta/boruta_py.py
+++ b/boruta/boruta_py.py
@@ -15,6 +15,7 @@
 from sklearn.base import BaseEstimator
 from sklearn.feature_selection import SelectorMixin
 from sklearn.utils.validation import check_is_fitted
+from sklearn.utils._set_output import _get_output_config
 import warnings
 
 
@@ -240,7 +241,7 @@ def transform(self, X, weak=None, return_df=None):
         weak : boolean, optional
             Deprecated. Set ``weak`` in the constructor instead.
 
-        return_df : bool, optional
+        return_df : boolean, optional
             Deprecated. Output type now follows scikit-learn's standard
             ``set_output``/``set_config`` mechanism.
         """
@@ -253,6 +254,9 @@ def transform(self, X, weak=None, return_df=None):
                 stacklevel=2,
             )
             self.weak = weak
+        requested_transform = None
+        prev_output_config = None
+        force_numpy = return_df is False
         if return_df is not None:
             warnings.warn(
                 "`return_df` is deprecated and will be removed in a future "
@@ -261,11 +265,20 @@ def transform(self, X, weak=None, return_df=None):
                 FutureWarning,
                 stacklevel=2,
             )
+            prev_output_config = _get_output_config("transform", estimator=self)["dense"]
+            requested_transform = "pandas" if return_df else "default"
+            if prev_output_config != requested_transform:
+                self.set_output(transform=requested_transform)
         try:
-            return super().transform(X)
+            result = super().transform(X)
         finally:
             if weak is not None:
                 self.weak = prev_weak
+            if requested_transform is not None and prev_output_config != requested_transform:
+                self.set_output(transform=prev_output_config)
+        if force_numpy and hasattr(result, "to_numpy"):
+            result = result.to_numpy()
+        return result
 
     def fit_transform(self, X, y=None, **fit_params):
         """
diff --git a/boruta/test/test_boruta.py b/boruta/test/test_boruta.py
index 0ff7536..6b702c7 100644
--- a/boruta/test/test_boruta.py
+++ b/boruta/test/test_boruta.py
@@ -85,6 +85,33 @@ def test_return_df_parameter_emits_warning(Xy):
     assert isinstance(transformed, pd.DataFrame)
 
 
+def test_return_df_true_temporarily_enables_pandas_output(Xy):
+    X, y = Xy
+    bt = BorutaPy(RandomForestClassifier())
+    bt.fit(X, y)
+
+    baseline = bt.transform(X)
+    assert isinstance(baseline, np.ndarray)
+
+    with pytest.warns(FutureWarning, match="`return_df` is deprecated"):
+        transformed = bt.transform(X, return_df=True)
+    assert isinstance(transformed, pd.DataFrame)
+
+    reverted = bt.transform(X)
+    assert isinstance(reverted, np.ndarray)
+
+
+def test_return_df_false_with_dataframe_input_returns_numpy(Xy):
+    X, y = Xy
+    X_df = pd.DataFrame(X)
+    bt = BorutaPy(RandomForestClassifier())
+    bt.fit(X_df, y)
+
+    with pytest.warns(FutureWarning, match="`return_df` is deprecated"):
+        transformed = bt.transform(X_df, return_df=False)
+    assert isinstance(transformed, np.ndarray)
+
+
 def test_weak_attribute_controls_support_mask(Xy):
     X, y = Xy
     bt = BorutaPy(RandomForestClassifier(), weak=True)