200 changes: 200 additions & 0 deletions sklearn/ensemble/boosting.py
@@ -0,0 +1,200 @@
"""
Algorithms for Boosting:
- Functional Gradient Descent
"""

# Authors: James Bergstra
# License: BSD3

import numpy as np

from ..utils import safe_asarray
from ..ensemble import BaseEnsemble
from ..base import RegressorMixin


class LossFunction(object):
"""Abstract base class for various loss functions."""

    def init_estimator(self):
pass

def __call__(self, y, pred):
pass

def negative_gradient(self, y, pred):
"""Compute the negative gradient."""
pass


class LeastSquaresError(LossFunction):
"""Loss function for least squares (LS) estimation.
    Terminal regions need not be updated for least squares. """

def init_estimator(self):
return MeanPredictor()

def __call__(self, y, pred):
return np.mean((y - pred) ** 2.0)

def negative_gradient(self, y, pred):
return y - pred


class LeastAbsoluteError(LossFunction):
"""Loss function for least absolute deviation (LAD) regression. """

def init_estimator(self):
return MedianPredictor()

def __call__(self, y, pred):
return np.abs(y - pred).mean()

def negative_gradient(self, y, pred):
return np.sign(y - pred)


class BinomialDeviance(LossFunction):
"""Binomial deviance loss function for binary classification."""

def init_estimator(self):
return ClassPriorPredictor()

def __call__(self, y, pred):
"""Compute the deviance (= negative log-likelihood). """
## return -2.0 * np.sum(y * pred -
## np.log(1.0 + np.exp(pred))) / y.shape[0]

# logaddexp(0, v) == log(1.0 + exp(v))
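        # For large pred the naive form overflows (np.exp(800.) -> inf, so
        # np.log(1. + np.exp(800.)) -> inf), while np.logaddexp(0., 800.)
        # == 800.0 stays finite.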
return -2.0 * np.sum(y * pred -
np.logaddexp(0.0, pred)) / y.shape[0]

def negative_gradient(self, y, pred):
return y - 1.0 / (1.0 + np.exp(-pred))


class FunctionalGradient(object):
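    """Track the working targets (pseudo-residuals) fitted at each
    boosting round."""
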
def __init__(self, loss, X, y):
self.loss = loss
self.X = X
self.y = y
self.residual = np.array(y) # copies, ensures array

def current_Xy(self):
return self.X, self.residual

def update(self, prediction):
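        # Replace the working targets with the negative gradient of the
        # loss at the new prediction; for least squares this equals the
        # remaining residual (previous residual minus this prediction).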
self.residual = self.loss.negative_gradient(self.residual, prediction)


class FitNIter(object):
"""
Iterations (self.next()) implement one round of functional gradient
boosting.


"""
def __init__(self, ensemble, fg, n_iters):
self.ensemble = ensemble
self.fg = fg
self.n_iters = n_iters

def __iter__(self):
return self

def next(self):
if self.n_iters == len(self.ensemble.estimators_):
raise StopIteration
base = self.ensemble._make_estimator()
X, y = self.fg.current_Xy()
base.fit(X, y)
self.fg.update(base.predict(X))
return self
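

# A usage sketch for FitNIter (hypothetical names; the iterator comes from
# GradientBoostedRegressor.fit_iter below):
#
#     model = GradientBoostedRegressor(base_estimator, n_estimators=10)
#     for it in model.fit_iter(X, y):
#         print np.mean(it.fg.residual ** 2)  # per-round training MSE (LS loss)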


class GradientBoostedRegressor(BaseEnsemble, RegressorMixin):
"""
Regression Boosting via functional gradient descent.

    The algorithm constructs a regression ensemble by using a "base
    estimator" to repeatedly fit the residual training error. For example,
    the first iteration fits some function f() to the original (X, y)
    training data, the second iteration fits some g() to (X, y - f(X)),
    the third iteration fits some h() to (X, y - f(X) - g(X)), and so on.
    The final ensemble is f() + g() + h() + ...

    This procedure is equivalent to functional gradient descent when the
    training objective is to minimize mean squared error (MSE).

For more information see e.g.:
J. H. Friedman (2002). "Stochastic Gradient Boosting",
Computational Statistics & Data Analysis.

    TODO: Mason et al. (1999), "Boosting Algorithms as Gradient Descent",
    is also relevant here.
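
    Example (a sketch; assumes scikit-learn's DecisionTreeRegressor as the
    base estimator, but any regressor with fit/predict works):

        from sklearn.tree import DecisionTreeRegressor
        model = GradientBoostedRegressor(
            base_estimator=DecisionTreeRegressor(max_depth=2),
            n_estimators=10)
        model.fit(X, y.astype('float'))
        pred = model.predict(X)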
"""

def __init__(self, base_estimator, n_estimators,
loss=LeastSquaresError):
super(GradientBoostedRegressor, self).__init__(
base_estimator=base_estimator,
n_estimators=n_estimators)
self.loss = loss

    def fit_iter(self, X, y):
        """Create a fitting iterator for training set X, y.

        See class FitNIter().
        """
        X = safe_asarray(X)
        y = np.asanyarray(y)
        if y.dtype.kind in 'iu':
            raise TypeError('Regression of int-valued targets is ambiguous.'
                            ' Please cast to float if you want to train'
                            ' using a regression criterion.')
        if isinstance(self.loss, LossFunction):
            loss = self.loss
        else:
            loss = self.loss()
return FitNIter(
ensemble=self,
fg=FunctionalGradient(loss, X, y),
n_iters=self.n_estimators)

def fit(self, X, y):
"""Build a regression ensemble by funtional gradient boosting.

Parameters
----------
X : array-like of shape = [n_samples, n_features]
The training input samples.

        y : array-like, shape = [n_samples]
            The target values (real numbers; integer-typed targets are
            rejected, see fit_iter).

        Returns
        -------
self : object
Returns self.
"""
for _ in self.fit_iter(X, y):
pass
return self

def predict(self, X):
"""Return the prediction for array-like X.

Parameters
----------
X : array-like of shape = [n_samples, n_features]
Test samples.

        Returns
        -------
prediction : numpy array of shape = [n_samples]
Test predictions.

"""
rval = self.estimators_[0].predict(X)
for estimator in self.estimators_[1:]:
pred_i = estimator.predict(X)
rval += pred_i
return rval
65 changes: 12 additions & 53 deletions sklearn/ensemble/gradient_boosting.py
@@ -23,6 +23,11 @@
from ..tree._tree import MSE
from ..tree._tree import DTYPE

from .boosting import LossFunction
from .boosting import LeastSquaresError
from .boosting import LeastAbsoluteError
from .boosting import BinomialDeviance


# ignore overflows due to exp(-pred) in BinomialDeviance
np.seterr(invalid='raise', under='raise', divide='raise', over='ignore')
@@ -89,20 +94,7 @@ def predict(self, X):
y.fill(self.prior)
return y


class LossFunction(object):
"""Abstract base class for various loss functions."""

def init_estimator(self, X, y):
pass

def __call__(self, y, pred):
pass

def negative_gradient(self, y, pred):
"""Compute the negative gradient."""
pass

class TreeLossMixin(object):
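    """Mixin for losses that update the terminal regions (leaves) of a
    fitted tree after each boosting stage."""
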
def update_terminal_regions(self, tree, X, y, residual, y_pred,
learn_rate=1.0):
"""Update the terminal regions (=leaves) of the given tree and
@@ -125,57 +117,24 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
pass


class LeastSquaresError(LossFunction):
class TreeLeastSquaresError(LeastSquaresError, TreeLossMixin):
"""Loss function for least squares (LS) estimation.
    Terminal regions need not be updated for least squares. """

def init_estimator(self):
return MeanPredictor()

def __call__(self, y, pred):
return np.mean((y - pred) ** 2.0)

def negative_gradient(self, y, pred):
return y - pred


class LeastAbsoluteError(LossFunction):
class TreeLeastAbsoluteError(LeastAbsoluteError, TreeLossMixin):
"""Loss function for least absolute deviation (LAD) regression. """

def init_estimator(self):
return MedianPredictor()

def __call__(self, y, pred):
return np.abs(y - pred).mean()

def negative_gradient(self, y, pred):
return np.sign(y - pred)

def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
residual, pred):
"""LAD updates terminal regions to median estimates. """
tree.value[leaf, 0] = np.median(y.take(terminal_region, axis=0) - \
pred.take(terminal_region, axis=0))


class BinomialDeviance(LossFunction):
class TreeBinomialDeviance(BinomialDeviance, TreeLossMixin):
"""Binomial deviance loss function for binary classification."""

def init_estimator(self):
return ClassPriorPredictor()

def __call__(self, y, pred):
"""Compute the deviance (= negative log-likelihood). """
## return -2.0 * np.sum(y * pred -
## np.log(1.0 + np.exp(pred))) / y.shape[0]

# logaddexp(0, v) == log(1.0 + exp(v))
return -2.0 * np.sum(y * pred -
np.logaddexp(0.0, pred)) / y.shape[0]

def negative_gradient(self, y, pred):
return y - 1.0 / (1.0 + np.exp(-pred))

def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
residual, pred):
"""Make a single Newton-Raphson step. """
@@ -191,9 +150,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
tree.value[leaf, 0] = numerator / denominator


LOSS_FUNCTIONS = {'ls': LeastSquaresError,
'lad': LeastAbsoluteError,
'deviance': BinomialDeviance}
LOSS_FUNCTIONS = {'ls': TreeLeastSquaresError,
'lad': TreeLeastAbsoluteError,
'deviance': TreeBinomialDeviance}


class BaseGradientBoosting(BaseEstimator):
50 changes: 50 additions & 0 deletions sklearn/ensemble/tests/test_boosting.py
@@ -0,0 +1,50 @@
from unittest import TestCase
import numpy as np
from sklearn.ensemble.boosting import GradientBoostedRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets.base import load_boston

class TestGradientBoostedRegressor(TestCase):
def setUp(self):
self.task = load_boston()
self.base_est = DecisionTreeRegressor(max_depth=2, min_split=4)
self.boosting = GradientBoostedRegressor(
base_estimator=DecisionTreeRegressor(
max_depth=2,
min_split=4),
n_estimators=5)

def test_fit_returns_self(self):
r = self.boosting.fit(self.task['data'], self.task['target'])
assert r is self.boosting

def test_1_estimator_matches_base(self):
self.boosting = GradientBoostedRegressor(
base_estimator=DecisionTreeRegressor(
max_depth=2,
min_split=4),
n_estimators=1)
self.base_est.fit(self.task['data'], self.task['target'])
self.boosting.fit(self.task['data'], self.task['target'])
pred1 = self.base_est.predict(self.task['data'])
pred2 = self.boosting.predict(self.task['data'])
self.assert_(np.allclose(pred1, pred2))

def test_n_estimators(self):
assert len(self.boosting.estimators_) == 0
self.boosting.fit(self.task['data'], self.task['target'])
assert len(self.boosting.estimators_) == self.boosting.n_estimators

def test_int_y_not_implemented(self):
self.assertRaises(TypeError,
self.boosting.fit,
np.ones((4, 5)), np.arange(4).astype('int'))

def test_mse_always_goes_down(self):
model = self.boosting
task = self.task
mse_list = []
for fit_iter in model.fit_iter(task['data'], task['target']):
mse_list.append(np.mean(fit_iter.fg.residual ** 2))
if len(mse_list) > 1:
self.assert_(mse_list[-1] < mse_list[-2])