From d5298d96c7698db2c679d685a3a8742afa5484cc Mon Sep 17 00:00:00 2001
From: James Bergstra
Date: Mon, 19 Dec 2011 08:49:55 -0500
Subject: [PATCH 1/3] initial commit of FunctionalGradientBoosting

---
 sklearn/boosting.py            | 128 +++++++++++++++++++++++++++++++++
 sklearn/tests/test_boosting.py |  50 +++++++++++++
 2 files changed, 178 insertions(+)
 create mode 100644 sklearn/boosting.py
 create mode 100644 sklearn/tests/test_boosting.py

diff --git a/sklearn/boosting.py b/sklearn/boosting.py
new file mode 100644
index 0000000000000..948154dfeaf83
--- /dev/null
+++ b/sklearn/boosting.py
@@ -0,0 +1,128 @@
+"""
+Algorithms for Boosting:
+- Functional Gradient Descent
+"""
+
+# Authors: James Bergstra
+# License: BSD3
+
+import numpy as np
+
+from .utils import safe_asarray
+from .ensemble import BaseEnsemble
+
+
+class FitIter(object):
+    """
+    Iterations (self.next()) implement one round of functional gradient
+    boosting.
+
+    Attributes
+    ----------
+    fgb : the FunctionalGradientBoosting instance
+        FitIter implements the self.fit of this object.
+
+    X : array-like of shape = [n_samples, n_features]
+        Training input samples
+
+    residual : array of shape = [n_samples]
+        Running regression target (originally the training target)
+
+    N.B. This object works in-place on self.residual
+
+    """
+    def __init__(self, fgb, X, residual):
+        self.fgb = fgb
+        self.X = X
+        self.residual = residual
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        if self.fgb.n_estimators == len(self.fgb.estimators_):
+            raise StopIteration
+        if self.fgb.estimators_:
+            self.residual -= self.fgb.estimators_[-1].predict(self.X)
+        base = self.fgb._make_estimator()
+        base.fit(self.X, self.residual)
+        return self
+
+
+class FunctionalGradientBoosting(BaseEnsemble):
+    """
+    Regression Boosting via functional gradient descent.
+
+    The algorithm is to construct a regression ensemble by using a "base
+    estimator" to repeatedly fit residual training error. So for example, the
+    first iteration fits some function f() to the original (X, y) training
+    data, the second iteration fits some g() to (X, y - f(X)), the third
+    iteration fits some h() to (X, y - f(X) - g(X)), and so on. The final
+    ensemble is f() + g() + h() + ...
+
+    This procedure is equivalent to functional gradient descent when the
+    training objective is to minimize mean squared error (MSE).
+
+    For more information see e.g.:
+        J. H. Friedman (2002). "Stochastic Gradient Boosting",
+        Computational Statistics & Data Analysis.
+
+    TODO: Mason has a good paper on the subject as well.
+    """
+
+    def __init__(self, base_estimator, n_estimators,):
+        super(FunctionalGradientBoosting, self).__init__(
+            base_estimator=base_estimator,
+            n_estimators=n_estimators)
+
+    def fit_iter(self, X, y):
+        """Create a fitting iterator for training set X, y.
+
+        See class FitIter().
+        """
+        X = safe_asarray(X)
+        y = np.array(y)  # N.B. makes a copy
+        if 'int' in str(y.dtype):
+            raise NotImplementedError('ints typically mean classif')
+        return FitIter(self, X, y)
+
+    def fit(self, X, y):
+        """Build a regression ensemble by functional gradient boosting.
+
+        Parameters
+        ----------
+        X : array-like of shape = [n_samples, n_features]
+            The training input samples.
+
+        y : array-like, shape = [n_samples]
+            The target values (integers that correspond to classes in
+            classification, real numbers in regression).
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+        for _ in self.fit_iter(X, y):
+            pass
+        return self
+
+    def predict(self, X):
+        """Return the prediction for array-like X.
+
+        Parameters
+        ----------
+        X : array-like of shape = [n_samples, n_features]
+            Test samples.
+
+        Returns
+        -------
+        prediction : numpy array of shape = [n_samples]
+            Test predictions.
+
+        """
+        rval = self.estimators_[0].predict(X)
+        for estimator in self.estimators_[1:]:
+            pred_i = estimator.predict(X)
+            rval += pred_i
+        return rval
diff --git a/sklearn/tests/test_boosting.py b/sklearn/tests/test_boosting.py
new file mode 100644
index 0000000000000..30c4d1c0a9bc1
--- /dev/null
+++ b/sklearn/tests/test_boosting.py
@@ -0,0 +1,50 @@
+from unittest import TestCase
+import numpy as np
+from sklearn.boosting import FunctionalGradientBoosting
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.datasets.base import load_boston
+
+class TestFunctionalGradientBoosting(TestCase):
+    def setUp(self):
+        self.task = load_boston()
+        self.base_est = DecisionTreeRegressor(max_depth=2, min_split=4)
+        self.boosting = FunctionalGradientBoosting(
+            base_estimator=DecisionTreeRegressor(
+                max_depth=2,
+                min_split=4),
+            n_estimators=5)
+
+    def test_fit_returns_self(self):
+        r = self.boosting.fit(self.task['data'], self.task['target'])
+        assert r is self.boosting
+
+    def test_1_estimator_matches_base(self):
+        self.boosting = FunctionalGradientBoosting(
+            base_estimator=DecisionTreeRegressor(
+                max_depth=2,
+                min_split=4),
+            n_estimators=1)
+        self.base_est.fit(self.task['data'], self.task['target'])
+        self.boosting.fit(self.task['data'], self.task['target'])
+        pred1 = self.base_est.predict(self.task['data'])
+        pred2 = self.boosting.predict(self.task['data'])
+        self.assert_(np.allclose(pred1, pred2))
+
+    def test_n_estimators(self):
+        assert len(self.boosting.estimators_) == 0
+        self.boosting.fit(self.task['data'], self.task['target'])
+        assert len(self.boosting.estimators_) == self.boosting.n_estimators
+
+    def test_int_y_not_implemented(self):
+        self.assertRaises(NotImplementedError,
+                          self.boosting.fit,
+                          np.ones((4, 5)), np.arange(4).astype('int'))
+
+    def test_mse_always_goes_down(self):
+        model = self.boosting
+        task = self.task
+        mse_list = []
+        for fit_iter in model.fit_iter(task['data'], task['target']):
+            mse_list.append(np.mean(fit_iter.residual ** 2))
+            if len(mse_list) > 1:
+                self.assert_(mse_list[-1] < mse_list[-2])
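For reference, a minimal usage sketch of the estimator added by this patch, patterned on the accompanying tests. It is illustrative only; the dataset and tree settings are simply what the tests happen to use, not requirements of the API.

    import numpy as np
    from sklearn.boosting import FunctionalGradientBoosting
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.datasets.base import load_boston

    # Boost shallow regression trees on the Boston housing data.
    boston = load_boston()
    X, y = boston['data'], boston['target'].astype(float)  # int targets are rejected

    model = FunctionalGradientBoosting(
        base_estimator=DecisionTreeRegressor(max_depth=2),
        n_estimators=10)
    model.fit(X, y)                   # each round fits the running residual
    pred = model.predict(X)           # sum of the individual tree predictions
    print(np.mean((y - pred) ** 2))   # training MSE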
From 7aef569f7ed941d8ccc1b472937566d6fc1cdcb8 Mon Sep 17 00:00:00 2001
From: James Bergstra
Date: Wed, 21 Dec 2011 09:26:57 -0500
Subject: [PATCH 2/3] moved my boosting file into ensemble dir

---
 sklearn/{ => ensemble}/boosting.py            | 0
 sklearn/{ => ensemble}/tests/test_boosting.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename sklearn/{ => ensemble}/boosting.py (100%)
 rename sklearn/{ => ensemble}/tests/test_boosting.py (100%)

diff --git a/sklearn/boosting.py b/sklearn/ensemble/boosting.py
similarity index 100%
rename from sklearn/boosting.py
rename to sklearn/ensemble/boosting.py
diff --git a/sklearn/tests/test_boosting.py b/sklearn/ensemble/tests/test_boosting.py
similarity index 100%
rename from sklearn/tests/test_boosting.py
rename to sklearn/ensemble/tests/test_boosting.py
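Patch 1's docstring claims that fitting residuals is functional gradient descent on squared error; the next patch generalizes this by letting each round fit the negative gradient of an arbitrary loss. A self-contained numpy sketch of that claim (illustrative only, not part of the patch series): the finite-difference gradient of the squared-error loss with respect to the predictions equals minus the residual.

    import numpy as np

    rng = np.random.RandomState(0)
    y = rng.randn(100)    # targets
    F = rng.randn(100)    # current ensemble predictions F(x_i)

    # Squared-error loss L(F) = 0.5 * sum_i (y_i - F_i)**2; its gradient
    # w.r.t. F_i is (F_i - y_i), so the negative gradient is the residual.
    eps = 1e-6
    grad = np.empty_like(F)
    for i in range(len(F)):
        F_plus = F.copy()
        F_plus[i] += eps
        grad[i] = (0.5 * np.sum((y - F_plus) ** 2)
                   - 0.5 * np.sum((y - F) ** 2)) / eps

    print(np.allclose(-grad, y - F, atol=1e-4))   # True: residual == -gradient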
From 98092a3147d86f6a577dab747c4c91b5b98b6b3e Mon Sep 17 00:00:00 2001
From: James Bergstra
Date: Wed, 21 Dec 2011 10:19:50 -0500
Subject: [PATCH 3/3] trying to refactor functional gradient to be used beyond trees

---
 sklearn/ensemble/boosting.py            | 134 ++++++++++++++++++------
 sklearn/ensemble/gradient_boosting.py   |  65 +++---------
 sklearn/ensemble/tests/test_boosting.py |  12 +--
 3 files changed, 121 insertions(+), 90 deletions(-)

diff --git a/sklearn/ensemble/boosting.py b/sklearn/ensemble/boosting.py
index 948154dfeaf83..ff0b2ddd53e01 100644
--- a/sklearn/ensemble/boosting.py
+++ b/sklearn/ensemble/boosting.py
@@ -8,48 +8,111 @@
 
 import numpy as np
 
-from .utils import safe_asarray
-from .ensemble import BaseEnsemble
+from ..utils import safe_asarray
+from ..ensemble import BaseEnsemble
+from ..base import RegressorMixin
 
 
-class FitIter(object):
+class LossFunction(object):
+    """Abstract base class for various loss functions."""
+
+    def init_estimator(self, X, y):
+        pass
+
+    def __call__(self, y, pred):
+        pass
+
+    def negative_gradient(self, y, pred):
+        """Compute the negative gradient."""
+        pass
+
+
+class LeastSquaresError(LossFunction):
+    """Loss function for least squares (LS) estimation.
+    Terminal regions need not to be updated for least squares. """
+
+    def init_estimator(self):
+        return MeanPredictor()
+
+    def __call__(self, y, pred):
+        return np.mean((y - pred) ** 2.0)
+
+    def negative_gradient(self, y, pred):
+        return y - pred
+
+
+class LeastAbsoluteError(LossFunction):
+    """Loss function for least absolute deviation (LAD) regression. """
+
+    def init_estimator(self):
+        return MedianPredictor()
+
+    def __call__(self, y, pred):
+        return np.abs(y - pred).mean()
+
+    def negative_gradient(self, y, pred):
+        return np.sign(y - pred)
+
+
+class BinomialDeviance(LossFunction):
+    """Binomial deviance loss function for binary classification."""
+
+    def init_estimator(self):
+        return ClassPriorPredictor()
+
+    def __call__(self, y, pred):
+        """Compute the deviance (= negative log-likelihood). """
+        ## return -2.0 * np.sum(y * pred -
+        ##                      np.log(1.0 + np.exp(pred))) / y.shape[0]
+
+        # logaddexp(0, v) == log(1.0 + exp(v))
+        return -2.0 * np.sum(y * pred -
+                             np.logaddexp(0.0, pred)) / y.shape[0]
+
+    def negative_gradient(self, y, pred):
+        return y - 1.0 / (1.0 + np.exp(-pred))
+
+
+class FunctionalGradient(object):
+    def __init__(self, loss, X, y):
+        self.loss = loss
+        self.X = X
+        self.y = y
+        self.residual = np.array(y)  # copies, ensures array
+
+    def current_Xy(self):
+        return self.X, self.residual
+
+    def update(self, prediction):
+        self.residual = self.loss.negative_gradient(self.residual, prediction)
+
+
+class FitNIter(object):
     """
     Iterations (self.next()) implement one round of functional gradient
     boosting.
 
-    Attributes
-    ----------
-    fgb : the FunctionalGradientBoosting instance
-        FitIter implements the self.fit of this object.
-
-    X : array-like of shape = [n_samples, n_features]
-        Training input samples
-
-    residual : array of shape = [n_samples]
-        Running regression target (originally the training target)
-
-    N.B. This object works in-place on self.residual
-
     """
-    def __init__(self, fgb, X, residual):
-        self.fgb = fgb
-        self.X = X
-        self.residual = residual
+    def __init__(self, ensemble, fg, n_iters):
+        self.ensemble = ensemble
+        self.fg = fg
+        self.n_iters = n_iters
 
     def __iter__(self):
        return self
 
     def next(self):
-        if self.fgb.n_estimators == len(self.fgb.estimators_):
+        if self.n_iters == len(self.ensemble.estimators_):
             raise StopIteration
-        if self.fgb.estimators_:
-            self.residual -= self.fgb.estimators_[-1].predict(self.X)
-        base = self.fgb._make_estimator()
-        base.fit(self.X, self.residual)
+        base = self.ensemble._make_estimator()
+        X, y = self.fg.current_Xy()
+        base.fit(X, y)
+        self.fg.update(base.predict(X))
         return self
 
 
-class FunctionalGradientBoosting(BaseEnsemble):
+class GradientBoostedRegressor(BaseEnsemble, RegressorMixin):
     """
     Regression Boosting via functional gradient descent.
 
@@ -70,21 +133,30 @@ class FunctionalGradientBoosting(BaseEnsemble):
     TODO: Mason has a good paper on the subject as well.
     """
 
-    def __init__(self, base_estimator, n_estimators,):
-        super(FunctionalGradientBoosting, self).__init__(
+    def __init__(self, base_estimator, n_estimators,
+                 loss=LeastSquaresError):
+        super(GradientBoostedRegressor, self).__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators)
+        self.loss = loss
 
     def fit_iter(self, X, y):
         """Create a fitting iterator for training set X, y.
 
         See class FitIter().
         """
-        X = safe_asarray(X)
-        y = np.array(y)  # N.B. makes a copy
         if 'int' in str(y.dtype):
-            raise NotImplementedError('ints typically mean classif')
-        return FitIter(self, X, y)
+            raise TypeError('Regression of int-valued targets is ambiguous'
+                    '. Please cast to float if you want to train using a '
+                    'regression criterion.')
+        if issubclass(self.loss, LossFunction):
+            loss = self.loss()
+        else:
+            loss = self.loss
+        return FitNIter(
+                ensemble=self,
+                fg=FunctionalGradient(loss, X, y),
+                n_iters=self.n_estimators)
 
     def fit(self, X, y):
         """Build a regression ensemble by functional gradient boosting.
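With this refactoring the loss becomes a constructor parameter: loss may be either a LossFunction subclass (the default, LeastSquaresError) or an instance, since fit_iter accepts both. A speculative usage sketch, assuming the module layout introduced by this patch; the patch's own tests only exercise the default loss.

    from sklearn.ensemble.boosting import GradientBoostedRegressor, LeastAbsoluteError
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.datasets.base import load_boston

    boston = load_boston()

    # Same ensemble loop as before, but each round now fits the negative
    # gradient of the chosen loss rather than the plain residual.
    model = GradientBoostedRegressor(
        base_estimator=DecisionTreeRegressor(max_depth=2),
        n_estimators=5,
        loss=LeastAbsoluteError)   # a LossFunction subclass or instance
    model.fit(boston['data'], boston['target'])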
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 63e6e7469989b..6492951417847 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -23,6 +23,11 @@
 from ..tree._tree import MSE
 from ..tree._tree import DTYPE
 
+from .boosting import LossFunction
+from .boosting import LeastSquaresError
+from .boosting import LeastAbsoluteError
+from .boosting import BinomialDeviance
+
 # ignore overflows due to exp(-pred) in BinomailDeviance
 np.seterr(invalid='raise', under='raise', divide='raise', over='ignore')
 
@@ -89,20 +94,7 @@ def predict(self, X):
         y.fill(self.prior)
         return y
 
-
-class LossFunction(object):
-    """Abstract base class for various loss functions."""
-
-    def init_estimator(self, X, y):
-        pass
-
-    def __call__(self, y, pred):
-        pass
-
-    def negative_gradient(self, y, pred):
-        """Compute the negative gradient."""
-        pass
-
+class TreeLossMixin(object):
     def update_terminal_regions(self, tree, X, y, residual, y_pred,
                                 learn_rate=1.0):
         """Update the terminal regions (=leaves) of the given tree and
@@ -125,32 +117,14 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
         pass
 
 
-class LeastSquaresError(LossFunction):
+class TreeLeastSquaresError(LeastSquaresError, TreeLossMixin):
     """Loss function for least squares (LS) estimation.
     Terminal regions need not to be updated for least squares. """
 
-    def init_estimator(self):
-        return MeanPredictor()
-
-    def __call__(self, y, pred):
-        return np.mean((y - pred) ** 2.0)
-
-    def negative_gradient(self, y, pred):
-        return y - pred
-
 
-class LeastAbsoluteError(LossFunction):
+class TreeLeastAbsoluteError(LeastAbsoluteError, TreeLossMixin):
     """Loss function for least absolute deviation (LAD) regression. """
 
-    def init_estimator(self):
-        return MedianPredictor()
-
-    def __call__(self, y, pred):
-        return np.abs(y - pred).mean()
-
-    def negative_gradient(self, y, pred):
-        return np.sign(y - pred)
-
     def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
                                 residual, pred):
         """LAD updates terminal regions to median estimates. """
@@ -158,24 +132,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
                                         pred.take(terminal_region, axis=0))
 
 
-class BinomialDeviance(LossFunction):
+class TreeBinomialDeviance(BinomialDeviance, TreeLossMixin):
     """Binomial deviance loss function for binary classification."""
 
-    def init_estimator(self):
-        return ClassPriorPredictor()
-
-    def __call__(self, y, pred):
-        """Compute the deviance (= negative log-likelihood). """
-        ## return -2.0 * np.sum(y * pred -
-        ##                      np.log(1.0 + np.exp(pred))) / y.shape[0]
-
-        # logaddexp(0, v) == log(1.0 + exp(v))
-        return -2.0 * np.sum(y * pred -
-                             np.logaddexp(0.0, pred)) / y.shape[0]
-
-    def negative_gradient(self, y, pred):
-        return y - 1.0 / (1.0 + np.exp(-pred))
-
     def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
                                 residual, pred):
         """Make a single Newton-Raphson step. """
@@ -191,9 +150,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
         tree.value[leaf, 0] = numerator / denominator
 
 
-LOSS_FUNCTIONS = {'ls': LeastSquaresError,
-                  'lad': LeastAbsoluteError,
-                  'deviance': BinomialDeviance}
+LOSS_FUNCTIONS = {'ls': TreeLeastSquaresError,
+                  'lad': TreeLeastAbsoluteError,
+                  'deviance': TreeBinomialDeviance}
 
 
 class BaseGradientBoosting(BaseEstimator):
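The deviance loss moved into boosting.py keeps the logaddexp trick noted in its comments. A small self-contained check of why that matters numerically (illustrative only, not part of the patch): the naive log(1 + exp(v)) overflows for large v, while np.logaddexp(0, v) stays finite.

    import numpy as np

    v = np.array([-1000.0, -1.0, 0.0, 1.0, 1000.0])

    # Naive formula: exp(1000.) overflows to inf, so the result is inf.
    with np.errstate(over='ignore'):
        naive = np.log(1.0 + np.exp(v))

    # logaddexp(0, v) == log(exp(0) + exp(v)) == log(1 + exp(v)),
    # computed without forming exp(v) explicitly.
    stable = np.logaddexp(0.0, v)

    print(naive)    # last entry is inf once exp(v) overflows
    print(stable)   # finite everywhere, matches naive where naive is finite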
""" @@ -158,24 +132,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y, pred.take(terminal_region, axis=0)) -class BinomialDeviance(LossFunction): +class TreeBinomialDeviance(BinomialDeviance, TreeLossMixin): """Binomial deviance loss function for binary classification.""" - def init_estimator(self): - return ClassPriorPredictor() - - def __call__(self, y, pred): - """Compute the deviance (= negative log-likelihood). """ - ## return -2.0 * np.sum(y * pred - - ## np.log(1.0 + np.exp(pred))) / y.shape[0] - - # logaddexp(0, v) == log(1.0 + exp(v)) - return -2.0 * np.sum(y * pred - - np.logaddexp(0.0, pred)) / y.shape[0] - - def negative_gradient(self, y, pred): - return y - 1.0 / (1.0 + np.exp(-pred)) - def _update_terminal_region(self, tree, leaf, terminal_region, X, y, residual, pred): """Make a single Newton-Raphson step. """ @@ -191,9 +150,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y, tree.value[leaf, 0] = numerator / denominator -LOSS_FUNCTIONS = {'ls': LeastSquaresError, - 'lad': LeastAbsoluteError, - 'deviance': BinomialDeviance} +LOSS_FUNCTIONS = {'ls': TreeLeastSquaresError, + 'lad': TreeLeastAbsoluteError, + 'deviance': TreeBinomialDeviance} class BaseGradientBoosting(BaseEstimator): diff --git a/sklearn/ensemble/tests/test_boosting.py b/sklearn/ensemble/tests/test_boosting.py index 30c4d1c0a9bc1..d4e98c62e1432 100644 --- a/sklearn/ensemble/tests/test_boosting.py +++ b/sklearn/ensemble/tests/test_boosting.py @@ -1,14 +1,14 @@ from unittest import TestCase import numpy as np -from sklearn.boosting import FunctionalGradientBoosting +from sklearn.ensemble.boosting import GradientBoostedRegressor from sklearn.tree import DecisionTreeRegressor from sklearn.datasets.base import load_boston -class TestFunctionalGradientBoosting(TestCase): +class TestGradientBoostedRegressor(TestCase): def setUp(self): self.task = load_boston() self.base_est = DecisionTreeRegressor(max_depth=2, min_split=4) - self.boosting = FunctionalGradientBoosting( + self.boosting = GradientBoostedRegressor( base_estimator=DecisionTreeRegressor( max_depth=2, min_split=4), @@ -19,7 +19,7 @@ def test_fit_returns_self(self): assert r is self.boosting def test_1_estimator_matches_base(self): - self.boosting = FunctionalGradientBoosting( + self.boosting = GradientBoostedRegressor( base_estimator=DecisionTreeRegressor( max_depth=2, min_split=4), @@ -36,7 +36,7 @@ def test_n_estimators(self): assert len(self.boosting.estimators_) == self.boosting.n_estimators def test_int_y_not_implemented(self): - self.assertRaises(NotImplementedError, + self.assertRaises(TypeError, self.boosting.fit, np.ones((4, 5)), np.arange(4).astype('int')) @@ -45,6 +45,6 @@ def test_mse_always_goes_down(self): task = self.task mse_list = [] for fit_iter in model.fit_iter(task['data'], task['target']): - mse_list.append(np.mean(fit_iter.residual ** 2)) + mse_list.append(np.mean(fit_iter.fg.residual ** 2)) if len(mse_list) > 1: self.assert_(mse_list[-1] < mse_list[-2])