From d5298d96c7698db2c679d685a3a8742afa5484cc Mon Sep 17 00:00:00 2001
From: James Bergstra
Date: Mon, 19 Dec 2011 08:49:55 -0500
Subject: [PATCH 1/3] initial commit of FunctionalGradientBoosting

---
 sklearn/boosting.py            | 128 +++++++++++++++++++++++++++++++++
 sklearn/tests/test_boosting.py |  50 +++++++++++++
 2 files changed, 178 insertions(+)
 create mode 100644 sklearn/boosting.py
 create mode 100644 sklearn/tests/test_boosting.py

diff --git a/sklearn/boosting.py b/sklearn/boosting.py
new file mode 100644
index 0000000000000..948154dfeaf83
--- /dev/null
+++ b/sklearn/boosting.py
@@ -0,0 +1,128 @@
+"""
+Algorithms for Boosting:
+- Functional Gradient Descent
+"""
+
+# Authors: James Bergstra
+# License: BSD3
+
+import numpy as np
+
+from .utils import safe_asarray
+from .ensemble import BaseEnsemble
+
+
+class FitIter(object):
+    """
+    Iterations (self.next()) implement one round of functional gradient
+    boosting.
+
+    Attributes
+    ----------
+    fgb : the FunctionalGradientBoosting instance
+        FitIter implements the self.fit of this object.
+
+    X : array-like of shape = [n_samples, n_features]
+        Training input samples
+
+    residual : array of shape = [n_samples]
+        Running regression target (originally the training target)
+
+    N.B. This object works in-place on self.residual
+
+    """
+    def __init__(self, fgb, X, residual):
+        self.fgb = fgb
+        self.X = X
+        self.residual = residual
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        if self.fgb.n_estimators == len(self.fgb.estimators_):
+            raise StopIteration
+        if self.fgb.estimators_:
+            self.residual -= self.fgb.estimators_[-1].predict(self.X)
+        base = self.fgb._make_estimator()
+        base.fit(self.X, self.residual)
+        return self
+
+
+class FunctionalGradientBoosting(BaseEnsemble):
+    """
+    Regression Boosting via functional gradient descent.
+
+    The algorithm is to construct a regression ensemble by using a "base
+    estimator" to repeatedly fit residual training error. So for example, the
+    first iteration fits some function f() to the original (X, y) training
+    data, the second iteration fits some g() to (X, y - f(X)), the third
+    iteration fits some h() to (X, y - f(X) - g(X)), and so on. The final
+    ensemble is f() + g() + h() + ...
+
+    This procedure is equivalent to functional gradient descent when the
+    training objective is to minimize mean squared error (MSE).
+
+    For more information see e.g.:
+        J. H. Friedman (2002). "Stochastic Gradient Boosting",
+        Computational Statistics & Data Analysis.
+
+    TODO: Mason has a good paper on the subject as well.
+    """
+
+    def __init__(self, base_estimator, n_estimators,):
+        super(FunctionalGradientBoosting, self).__init__(
+            base_estimator=base_estimator,
+            n_estimators=n_estimators)
+
+    def fit_iter(self, X, y):
+        """Create a fitting iterator for training set X, y.
+
+        See class FitIter().
+        """
+        X = safe_asarray(X)
+        y = np.array(y)  # N.B. makes a copy
+        if 'int' in str(y.dtype):
+            raise NotImplementedError('ints typically mean classif')
+        return FitIter(self, X, y)
+
+    def fit(self, X, y):
+        """Build a regression ensemble by functional gradient boosting.
+
+        Parameters
+        ----------
+        X : array-like of shape = [n_samples, n_features]
+            The training input samples.
+
+        y : array-like, shape = [n_samples]
+            The target values (integers that correspond to classes in
+            classification, real numbers in regression).
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+        for _ in self.fit_iter(X, y):
+            pass
+        return self
+
+    def predict(self, X):
+        """Return the prediction for array-like X.
+
+        Parameters
+        ----------
+        X : array-like of shape = [n_samples, n_features]
+            Test samples.
+
+        Returns
+        -------
+        prediction : numpy array of shape = [n_samples]
+            Test predictions.
+
+        """
+        rval = self.estimators_[0].predict(X)
+        for estimator in self.estimators_[1:]:
+            pred_i = estimator.predict(X)
+            rval += pred_i
+        return rval
diff --git a/sklearn/tests/test_boosting.py b/sklearn/tests/test_boosting.py
new file mode 100644
index 0000000000000..30c4d1c0a9bc1
--- /dev/null
+++ b/sklearn/tests/test_boosting.py
@@ -0,0 +1,50 @@
+from unittest import TestCase
+import numpy as np
+from sklearn.boosting import FunctionalGradientBoosting
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.datasets.base import load_boston
+
+class TestFunctionalGradientBoosting(TestCase):
+    def setUp(self):
+        self.task = load_boston()
+        self.base_est = DecisionTreeRegressor(max_depth=2, min_split=4)
+        self.boosting = FunctionalGradientBoosting(
+            base_estimator=DecisionTreeRegressor(
+                max_depth=2,
+                min_split=4),
+            n_estimators=5)
+
+    def test_fit_returns_self(self):
+        r = self.boosting.fit(self.task['data'], self.task['target'])
+        assert r is self.boosting
+
+    def test_1_estimator_matches_base(self):
+        self.boosting = FunctionalGradientBoosting(
+            base_estimator=DecisionTreeRegressor(
+                max_depth=2,
+                min_split=4),
+            n_estimators=1)
+        self.base_est.fit(self.task['data'], self.task['target'])
+        self.boosting.fit(self.task['data'], self.task['target'])
+        pred1 = self.base_est.predict(self.task['data'])
+        pred2 = self.boosting.predict(self.task['data'])
+        self.assert_(np.allclose(pred1, pred2))
+
+    def test_n_estimators(self):
+        assert len(self.boosting.estimators_) == 0
+        self.boosting.fit(self.task['data'], self.task['target'])
+        assert len(self.boosting.estimators_) == self.boosting.n_estimators
+
+    def test_int_y_not_implemented(self):
+        self.assertRaises(NotImplementedError,
+                          self.boosting.fit,
+                          np.ones((4, 5)), np.arange(4).astype('int'))
+
+    def test_mse_always_goes_down(self):
+        model = self.boosting
+        task = self.task
+        mse_list = []
+        for fit_iter in model.fit_iter(task['data'], task['target']):
+            mse_list.append(np.mean(fit_iter.residual ** 2))
+            if len(mse_list) > 1:
+                self.assert_(mse_list[-1] < mse_list[-2])
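For reference, a minimal usage sketch of the estimator added by this patch, patterned on the accompanying tests. It is illustrative only; the dataset and tree settings are simply what the tests happen to use, not requirements of the API.

    import numpy as np
    from sklearn.boosting import FunctionalGradientBoosting
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.datasets.base import load_boston

    # Boost shallow regression trees on the Boston housing data.
    boston = load_boston()
    X, y = boston['data'], boston['target'].astype(float)  # int targets are rejected

    model = FunctionalGradientBoosting(
        base_estimator=DecisionTreeRegressor(max_depth=2),
        n_estimators=10)
    model.fit(X, y)                   # each round fits the running residual
    pred = model.predict(X)           # sum of the individual tree predictions
    print(np.mean((y - pred) ** 2))   # training MSE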
From 7aef569f7ed941d8ccc1b472937566d6fc1cdcb8 Mon Sep 17 00:00:00 2001
From: James Bergstra
Date: Wed, 21 Dec 2011 09:26:57 -0500
Subject: [PATCH 2/3] moved my boosting file into ensemble dir

---
 sklearn/{ => ensemble}/boosting.py            | 0
 sklearn/{ => ensemble}/tests/test_boosting.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename sklearn/{ => ensemble}/boosting.py (100%)
 rename sklearn/{ => ensemble}/tests/test_boosting.py (100%)

diff --git a/sklearn/boosting.py b/sklearn/ensemble/boosting.py
similarity index 100%
rename from sklearn/boosting.py
rename to sklearn/ensemble/boosting.py
diff --git a/sklearn/tests/test_boosting.py b/sklearn/ensemble/tests/test_boosting.py
similarity index 100%
rename from sklearn/tests/test_boosting.py
rename to sklearn/ensemble/tests/test_boosting.py
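Patch 1's docstring claims that fitting residuals is functional gradient descent on squared error; the next patch generalizes this by letting each round fit the negative gradient of an arbitrary loss. A self-contained numpy sketch of that claim (illustrative only, not part of the patch series): the finite-difference gradient of the squared-error loss with respect to the predictions equals minus the residual.

    import numpy as np

    rng = np.random.RandomState(0)
    y = rng.randn(100)    # targets
    F = rng.randn(100)    # current ensemble predictions F(x_i)

    # Squared-error loss L(F) = 0.5 * sum_i (y_i - F_i)**2; its gradient
    # w.r.t. F_i is (F_i - y_i), so the negative gradient is the residual.
    eps = 1e-6
    grad = np.empty_like(F)
    for i in range(len(F)):
        F_plus = F.copy()
        F_plus[i] += eps
        grad[i] = (0.5 * np.sum((y - F_plus) ** 2)
                   - 0.5 * np.sum((y - F) ** 2)) / eps

    print(np.allclose(-grad, y - F, atol=1e-4))   # True: residual == -gradient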
From 98092a3147d86f6a577dab747c4c91b5b98b6b3e Mon Sep 17 00:00:00 2001
From: James Bergstra
Date: Wed, 21 Dec 2011 10:19:50 -0500
Subject: [PATCH 3/3] trying to refactor functional gradient to be used beyond trees

---
 sklearn/ensemble/boosting.py            | 134 ++++++++++++++++++------
 sklearn/ensemble/gradient_boosting.py   |  65 +++---------
 sklearn/ensemble/tests/test_boosting.py |  12 +--
 3 files changed, 121 insertions(+), 90 deletions(-)

diff --git a/sklearn/ensemble/boosting.py b/sklearn/ensemble/boosting.py
index 948154dfeaf83..ff0b2ddd53e01 100644
--- a/sklearn/ensemble/boosting.py
+++ b/sklearn/ensemble/boosting.py
@@ -8,48 +8,111 @@
 
 import numpy as np
 
-from .utils import safe_asarray
-from .ensemble import BaseEnsemble
+from ..utils import safe_asarray
+from ..ensemble import BaseEnsemble
+from ..base import RegressorMixin
 
 
-class FitIter(object):
+class LossFunction(object):
+    """Abstract base class for various loss functions."""
+
+    def init_estimator(self, X, y):
+        pass
+
+    def __call__(self, y, pred):
+        pass
+
+    def negative_gradient(self, y, pred):
+        """Compute the negative gradient."""
+        pass
+
+
+class LeastSquaresError(LossFunction):
+    """Loss function for least squares (LS) estimation.
+    Terminal regions need not to be updated for least squares. """
+
+    def init_estimator(self):
+        return MeanPredictor()
+
+    def __call__(self, y, pred):
+        return np.mean((y - pred) ** 2.0)
+
+    def negative_gradient(self, y, pred):
+        return y - pred
+
+
+class LeastAbsoluteError(LossFunction):
+    """Loss function for least absolute deviation (LAD) regression. """
+
+    def init_estimator(self):
+        return MedianPredictor()
+
+    def __call__(self, y, pred):
+        return np.abs(y - pred).mean()
+
+    def negative_gradient(self, y, pred):
+        return np.sign(y - pred)
+
+
+class BinomialDeviance(LossFunction):
+    """Binomial deviance loss function for binary classification."""
+
+    def init_estimator(self):
+        return ClassPriorPredictor()
+
+    def __call__(self, y, pred):
+        """Compute the deviance (= negative log-likelihood). """
+        ## return -2.0 * np.sum(y * pred -
+        ##                      np.log(1.0 + np.exp(pred))) / y.shape[0]
+
+        # logaddexp(0, v) == log(1.0 + exp(v))
+        return -2.0 * np.sum(y * pred -
+                             np.logaddexp(0.0, pred)) / y.shape[0]
+
+    def negative_gradient(self, y, pred):
+        return y - 1.0 / (1.0 + np.exp(-pred))
+
+
+class FunctionalGradient(object):
+    def __init__(self, loss, X, y):
+        self.loss = loss
+        self.X = X
+        self.y = y
+        self.residual = np.array(y)  # copies, ensures array
+
+    def current_Xy(self):
+        return self.X, self.residual
+
+    def update(self, prediction):
+        self.residual = self.loss.negative_gradient(self.residual, prediction)
+
+
+class FitNIter(object):
     """
     Iterations (self.next()) implement one round of functional gradient
     boosting.
 
-    Attributes
-    ----------
-    fgb : the FunctionalGradientBoosting instance
-        FitIter implements the self.fit of this object.
-
-    X : array-like of shape = [n_samples, n_features]
-        Training input samples
-
-    residual : array of shape = [n_samples]
-        Running regression target (originally the training target)
-
-    N.B. This object works in-place on self.residual
-
     """
-    def __init__(self, fgb, X, residual):
-        self.fgb = fgb
-        self.X = X
-        self.residual = residual
+    def __init__(self, ensemble, fg, n_iters):
+        self.ensemble = ensemble
+        self.fg = fg
+        self.n_iters = n_iters
 
     def __iter__(self):
        return self
 
     def next(self):
-        if self.fgb.n_estimators == len(self.fgb.estimators_):
+        if self.n_iters == len(self.ensemble.estimators_):
             raise StopIteration
-        if self.fgb.estimators_:
-            self.residual -= self.fgb.estimators_[-1].predict(self.X)
-        base = self.fgb._make_estimator()
-        base.fit(self.X, self.residual)
+        base = self.ensemble._make_estimator()
+        X, y = self.fg.current_Xy()
+        base.fit(X, y)
+        self.fg.update(base.predict(X))
         return self
 
 
-class FunctionalGradientBoosting(BaseEnsemble):
+class GradientBoostedRegressor(BaseEnsemble, RegressorMixin):
     """
     Regression Boosting via functional gradient descent.
 
@@ -70,21 +133,30 @@ class FunctionalGradientBoosting(BaseEnsemble):
     TODO: Mason has a good paper on the subject as well.
     """
 
-    def __init__(self, base_estimator, n_estimators,):
-        super(FunctionalGradientBoosting, self).__init__(
+    def __init__(self, base_estimator, n_estimators,
+                 loss=LeastSquaresError):
+        super(GradientBoostedRegressor, self).__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators)
+        self.loss = loss
 
     def fit_iter(self, X, y):
         """Create a fitting iterator for training set X, y.
 
         See class FitIter().
         """
-        X = safe_asarray(X)
-        y = np.array(y)  # N.B. makes a copy
         if 'int' in str(y.dtype):
-            raise NotImplementedError('ints typically mean classif')
-        return FitIter(self, X, y)
+            raise TypeError('Regression of int-valued targets is ambiguous'
+                    '. Please cast to float if you want to train using a '
+                    'regression criterion.')
+        if issubclass(self.loss, LossFunction):
+            loss = self.loss()
+        else:
+            loss = self.loss
+        return FitNIter(
+                ensemble=self,
+                fg=FunctionalGradient(loss, X, y),
+                n_iters=self.n_estimators)
 
     def fit(self, X, y):
         """Build a regression ensemble by functional gradient boosting.
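With this refactoring the loss becomes a constructor parameter: loss may be either a LossFunction subclass (the default, LeastSquaresError) or an instance, since fit_iter accepts both. A speculative usage sketch, assuming the module layout introduced by this patch; the patch's own tests only exercise the default loss.

    from sklearn.ensemble.boosting import GradientBoostedRegressor, LeastAbsoluteError
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.datasets.base import load_boston

    boston = load_boston()

    # Same ensemble loop as before, but each round now fits the negative
    # gradient of the chosen loss rather than the plain residual.
    model = GradientBoostedRegressor(
        base_estimator=DecisionTreeRegressor(max_depth=2),
        n_estimators=5,
        loss=LeastAbsoluteError)   # a LossFunction subclass or instance
    model.fit(boston['data'], boston['target'])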
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 63e6e7469989b..6492951417847 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -23,6 +23,11 @@
 from ..tree._tree import MSE
 from ..tree._tree import DTYPE
 
+from .boosting import LossFunction
+from .boosting import LeastSquaresError
+from .boosting import LeastAbsoluteError
+from .boosting import BinomialDeviance
+
 # ignore overflows due to exp(-pred) in BinomailDeviance
 np.seterr(invalid='raise', under='raise', divide='raise', over='ignore')
 
@@ -89,20 +94,7 @@ def predict(self, X):
         y.fill(self.prior)
         return y
 
-
-class LossFunction(object):
-    """Abstract base class for various loss functions."""
-
-    def init_estimator(self, X, y):
-        pass
-
-    def __call__(self, y, pred):
-        pass
-
-    def negative_gradient(self, y, pred):
-        """Compute the negative gradient."""
-        pass
-
+class TreeLossMixin(object):
     def update_terminal_regions(self, tree, X, y, residual, y_pred,
                                 learn_rate=1.0):
         """Update the terminal regions (=leaves) of the given tree and
@@ -125,32 +117,14 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
         pass
 
 
-class LeastSquaresError(LossFunction):
+class TreeLeastSquaresError(LeastSquaresError, TreeLossMixin):
     """Loss function for least squares (LS) estimation.
     Terminal regions need not to be updated for least squares. """
 
-    def init_estimator(self):
-        return MeanPredictor()
-
-    def __call__(self, y, pred):
-        return np.mean((y - pred) ** 2.0)
-
-    def negative_gradient(self, y, pred):
-        return y - pred
-
 
-class LeastAbsoluteError(LossFunction):
+class TreeLeastAbsoluteError(LeastAbsoluteError, TreeLossMixin):
     """Loss function for least absolute deviation (LAD) regression. """
 
-    def init_estimator(self):
-        return MedianPredictor()
-
-    def __call__(self, y, pred):
-        return np.abs(y - pred).mean()
-
-    def negative_gradient(self, y, pred):
-        return np.sign(y - pred)
-
     def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
                                 residual, pred):
         """LAD updates terminal regions to median estimates. """
@@ -158,24 +132,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
                                         pred.take(terminal_region, axis=0))
 
 
-class BinomialDeviance(LossFunction):
+class TreeBinomialDeviance(BinomialDeviance, TreeLossMixin):
     """Binomial deviance loss function for binary classification."""
 
-    def init_estimator(self):
-        return ClassPriorPredictor()
-
-    def __call__(self, y, pred):
-        """Compute the deviance (= negative log-likelihood). """
-        ## return -2.0 * np.sum(y * pred -
-        ##                      np.log(1.0 + np.exp(pred))) / y.shape[0]
-
-        # logaddexp(0, v) == log(1.0 + exp(v))
-        return -2.0 * np.sum(y * pred -
-                             np.logaddexp(0.0, pred)) / y.shape[0]
-
-    def negative_gradient(self, y, pred):
-        return y - 1.0 / (1.0 + np.exp(-pred))
-
     def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
                                 residual, pred):
         """Make a single Newton-Raphson step. """
@@ -191,9 +150,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y,
         tree.value[leaf, 0] = numerator / denominator
 
 
-LOSS_FUNCTIONS = {'ls': LeastSquaresError,
-                  'lad': LeastAbsoluteError,
-                  'deviance': BinomialDeviance}
+LOSS_FUNCTIONS = {'ls': TreeLeastSquaresError,
+                  'lad': TreeLeastAbsoluteError,
+                  'deviance': TreeBinomialDeviance}
 
 
 class BaseGradientBoosting(BaseEstimator):
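The deviance loss moved into boosting.py keeps the logaddexp trick noted in its comments. A small self-contained check of why that matters numerically (illustrative only, not part of the patch): the naive log(1 + exp(v)) overflows for large v, while np.logaddexp(0, v) stays finite.

    import numpy as np

    v = np.array([-1000.0, -1.0, 0.0, 1.0, 1000.0])

    # Naive formula: exp(1000.) overflows to inf, so the result is inf.
    with np.errstate(over='ignore'):
        naive = np.log(1.0 + np.exp(v))

    # logaddexp(0, v) == log(exp(0) + exp(v)) == log(1 + exp(v)),
    # computed without forming exp(v) explicitly.
    stable = np.logaddexp(0.0, v)

    print(naive)    # last entry is inf once exp(v) overflows
    print(stable)   # finite everywhere, matches naive where naive is finite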
""" @@ -158,24 +132,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y, pred.take(terminal_region, axis=0)) -class BinomialDeviance(LossFunction): +class TreeBinomialDeviance(BinomialDeviance, TreeLossMixin): """Binomial deviance loss function for binary classification.""" - def init_estimator(self): - return ClassPriorPredictor() - - def __call__(self, y, pred): - """Compute the deviance (= negative log-likelihood). """ - ## return -2.0 * np.sum(y * pred - - ## np.log(1.0 + np.exp(pred))) / y.shape[0] - - # logaddexp(0, v) == log(1.0 + exp(v)) - return -2.0 * np.sum(y * pred - - np.logaddexp(0.0, pred)) / y.shape[0] - - def negative_gradient(self, y, pred): - return y - 1.0 / (1.0 + np.exp(-pred)) - def _update_terminal_region(self, tree, leaf, terminal_region, X, y, residual, pred): """Make a single Newton-Raphson step. """ @@ -191,9 +150,9 @@ def _update_terminal_region(self, tree, leaf, terminal_region, X, y, tree.value[leaf, 0] = numerator / denominator -LOSS_FUNCTIONS = {'ls': LeastSquaresError, - 'lad': LeastAbsoluteError, - 'deviance': BinomialDeviance} +LOSS_FUNCTIONS = {'ls': TreeLeastSquaresError, + 'lad': TreeLeastAbsoluteError, + 'deviance': TreeBinomialDeviance} class BaseGradientBoosting(BaseEstimator): diff --git a/sklearn/ensemble/tests/test_boosting.py b/sklearn/ensemble/tests/test_boosting.py index 30c4d1c0a9bc1..d4e98c62e1432 100644 --- a/sklearn/ensemble/tests/test_boosting.py +++ b/sklearn/ensemble/tests/test_boosting.py @@ -1,14 +1,14 @@ from unittest import TestCase import numpy as np -from sklearn.boosting import FunctionalGradientBoosting +from sklearn.ensemble.boosting import GradientBoostedRegressor from sklearn.tree import DecisionTreeRegressor from sklearn.datasets.base import load_boston -class TestFunctionalGradientBoosting(TestCase): +class TestGradientBoostedRegressor(TestCase): def setUp(self): self.task = load_boston() self.base_est = DecisionTreeRegressor(max_depth=2, min_split=4) - self.boosting = FunctionalGradientBoosting( + self.boosting = GradientBoostedRegressor( base_estimator=DecisionTreeRegressor( max_depth=2, min_split=4), @@ -19,7 +19,7 @@ def test_fit_returns_self(self): assert r is self.boosting def test_1_estimator_matches_base(self): - self.boosting = FunctionalGradientBoosting( + self.boosting = GradientBoostedRegressor( base_estimator=DecisionTreeRegressor( max_depth=2, min_split=4), @@ -36,7 +36,7 @@ def test_n_estimators(self): assert len(self.boosting.estimators_) == self.boosting.n_estimators def test_int_y_not_implemented(self): - self.assertRaises(NotImplementedError, + self.assertRaises(TypeError, self.boosting.fit, np.ones((4, 5)), np.arange(4).astype('int')) @@ -45,6 +45,6 @@ def test_mse_always_goes_down(self): task = self.task mse_list = [] for fit_iter in model.fit_iter(task['data'], task['target']): - mse_list.append(np.mean(fit_iter.residual ** 2)) + mse_list.append(np.mean(fit_iter.fg.residual ** 2)) if len(mse_list) > 1: self.assert_(mse_list[-1] < mse_list[-2])