From 5fd3e16894f299dcfee25a834c62943797af6682 Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Mon, 1 May 2023 21:48:39 +0530 Subject: [PATCH 1/9] Update base.py --- selector/base.py | 62 +++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/selector/base.py b/selector/base.py index 1a4094d..ded5332 100644 --- a/selector/base.py +++ b/selector/base.py @@ -3,20 +3,20 @@ import abc from copy import copy + from time import time from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.utils.random import check_random_state - -from robusta.crossval import crossval -from ._verbose import _print_last -from ._subset import FeatureSubset -from ._plot import _plot_progress, _plot_subset +from sklearn.utils import check_random_state +from robusta.crossval import crossval +from mypackage._verbose import _print_last +from mypackage._subset import FeatureSubset +from mypackage._plot import _plot_progress, _plot_subset class _Selector(BaseEstimator, TransformerMixin): @@ -123,8 +123,7 @@ def _eval_subset(self, subset, X, y, groups): - def eval_subset(self, subset, X, y, groups=None): - + def eval_subset(self, subset, X, y, groups=None): # Convert to FeatureSubset if type(subset) != type(self.features_): subset = self.features_.copy().set_subset(subset) @@ -146,7 +145,8 @@ def eval_subset(self, subset, X, y, groups=None): self.trials_.append(subset) # Verbose - _print_last(self) + if self.verbose: + print(subset) # Check limits self._check_max_iter() @@ -158,14 +158,16 @@ def eval_subset(self, subset, X, y, groups=None): def _check_max_iter(self): if hasattr(self, 'max_iter') and self.max_iter: if self.max_iter <= self.n_iters_: - if self.verbose: print('Iterations limit exceed!') + if self.verbose: + print('Iterations limit exceeded!') raise KeyboardInterrupt def _check_max_time(self): if hasattr(self, 'max_time') and self.max_time: if self.max_time <= self.total_time_: - if self.verbose: print('Time limit exceed!') + if self.verbose: + print('Time limit exceeded!') raise KeyboardInterrupt @@ -178,18 +180,18 @@ def n_iters_(self): return len(self.trials_) - #@property - #def feature_importances_(self): - # subset = self._select_features() - # trial = _find_trial(subset) - # return pd.Series(trial['importance'], index=self.features_) + @property + def feature_importances_(self): + subset = self._select_features() + trial = _find_trial(subset) + return pd.Series(trial['importance'], index=self.features_) - #@property - #def feature_importances_std_(self): - # subset = self._select_features() - # trial = _find_trial(subset) - # return pd.Series(trial['importance_std'], index=self.features_) + @property + def feature_importances_std_(self): + subset = self._select_features() + trial = _find_trial(subset) + return pd.Series(trial['importance_std'], index=self.features_) def plot_progress(self, **kwargs): @@ -199,7 +201,6 @@ def plot_subset(self, **kwargs): return _plot_subset(self, **kwargs) def get_subset(self): - if hasattr(self, 'best_subset_'): return self.best_subset_ else: @@ -207,35 +208,27 @@ def get_subset(self): raise NotFittedError(f'{model_name} is not fitted') - - def _check_k_features(k_features, n_features, param='k_features'): - if isinstance(k_features, int): if k_features > 0: k_features = k_features else: raise ValueError(f'Integer <{param}> must be greater than 0') - elif isinstance(k_features, float): if 0 < k_features < 1: k_features = max(k_features * n_features, 1) k_features = int(k_features) else: raise ValueError(f'Float <{param}> must be from interval (0, 1)') - else: raise ValueError(f'Parameter <{param}> must be int or float,' f'got {k_features}') - return k_features - - -class _WrappedGroupSelector: - def _get_importance(subset, - result): +class WrappedGroupSelector: + @staticmethod + def _get_importance(subset, result): if 'importance' in result: features, imp = result['features'], result['importance'] groups = [group for group, _ in features] @@ -243,8 +236,7 @@ def _get_importance(subset, imp = pd.DataFrame(imp, columns=groups).T imp = imp.groupby(groups).sum() - subset.importance = imp.mean(axis=1) - subset.importance_std = imp.std(axis=1) + subset.importance = importance_std = imp.std(axis=1) return subset def _set_features(self, X): From 737f33159c62378290c4c868ff0d4fb2978cd6ee Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Mon, 1 May 2023 21:57:58 +0530 Subject: [PATCH 2/9] Update _plot.py --- optimizer/_plot.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/optimizer/_plot.py b/optimizer/_plot.py index 38802af..8c1b798 100644 --- a/optimizer/_plot.py +++ b/optimizer/_plot.py @@ -1,22 +1,14 @@ from matplotlib.ticker import MaxNLocator -import matplotlib.pylab as plt +import matplotlib.pyplot as plt import seaborn as sns - import numpy as np - - def _plot_progress(opt, marker='.', color='#eeaa24', alpha=0.8): - fig, ax = plt.subplots(1,1) - ax.set_title(type(opt).__name__) ax.set_xlabel('iters') ax.set_ylabel('score') - - trials = opt.trials_ - - sns.regplot(trials.index+1, 'score', trials, color=color) - + trials = opt.trials + sns.regplot(trials.index+1, 'score', data=trials, color=color) ax.xaxis.set_major_locator(MaxNLocator(integer=True)) - fig.show() + plt.show() From 463053082815e91cd54c917cd241a276d382e156 Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Tue, 2 May 2023 12:40:05 +0530 Subject: [PATCH 3/9] Update base.py --- selector/base.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/selector/base.py b/selector/base.py index ded5332..e61647b 100644 --- a/selector/base.py +++ b/selector/base.py @@ -1,22 +1,14 @@ import pandas as pd import numpy as np import abc - from copy import copy - from time import time - from sklearn.base import BaseEstimator, TransformerMixin - from sklearn.utils import check_random_state - from robusta.crossval import crossval - -from mypackage._verbose import _print_last - -from mypackage._subset import FeatureSubset - -from mypackage._plot import _plot_progress, _plot_subset +from robusta.utils._subset import FeatureSubset +from robusta.utils._plot import _plot_progress, _plot_subset +from robusta.utils._verbose import _print_last class _Selector(BaseEstimator, TransformerMixin): From f0e70932c5ee7a498bd876ec86a4d5be07e91cb6 Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Tue, 2 May 2023 12:46:34 +0530 Subject: [PATCH 4/9] Update base.py --- selector/base.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/selector/base.py b/selector/base.py index e61647b..d5ea502 100644 --- a/selector/base.py +++ b/selector/base.py @@ -1,17 +1,37 @@ -import pandas as pd -import numpy as np import abc from copy import copy from time import time +from typing import List, Optional, Callable, Dict, Any, Union + +import numpy as np +import pandas as pd from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.exceptions import NotFittedError from sklearn.utils import check_random_state + from robusta.crossval import crossval from robusta.utils._subset import FeatureSubset from robusta.utils._plot import _plot_progress, _plot_subset from robusta.utils._verbose import _print_last -class _Selector(BaseEstimator, TransformerMixin): +class _Selector(BaseEstimator, TransformerMixin): + """ + A base class for feature selection transformers. + + Attributes + ---------- + features_ : FeatureSubset + The selected features to be used in `transform`. + """ +Methods + ------- + transform(X: pd.DataFrame) -> pd.DataFrame: + Reduce X to the selected features. + get_subset() -> List[str]: + Get list of columns to select. + + """ def transform(self, X): """Reduce X to the selected features. From 1209c17591a4ca27d568b0b025f53748584ff595 Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Tue, 2 May 2023 12:48:37 +0530 Subject: [PATCH 5/9] Update base.py --- selector/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selector/base.py b/selector/base.py index d5ea502..55189e6 100644 --- a/selector/base.py +++ b/selector/base.py @@ -24,7 +24,7 @@ class _Selector(BaseEstimator, TransformerMixin): features_ : FeatureSubset The selected features to be used in `transform`. """ -Methods + Methods ------- transform(X: pd.DataFrame) -> pd.DataFrame: Reduce X to the selected features. From b8caddd714d85edff0957640e79499ee8577a7f0 Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Tue, 2 May 2023 12:49:22 +0530 Subject: [PATCH 6/9] Update base.py --- selector/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/selector/base.py b/selector/base.py index 55189e6..edf63c0 100644 --- a/selector/base.py +++ b/selector/base.py @@ -16,8 +16,7 @@ class _Selector(BaseEstimator, TransformerMixin): - """ - A base class for feature selection transformers. + """A base class for feature selection transformers. Attributes ---------- @@ -30,7 +29,6 @@ class _Selector(BaseEstimator, TransformerMixin): Reduce X to the selected features. get_subset() -> List[str]: Get list of columns to select. - """ def transform(self, X): From 3634665a797263cafc1c3906daf87d23e6e37eaa Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Tue, 2 May 2023 12:49:45 +0530 Subject: [PATCH 7/9] Update base.py --- selector/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/selector/base.py b/selector/base.py index edf63c0..c5c368f 100644 --- a/selector/base.py +++ b/selector/base.py @@ -23,6 +23,7 @@ class _Selector(BaseEstimator, TransformerMixin): features_ : FeatureSubset The selected features to be used in `transform`. """ + Methods ------- transform(X: pd.DataFrame) -> pd.DataFrame: From 1a64e12addfaf9ede08d61297d78242bd481ecf1 Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Tue, 2 May 2023 12:51:01 +0530 Subject: [PATCH 8/9] Update base.py --- selector/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/selector/base.py b/selector/base.py index c5c368f..a812156 100644 --- a/selector/base.py +++ b/selector/base.py @@ -22,8 +22,7 @@ class _Selector(BaseEstimator, TransformerMixin): ---------- features_ : FeatureSubset The selected features to be used in `transform`. - """ - + Methods ------- transform(X: pd.DataFrame) -> pd.DataFrame: From 2bb7b5ec9a1c191a5428705165c6485134d5e260 Mon Sep 17 00:00:00 2001 From: RANJITH ROSAN <91519971+RANJITHROSAN17@users.noreply.github.com> Date: Tue, 2 May 2023 12:55:27 +0530 Subject: [PATCH 9/9] Update _plot.py --- optimizer/_plot.py | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/optimizer/_plot.py b/optimizer/_plot.py index 8c1b798..75ffae8 100644 --- a/optimizer/_plot.py +++ b/optimizer/_plot.py @@ -1,14 +1,43 @@ -from matplotlib.ticker import MaxNLocator import matplotlib.pyplot as plt +import optuna import seaborn as sns -import numpy as np +from matplotlib.ticker import MaxNLocator + +def _plot_progress(opt: optuna.Trial, + color: str = '#eeaa24') -> None: + """ + Plot the optimization progress of an Optuna study. + + Parameters + ---------- + opt : BaseStudy + The Optuna study object to plot. + color : str, optional + The color of the regression line in the plot. Default is '#eeaa24'. + + Returns + ------- + Nothing: + None + """ + + # Create a new figure with one subplot + fig, ax = plt.subplots(1, 1) -def _plot_progress(opt, marker='.', color='#eeaa24', alpha=0.8): - fig, ax = plt.subplots(1,1) + # Set the title and labels for the plot ax.set_title(type(opt).__name__) ax.set_xlabel('iters') ax.set_ylabel('score') - trials = opt.trials - sns.regplot(trials.index+1, 'score', data=trials, color=color) + + # Get the trials from the Optuna study object + trials = opt.trials_ + + # Plot a regression line of the score over the index of each trial + sns.regplot(trials.index + 1, 'score', trials, color=color) + + # Set the x-axis tick locator to only show integers ax.xaxis.set_major_locator(MaxNLocator(integer=True)) + + # Display the plot plt.show() +