Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions optimizer/_plot.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,43 @@
from matplotlib.ticker import MaxNLocator
import matplotlib.pylab as plt
import matplotlib.pyplot as plt
import optuna
import seaborn as sns
from matplotlib.ticker import MaxNLocator

import numpy as np

def _plot_progress(opt: optuna.Trial,
                   color: str = '#eeaa24') -> None:
    """
    Plot the optimization progress of an optimizer/study.

    Parameters
    ----------
    opt : optuna.Trial
        The object whose trials to plot. NOTE(review): the annotation says
        ``optuna.Trial`` but the code reads ``opt.trials_`` (trailing
        underscore), which is not an Optuna attribute — presumably this is
        a fitted optimizer wrapper exposing a DataFrame of trials; confirm
        against callers.
    color : str, optional
        The color of the regression line in the plot. Default is '#eeaa24'.

    Returns
    -------
    None
    """
    # Create a new figure with one subplot
    fig, ax = plt.subplots(1, 1)

    # Title the plot with the object's class name
    ax.set_title(type(opt).__name__)
    ax.set_xlabel('iters')
    ax.set_ylabel('score')

    # Trial history (expected: DataFrame with a 'score' column)
    trials = opt.trials_

    # Regression line of score vs. 1-based iteration number.
    # BUG FIX: seaborn >= 0.12 removed positional x/y/data arguments to
    # regplot(), so they must be passed by keyword.
    sns.regplot(x=trials.index + 1, y='score', data=trials, color=color)

    # Show only integer ticks on the x-axis (iteration numbers)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Display the plot
    plt.show()

82 changes: 42 additions & 40 deletions selector/base.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,35 @@
import pandas as pd
import numpy as np
import abc

from copy import copy
from time import time
from typing import List, Optional, Callable, Dict, Any, Union

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.random import check_random_state
from sklearn.exceptions import NotFittedError
from sklearn.utils import check_random_state

from robusta.crossval import crossval

from ._verbose import _print_last
from ._subset import FeatureSubset
from ._plot import _plot_progress, _plot_subset



from robusta.utils._subset import FeatureSubset
from robusta.utils._plot import _plot_progress, _plot_subset
from robusta.utils._verbose import _print_last


class _Selector(BaseEstimator, TransformerMixin):
"""A base class for feature selection transformers.

Attributes
----------
features_ : FeatureSubset
The selected features to be used in `transform`.

Methods
-------
transform(X: pd.DataFrame) -> pd.DataFrame:
Reduce X to the selected features.
get_subset() -> List[str]:
Get list of columns to select.
"""

def transform(self, X):
"""Reduce X to the selected features.
Expand Down Expand Up @@ -123,8 +133,7 @@ def _eval_subset(self, subset, X, y, groups):



def eval_subset(self, subset, X, y, groups=None):

def eval_subset(self, subset, X, y, groups=None):
# Convert to FeatureSubset
if type(subset) != type(self.features_):
subset = self.features_.copy().set_subset(subset)
Expand All @@ -146,7 +155,8 @@ def eval_subset(self, subset, X, y, groups=None):
self.trials_.append(subset)

# Verbose
_print_last(self)
if self.verbose:
print(subset)

# Check limits
self._check_max_iter()
Expand All @@ -158,14 +168,16 @@ def eval_subset(self, subset, X, y, groups=None):
def _check_max_iter(self):
    """Stop the search once the iteration budget is exhausted.

    Raises
    ------
    KeyboardInterrupt
        When ``self.n_iters_`` has reached ``self.max_iter``. The selector's
        fit loop is expected to catch this to stop gracefully.
    """
    # Only enforce the limit when a truthy max_iter is configured
    if hasattr(self, 'max_iter') and self.max_iter:
        if self.max_iter <= self.n_iters_:
            if self.verbose:
                print('Iterations limit exceeded!')
            raise KeyboardInterrupt


def _check_max_time(self):
    """Stop the search once the time budget is exhausted.

    Raises
    ------
    KeyboardInterrupt
        When ``self.total_time_`` has reached ``self.max_time``. The
        selector's fit loop is expected to catch this to stop gracefully.
    """
    # Only enforce the limit when a truthy max_time is configured
    if hasattr(self, 'max_time') and self.max_time:
        if self.max_time <= self.total_time_:
            if self.verbose:
                print('Time limit exceeded!')
            raise KeyboardInterrupt


Expand All @@ -178,18 +190,18 @@ def n_iters_(self):
return len(self.trials_)


#@property
#def feature_importances_(self):
# subset = self._select_features()
# trial = _find_trial(subset)
# return pd.Series(trial['importance'], index=self.features_)
@property
def feature_importances_(self):
    # Per-feature importance from the trial matching the selected subset,
    # indexed by feature name.
    # NOTE(review): ``_find_trial`` is not defined or imported anywhere in
    # this file's visible scope — confirm it exists, otherwise accessing
    # this property raises NameError.
    subset = self._select_features()
    trial = _find_trial(subset)
    return pd.Series(trial['importance'], index=self.features_)


#@property
#def feature_importances_std_(self):
# subset = self._select_features()
# trial = _find_trial(subset)
# return pd.Series(trial['importance_std'], index=self.features_)
@property
def feature_importances_std_(self):
    # Standard deviation of per-feature importance from the trial matching
    # the selected subset, indexed by feature name.
    # NOTE(review): ``_find_trial`` is not defined or imported anywhere in
    # this file's visible scope — confirm it exists, otherwise accessing
    # this property raises NameError.
    subset = self._select_features()
    trial = _find_trial(subset)
    return pd.Series(trial['importance_std'], index=self.features_)


def plot_progress(self, **kwargs):
Expand All @@ -199,52 +211,42 @@ def plot_subset(self, **kwargs):
return _plot_subset(self, **kwargs)

def get_subset(self):
    """Return the best feature subset found during fitting.

    Raises
    ------
    NotFittedError
        If the selector has not been fitted (``best_subset_`` is absent).
    """
    # Guard clause: fail fast when fit() has not produced a result yet
    if not hasattr(self, 'best_subset_'):
        model_name = self.__class__.__name__
        raise NotFittedError(f'{model_name} is not fitted')

    return self.best_subset_




def _check_k_features(k_features, n_features, param='k_features'):

if isinstance(k_features, int):
if k_features > 0:
k_features = k_features
else:
raise ValueError(f'Integer <{param}> must be greater than 0')

elif isinstance(k_features, float):
if 0 < k_features < 1:
k_features = max(k_features * n_features, 1)
k_features = int(k_features)
else:
raise ValueError(f'Float <{param}> must be from interval (0, 1)')

else:
raise ValueError(f'Parameter <{param}> must be int or float,'
f'got {k_features}')

return k_features




class WrappedGroupSelector:
    """Mixin for selectors that operate on groups of features.

    NOTE(review): the rest of this class (e.g. ``_set_features``) is defined
    below; only importance aggregation lives here.
    """

    @staticmethod
    def _get_importance(subset, result):
        """Aggregate per-feature importances by group and attach to subset.

        Parameters
        ----------
        subset : FeatureSubset-like
            Object to receive ``importance`` / ``importance_std`` attributes.
        result : dict
            Cross-validation result; when it contains 'importance', it must
            also contain 'features' as (group, feature) pairs.

        Returns
        -------
        subset : same object, possibly annotated with importances.
        """
        if 'importance' in result:
            features, imp = result['features'], result['importance']
            groups = [group for group, _ in features]

            # Transpose so rows are features (labeled by group), columns
            # are CV folds, then sum features within each group.
            imp = pd.DataFrame(imp, columns=groups).T
            imp = imp.groupby(groups).sum()

            # BUG FIX: the importance must be the across-fold mean and
            # importance_std the across-fold std; the broken chained
            # assignment stored the std in both attributes.
            subset.importance = imp.mean(axis=1)
            subset.importance_std = imp.std(axis=1)

        return subset

def _set_features(self, X):
Expand Down