Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions optimizer/_plot.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,43 @@
from matplotlib.ticker import MaxNLocator
import matplotlib.pylab as plt
import matplotlib.pyplot as plt
import optuna
import seaborn as sns
from matplotlib.ticker import MaxNLocator

import numpy as np

def _plot_progress(opt: optuna.Trial,
                   color: str = '#eeaa24') -> None:
    """
    Plot the optimization progress of an optimizer/study.

    Parameters
    ----------
    opt : optuna.Trial
        The object whose trials to plot. NOTE(review): the annotation says
        ``optuna.Trial`` but the code reads ``opt.trials_`` (trailing
        underscore), which is not an Optuna attribute — presumably this is
        a fitted optimizer wrapper exposing a DataFrame of trials; confirm
        against callers.
    color : str, optional
        The color of the regression line in the plot. Default is '#eeaa24'.

    Returns
    -------
    None
    """
    # Create a new figure with one subplot
    fig, ax = plt.subplots(1, 1)

    # Title the plot with the object's class name
    ax.set_title(type(opt).__name__)
    ax.set_xlabel('iters')
    ax.set_ylabel('score')

    # Trial history (expected: DataFrame with a 'score' column)
    trials = opt.trials_

    # Regression line of score vs. 1-based iteration number.
    # BUG FIX: seaborn >= 0.12 removed positional x/y/data arguments to
    # regplot(), so they must be passed by keyword.
    sns.regplot(x=trials.index + 1, y='score', data=trials, color=color)

    # Show only integer ticks on the x-axis (iteration numbers)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Display the plot
    plt.show()

82 changes: 42 additions & 40 deletions selector/base.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,35 @@
import pandas as pd
import numpy as np
import abc

from copy import copy
from time import time
from typing import List, Optional, Callable, Dict, Any, Union

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.random import check_random_state
from sklearn.exceptions import NotFittedError
from sklearn.utils import check_random_state

from robusta.crossval import crossval

from ._verbose import _print_last
from ._subset import FeatureSubset
from ._plot import _plot_progress, _plot_subset



from robusta.utils._subset import FeatureSubset
from robusta.utils._plot import _plot_progress, _plot_subset
from robusta.utils._verbose import _print_last


class _Selector(BaseEstimator, TransformerMixin):
"""A base class for feature selection transformers.

Attributes
----------
features_ : FeatureSubset
The selected features to be used in `transform`.

Methods
-------
transform(X: pd.DataFrame) -> pd.DataFrame:
Reduce X to the selected features.
get_subset() -> List[str]:
Get list of columns to select.
"""

def transform(self, X):
"""Reduce X to the selected features.
Expand Down Expand Up @@ -123,8 +133,7 @@ def _eval_subset(self, subset, X, y, groups):



def eval_subset(self, subset, X, y, groups=None):

def eval_subset(self, subset, X, y, groups=None):
# Convert to FeatureSubset
if type(subset) != type(self.features_):
subset = self.features_.copy().set_subset(subset)
Expand All @@ -146,7 +155,8 @@ def eval_subset(self, subset, X, y, groups=None):
self.trials_.append(subset)

# Verbose
_print_last(self)
if self.verbose:
print(subset)

# Check limits
self._check_max_iter()
Expand All @@ -158,14 +168,16 @@ def eval_subset(self, subset, X, y, groups=None):
def _check_max_iter(self):
    """Stop the search once the iteration budget is exhausted.

    Raises
    ------
    KeyboardInterrupt
        When ``self.n_iters_`` has reached ``self.max_iter``. The selector's
        fit loop is expected to catch this to stop gracefully.
    """
    # Only enforce the limit when a truthy max_iter is configured
    if hasattr(self, 'max_iter') and self.max_iter:
        if self.max_iter <= self.n_iters_:
            if self.verbose:
                print('Iterations limit exceeded!')
            raise KeyboardInterrupt


def _check_max_time(self):
    """Stop the search once the time budget is exhausted.

    Raises
    ------
    KeyboardInterrupt
        When ``self.total_time_`` has reached ``self.max_time``. The
        selector's fit loop is expected to catch this to stop gracefully.
    """
    # Only enforce the limit when a truthy max_time is configured
    if hasattr(self, 'max_time') and self.max_time:
        if self.max_time <= self.total_time_:
            if self.verbose:
                print('Time limit exceeded!')
            raise KeyboardInterrupt


Expand All @@ -178,18 +190,18 @@ def n_iters_(self):
return len(self.trials_)


#@property
#def feature_importances_(self):
# subset = self._select_features()
# trial = _find_trial(subset)
# return pd.Series(trial['importance'], index=self.features_)
@property
def feature_importances_(self):
    # Per-feature importance from the trial matching the selected subset,
    # indexed by feature name.
    # NOTE(review): ``_find_trial`` is not defined or imported anywhere in
    # this file's visible scope — confirm it exists, otherwise accessing
    # this property raises NameError.
    subset = self._select_features()
    trial = _find_trial(subset)
    return pd.Series(trial['importance'], index=self.features_)


#@property
#def feature_importances_std_(self):
# subset = self._select_features()
# trial = _find_trial(subset)
# return pd.Series(trial['importance_std'], index=self.features_)
@property
def feature_importances_std_(self):
    # Standard deviation of per-feature importance from the trial matching
    # the selected subset, indexed by feature name.
    # NOTE(review): ``_find_trial`` is not defined or imported anywhere in
    # this file's visible scope — confirm it exists, otherwise accessing
    # this property raises NameError.
    subset = self._select_features()
    trial = _find_trial(subset)
    return pd.Series(trial['importance_std'], index=self.features_)


def plot_progress(self, **kwargs):
Expand All @@ -199,52 +211,42 @@ def plot_subset(self, **kwargs):
return _plot_subset(self, **kwargs)

def get_subset(self):
    """Return the best feature subset found during fitting.

    Raises
    ------
    NotFittedError
        If the selector has not been fitted (``best_subset_`` is absent).
    """
    # Guard clause: fail fast when fit() has not produced a result yet
    if not hasattr(self, 'best_subset_'):
        model_name = self.__class__.__name__
        raise NotFittedError(f'{model_name} is not fitted')

    return self.best_subset_




def _check_k_features(k_features, n_features, param='k_features'):

if isinstance(k_features, int):
if k_features > 0:
k_features = k_features
else:
raise ValueError(f'Integer <{param}> must be greater than 0')

elif isinstance(k_features, float):
if 0 < k_features < 1:
k_features = max(k_features * n_features, 1)
k_features = int(k_features)
else:
raise ValueError(f'Float <{param}> must be from interval (0, 1)')

else:
raise ValueError(f'Parameter <{param}> must be int or float,'
f'got {k_features}')

return k_features




class WrappedGroupSelector:
    """Mixin for selectors that operate on groups of features.

    NOTE(review): the rest of this class (e.g. ``_set_features``) is defined
    below; only importance aggregation lives here.
    """

    @staticmethod
    def _get_importance(subset, result):
        """Aggregate per-feature importances by group and attach to subset.

        Parameters
        ----------
        subset : FeatureSubset-like
            Object to receive ``importance`` / ``importance_std`` attributes.
        result : dict
            Cross-validation result; when it contains 'importance', it must
            also contain 'features' as (group, feature) pairs.

        Returns
        -------
        subset : same object, possibly annotated with importances.
        """
        if 'importance' in result:
            features, imp = result['features'], result['importance']
            groups = [group for group, _ in features]

            # Transpose so rows are features (labeled by group), columns
            # are CV folds, then sum features within each group.
            imp = pd.DataFrame(imp, columns=groups).T
            imp = imp.groupby(groups).sum()

            # BUG FIX: the importance must be the across-fold mean and
            # importance_std the across-fold std; the broken chained
            # assignment stored the std in both attributes.
            subset.importance = imp.mean(axis=1)
            subset.importance_std = imp.std(axis=1)

        return subset

def _set_features(self, X):
Expand Down