Skip to content

Commit 45faad5

Browse files
committed
Added AutoNetEnsemble, PowerTransformer, and weight-initialization strategies; assorted changes in Pipeline, TrainNode, and CrossValidation.
1 parent d7a0f1c commit 45faad5

File tree

108 files changed

+2680
-960
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+2680
-960
lines changed

.gitignore

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
# Python
99
*__pycache__/
10+
*.pyc
11+
12+
# Zipped
13+
*.tar.gz
1014

1115
# Temp
1216
*tmp_models/
@@ -17,7 +21,28 @@ benchmark_results_cluster/
1721
ns_credentials*/
1822
configs.json
1923
results.json
24+
outputs/
25+
jobs.txt
26+
.pylintrc
2027

2128
# Build
2229
*build/
2330
*autonet.egg-info
31+
*.simg
32+
33+
# Metalearning data
34+
/metalearning_data/
35+
/metalearning_comparison_results/
36+
/meta_outputs/
37+
/metamodels/
38+
39+
40+
# Datasets
41+
/datasets/
42+
43+
# Meta GPU
44+
*meta_logs/
45+
46+
# ensemble data
47+
predictions_for_ensemble.npy
48+
test_predictions_for_ensemble.npy

autoPyTorch/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
1+
import sys, os
2+
hpbandster = os.path.abspath(os.path.join(__file__, '..', '..', 'submodules', 'HpBandSter'))
3+
sys.path.append(hpbandster)
4+
15
from autoPyTorch.core.autonet_classes import AutoNetClassification, AutoNetMultilabel, AutoNetRegression
26
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
7+
from autoPyTorch.core.ensemble import AutoNetEnsemble
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
from abc import ABCMeta, abstractmethod
2+
3+
4+
class AbstractEnsemble(object, metaclass=ABCMeta):
    """Interface for ensembles built on top of base model predictions.

    NOTE: the original declared ``__metaclass__ = ABCMeta``, which is
    Python-2 syntax and is silently ignored by Python 3, so abstract-method
    enforcement never happened. Using the ``metaclass=`` keyword restores it.
    """

    @abstractmethod
    def fit(self, base_models_predictions, true_targets, model_identifiers):
        """Fit an ensemble given predictions of base models and targets.

        Ensemble building maximizes performance (in contrast to
        hyperparameter optimization)!

        Parameters
        ----------
        base_models_predictions : array of shape = [n_base_models, n_data_points, n_targets]
            n_targets is the number of classes in case of classification,
            n_targets is 0 or 1 in case of regression

        true_targets : array of shape [n_targets]

        model_identifiers : identifier for each base model.
            Can be used for practical text output of the ensemble.

        Returns
        -------
        self

        """
        pass

    @abstractmethod
    def predict(self, base_models_predictions):
        """Create ensemble predictions from the base model predictions.

        Parameters
        ----------
        base_models_predictions : array of shape = [n_base_models, n_data_points, n_targets]
            Same as in the fit method.

        Returns
        -------
        array : [n_data_points]
        """
        # The original body was a stray no-op expression statement (`self`);
        # replaced by `pass`.
        pass

    @abstractmethod
    def get_models_with_weights(self, models):
        """Return a list of (weight, model) pairs

        Parameters
        ----------
        models : dict {identifier : model object}
            The identifiers are the same as the one presented to the fit()
            method. Models can be used for nice printing.

        Returns
        -------
        array : [(weight_1, model_1), ..., (weight_n, model_n)]
        """

    @abstractmethod
    def get_selected_model_identifiers(self):
        """Return identifiers of models in the ensemble.

        This includes models which have a weight of zero!

        Returns
        -------
        list
        """

    @abstractmethod
    def get_validation_performance(self):
        """Return validation performance of ensemble.

        Returns
        -------
        float
        """
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
from collections import Counter
2+
import random
3+
4+
import numpy as np
5+
6+
from autoPyTorch.components.ensembles.abstract_ensemble import AbstractEnsemble
7+
8+
9+
class EnsembleSelection(AbstractEnsemble):
    """Ensemble selection (Caruana et al., 2004).

    Greedily adds base models (with replacement) so that the metric of the
    averaged ensemble predictions improves the most; each model's final
    weight is the fraction of selection rounds in which it was picked.
    """

    def __init__(self, ensemble_size, metric, minimize,
                 sorted_initialization_n_best=0, only_consider_n_best=0,
                 bagging=False, mode='fast'):
        # Number of greedy selection rounds (models may be picked repeatedly).
        self.ensemble_size = ensemble_size
        # Callable metric(predictions, labels) -> float.
        self.metric = metric
        # Scores are minimized internally; -1 flips a maximizing metric.
        self.minimize = 1 if minimize else -1
        # Optionally seed the ensemble with the n best single models.
        self.sorted_initialization_n_best = sorted_initialization_n_best
        # Optionally restrict the candidate pool to the n best single models.
        self.only_consider_n_best = only_consider_n_best
        self.bagging = bagging
        self.mode = mode  # 'fast' (incremental mean) or 'slow' (recompute mean)

    def fit(self, predictions, labels, identifiers):
        """Fit the ensemble; see AbstractEnsemble.fit for parameter semantics."""
        self.ensemble_size = int(self.ensemble_size)
        if self.ensemble_size < 1:
            raise ValueError('Ensemble size cannot be less than one!')
        if self.mode not in ('fast', 'slow'):
            raise ValueError('Unknown mode %s' % self.mode)

        if self.bagging:
            self._bagging(predictions, labels)
        else:
            self._fit(predictions, labels)
        self._calculate_weights()
        self.identifiers_ = identifiers
        return self

    def _fit(self, predictions, labels):
        # Dispatch to the selected implementation.
        if self.mode == 'fast':
            self._fast(predictions, labels)
        else:
            self._slow(predictions, labels)
        return self

    def _apply_sorted_initialization(self, predictions, labels,
                                     ensemble, trajectory, order):
        """Seed ensemble/trajectory/order in place with the n best single models.

        Returns the number of selection slots consumed. Extracted because
        this logic was duplicated verbatim in _fast and _slow.
        """
        indices = self._sorted_initialization(
            predictions, labels, self.sorted_initialization_n_best)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = self.metric(ensemble_, labels) * self.minimize
            trajectory.append(ensemble_performance)
        return self.sorted_initialization_n_best

    def _fast(self, predictions, labels):
        """Fast version of Rich Caruana's ensemble selection method."""
        self.num_input_models_ = len(predictions)

        ensemble = []
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        if self.sorted_initialization_n_best > 0:
            ensemble_size -= self._apply_sorted_initialization(
                predictions, labels, ensemble, trajectory, order)

        only_consider_indices = None
        if self.only_consider_n_best > 0:
            only_consider_indices = set(self._sorted_initialization(
                predictions, labels, self.only_consider_n_best))

        for i in range(ensemble_size):
            scores = np.zeros((len(predictions)))
            s = len(ensemble)
            if s == 0:
                weighted_ensemble_prediction = np.zeros(predictions[0].shape)
            else:
                # Existing members contribute s/(s+1) of the candidate mean,
                # so each candidate only has to add its own 1/(s+1) share.
                ensemble_prediction = np.mean(np.array(ensemble), axis=0)
                weighted_ensemble_prediction = (s / float(s + 1)) * \
                    ensemble_prediction
            fant_ensemble_prediction = np.zeros(weighted_ensemble_prediction.shape)
            for j, pred in enumerate(predictions):
                # TODO: this could potentially be vectorized! - let's profile
                # the script first!
                if only_consider_indices and j not in only_consider_indices:
                    # Exclude models outside the candidate pool.
                    scores[j] = float("inf")
                    continue
                # `...` instead of the original `[:, :]` so per-model
                # predictions of any rank (1-D, 2-D, 3-D) are supported.
                fant_ensemble_prediction[...] = weighted_ensemble_prediction + \
                    (1. / float(s + 1)) * pred
                scores[j] = self.metric(fant_ensemble_prediction, labels) * self.minimize
            # Break score ties at random instead of always taking the first.
            all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
            best = np.random.choice(all_best)
            ensemble.append(predictions[best])
            trajectory.append(scores[best])
            order.append(best)

            # Handle special case
            if len(predictions) == 1:
                break

        self.indices_ = order
        self.trajectory_ = trajectory
        self.train_score_ = trajectory[-1]

    def _slow(self, predictions, labels):
        """Rich Caruana's ensemble selection method (recomputes the full mean)."""
        self.num_input_models_ = len(predictions)

        ensemble = []
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        if self.sorted_initialization_n_best > 0:
            ensemble_size -= self._apply_sorted_initialization(
                predictions, labels, ensemble, trajectory, order)

        only_consider_indices = None
        if self.only_consider_n_best > 0:
            only_consider_indices = set(self._sorted_initialization(
                predictions, labels, self.only_consider_n_best))

        for i in range(ensemble_size):
            # len() instead of .shape[0] so plain lists of arrays work
            # (consistent with _fast).
            scores = np.zeros([len(predictions)])
            for j, pred in enumerate(predictions):
                if only_consider_indices and j not in only_consider_indices:
                    scores[j] = float("inf")
                    continue
                # Tentatively add the candidate, score the mean, then undo.
                ensemble.append(pred)
                ensemble_prediction = np.mean(np.array(ensemble), axis=0)
                scores[j] = self.metric(ensemble_prediction, labels) * self.minimize
                ensemble.pop()
            best = np.nanargmin(scores)
            ensemble.append(predictions[best])
            trajectory.append(scores[best])
            order.append(best)

            # Handle special case
            if len(predictions) == 1:
                break

        self.indices_ = np.array(order)
        self.trajectory_ = np.array(trajectory)
        self.train_score_ = trajectory[-1]

    def _calculate_weights(self):
        # A model's weight is its selection frequency over all rounds.
        ensemble_members = Counter(self.indices_).most_common()
        weights = np.zeros((self.num_input_models_,), dtype=float)
        for model_index, count in ensemble_members:
            weights[model_index] = float(count) / self.ensemble_size

        # Renormalize when rounding / early break left the total below 1.
        if np.sum(weights) < 1:
            weights = weights / np.sum(weights)

        self.weights_ = weights

    def _sorted_initialization(self, predictions, labels, n_best):
        """Return indices of the n_best single models ranked by metric score."""
        perf = np.zeros([len(predictions)])

        for idx, prediction in enumerate(predictions):
            perf[idx] = self.metric(prediction, labels) * self.minimize

        return np.argsort(perf)[:n_best]

    def _bagging(self, predictions, labels, fraction=0.5, n_bags=20):
        """Rich Caruana's ensemble selection method with bagging."""
        # NOTE: deliberately disabled upstream; the code below the raise is
        # unreachable and kept only as a sketch of the intended algorithm.
        raise ValueError('Bagging might not work with class-based interface!')
        n_models = predictions.shape[0]
        bag_size = int(n_models * fraction)

        for j in range(n_bags):
            # Bagging a set of models
            indices = sorted(random.sample(range(0, n_models), bag_size))
            bag = predictions[indices, :, :]
            self._fit(bag, labels)

    def predict(self, predictions):
        """Return the weighted sum of base model predictions.

        Accepts either predictions for all input models or only for the
        models with non-zero weight.
        """
        if len(predictions) < len(self.weights_):
            # Caller supplied predictions only for non-zero-weight models.
            weights = [weight for weight in self.weights_ if weight > 0]
        else:
            weights = self.weights_

        # Build the result without scaling the caller's arrays in place
        # (the original implementation mutated `predictions`).
        return np.sum([weight * pred for weight, pred in zip(weights, predictions)],
                      axis=0)

    def __str__(self):
        return 'Ensemble Selection:\n\tTrajectory: %s\n\tMembers: %s' \
               '\n\tWeights: %s\n\tIdentifiers: %s' % \
               (' '.join(['%d: %5f' % (idx, performance)
                          for idx, performance in enumerate(self.trajectory_)]),
                self.indices_, self.weights_,
                ' '.join([str(identifier) for idx, identifier in
                          enumerate(self.identifiers_)
                          if self.weights_[idx] > 0]))

    def get_models_with_weights(self, models):
        """Return (weight, model) pairs for non-zero-weight members, heaviest first."""
        output = []

        for i, weight in enumerate(self.weights_):
            identifier = self.identifiers_[i]
            if weight > 0.0:
                output.append((weight, models[identifier]))

        output.sort(reverse=True, key=lambda t: t[0])

        return output

    def get_selected_model_identifiers(self):
        """Return identifiers of non-zero-weight members, heaviest first."""
        selected = [(weight, self.identifiers_[i])
                    for i, weight in enumerate(self.weights_)
                    if weight > 0.0]

        # Bug fix: the original sorted the bare identifiers with
        # key=lambda t: t[0], i.e. by the identifier's first element rather
        # than by ensemble weight (and crashed on non-indexable identifiers).
        selected.sort(reverse=True, key=lambda t: t[0])

        return [identifier for _, identifier in selected]

    def get_validation_performance(self):
        """Validation score of the final ensemble (last trajectory entry)."""
        return self.trajectory_[-1]

autoPyTorch/components/lr_scheduler/lr_schedulers.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def get_config_space(*args, **kwargs):
3636
class SchedulerNone(AutoNetLearningRateSchedulerBase):
3737

3838
def _get_scheduler(self, optimizer, config):
39-
return NoScheduling()
39+
return NoScheduling(optimizer=optimizer)
4040

4141
class SchedulerStepLR(AutoNetLearningRateSchedulerBase):
4242

@@ -124,9 +124,17 @@ def get_config_space(*args, **kwargs):
124124

125125

126126
class NoScheduling():
    """Dummy scheduler that performs no learning-rate adjustment.

    Mimics the torch scheduler interface (step / get_lr) so training code
    can treat "no scheduling" uniformly with real schedulers.
    """

    def __init__(self, optimizer):
        # Kept only so get_lr() can report the optimizer's configured lr.
        self.optimizer = optimizer

    def step(self, epoch):
        # Intentionally a no-op: the learning rate is never changed.
        return

    def get_lr(self):
        """Return [base lr] from the optimizer's defaults, or [None] if unavailable."""
        try:
            return [self.optimizer.defaults["lr"]]
        except (AttributeError, KeyError, TypeError):
            # Narrowed from a bare `except:`, which would also have swallowed
            # KeyboardInterrupt / SystemExit.
            return [None]
130138

131139

132140
import math

0 commit comments

Comments
 (0)