From 95a0ad27f39ca5dafb014a3f2a3af099097b8232 Mon Sep 17 00:00:00 2001
From: Timo Kaufmann
Date: Fri, 22 May 2020 20:45:59 +0200
Subject: [PATCH 1/2] Require uninitialized optimizers for our learners

An initialized optimizer is a tensorflow object, which (at least in
graph mode in tf1) is not deepcopy-able. Even if we were able to
deepcopy it, we probably wouldn't want to, since it contains state.
Scikit-learn needs to be able to deepcopy an estimator's arguments so
that it can create copies and derivatives of it. Instead, we require
the uninitialized optimizer and its parameters to be passed to our
learners separately. The learner can then initialize the optimizer as
needed.
---
 HISTORY.rst                                      |  4 +++
 csrank/choicefunction/cmpnet_choice.py           |  8 +++---
 csrank/choicefunction/fate_choice.py             |  8 +++---
 csrank/choicefunction/feta_choice.py             | 10 ++++---
 csrank/choicefunction/ranknet_choice.py          |  8 +++---
 csrank/core/cmpnet_core.py                       | 15 +++++------
 csrank/core/fate_network.py                      | 22 +++++++++-------
 csrank/core/feta_network.py                      | 15 +++++------
 csrank/core/ranknet_core.py                      | 15 +++++------
 .../discretechoice/cmpnet_discrete_choice.py     |  8 +++---
 csrank/discretechoice/fate_discrete_choice.py    |  8 +++---
 csrank/discretechoice/feta_discrete_choice.py    | 10 ++++---
 .../discretechoice/ranknet_discrete_choice.py    |  8 +++---
 csrank/learner.py                                | 14 ++++++++++
 csrank/objectranking/cmp_net.py                  |  8 +++---
 csrank/objectranking/fate_object_ranker.py       |  8 +++---
 csrank/objectranking/feta_object_ranker.py       |  8 +++---
 csrank/objectranking/list_net.py                 | 21 ++++++++-------
 csrank/objectranking/rank_net.py                 |  8 +++---
 csrank/tests/test_choice_functions.py            | 15 +++++++----
 csrank/tests/test_discrete_choice.py             | 15 +++++++----
 csrank/tests/test_fate.py                        | 11 ++++----
 csrank/tests/test_ranking.py                     | 26 +++++++------------
 docs/notebooks/FATE-Net-DC.ipynb                 |  5 +++-
 docs/notebooks/FATE-Net-Ranking.ipynb            |  5 +++-
 docs/notebooks/Rank-Net-Choice.ipynb             |  2 +-
 26 files changed, 169 insertions(+), 116 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 87372ef4..7ed49678 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -9,6 +9,10 @@ Unreleased
   particular, the parameters nesterov, momentum and lr are now set to the
   default values set by keras.
 
+* All optimizers must now be passed in uninitialized. Optimizer parameters can
+  be set by passing `optimizer__{kwarg}` parameters to the learner. This
+  follows the scikit-learn and skorch standard.
+
 1.2.1 (2020-06-08)
 ------------------
 
diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py
index 4eec3496..507527e3 100644
--- a/csrank/choicefunction/cmpnet_choice.py
+++ b/csrank/choicefunction/cmpnet_choice.py
@@ -19,7 +19,7 @@ def __init__(
         kernel_regularizer=l2(1e-4),
         kernel_initializer="lecun_normal",
         activation="relu",
-        optimizer=SGD(),
+        optimizer=SGD,
         metrics=["binary_accuracy"],
         batch_size=256,
         random_state=None,
@@ -60,8 +60,10 @@ def __init__(
             Regularizer function applied to all the hidden weight matrices.
         activation : function or string
             Type of activation function to use in each hidden layer
-        optimizer : function or string
-            Optimizer to use during stochastic gradient descent
+        optimizer: Class
+            Uninitialized optimizer class following the keras optimizer interface.
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py index 3acb7932..3f06dcb0 100644 --- a/csrank/choicefunction/fate_choice.py +++ b/csrank/choicefunction/fate_choice.py @@ -21,7 +21,7 @@ def __init__( activation="selu", kernel_initializer="lecun_normal", kernel_regularizer=l2(0.01), - optimizer=SGD(), + optimizer=SGD, batch_size=256, metrics=None, random_state=None, @@ -63,8 +63,10 @@ def __init__( Initialization function for the weights of each hidden layer kernel_regularizer : function or string Regularizer to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. batch_size : int Batch size to use for training loss_function : function diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index 7e08f145..ae44171b 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -34,7 +34,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="selu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -79,8 +79,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : string or function Activation function to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. metrics : list List of evaluation metrics (can be non-differentiable) batch_size : int @@ -218,7 +220,7 @@ def create_input_lambda(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py index 6ee9095d..f55964bb 100644 --- a/csrank/choicefunction/ranknet_choice.py +++ b/csrank/choicefunction/ranknet_choice.py @@ -19,7 +19,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -53,8 +53,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 7d3e38dd..47a0ad5d 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -4,7 +4,6 @@ from keras import backend as K from keras import Input from keras import Model -from keras import optimizers from keras.layers import concatenate from keras.layers import Dense from keras.optimizers import SGD @@ -29,7 +28,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -47,8 +46,7 @@ def __init__( self.kernel_initializer = kernel_initializer self.loss_function = loss_function - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self.n_hidden = n_hidden self.n_units = n_units @@ -97,6 +95,7 @@ def construct_model(self): model: keras :class:`Model` Neural network to learn the CmpNet utility score """ + self._initialize_optimizer() x1x2 = concatenate([self.x1, self.x2]) x2x1 = concatenate([self.x2, self.x1]) self.logger.debug("Creating the model") @@ -110,7 +109,7 @@ def construct_model(self): merged_output = concatenate([N_g, N_l]) model = Model(inputs=[self.x1, self.x2], outputs=merged_output) model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -212,7 +211,7 @@ def clear_memory(self, **kwargs): sess = tf.Session() K.set_session(sess) - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -255,8 +254,8 @@ def set_tunable_parameters( self.n_units = n_units self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index 86e62dce..75ec200e 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -1,6 +1,5 @@ import logging -from keras import optimizers import keras.backend as K from keras.layers import Dense from keras.layers import Input @@ -29,7 +28,7 @@ def __init__( activation="selu", kernel_initializer="lecun_normal", kernel_regularizer=l2(0.01), - optimizer=SGD(), + optimizer=SGD, batch_size=256, random_state=None, **kwargs, @@ -50,8 +49,10 @@ def __init__( Initialization function for the weights of each hidden layer kernel_regularizer : function or string Regularizer to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
batch_size : int Batch size to use for training random_state : int or object @@ -69,8 +70,7 @@ def __init__( self.kernel_initializer = kernel_initializer self.kernel_regularizer = kernel_regularizer self.batch_size = batch_size - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self.joint_layers = None self.scorer = None keys = list(kwargs.keys()) @@ -78,6 +78,7 @@ def __init__( if key not in allowed_dense_kwargs: del kwargs[key] self.kwargs = kwargs + self._initialize_optimizer() self._construct_layers( activation=self.activation, kernel_initializer=self.kernel_initializer, @@ -167,8 +168,8 @@ def set_tunable_parameters( self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size # Hack to fix memory leak: - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._construct_layers( activation=self.activation, @@ -474,7 +475,7 @@ def construct_model(self, n_features, n_objects): model = Model(inputs=input_layer, outputs=scores) model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -536,6 +537,7 @@ def fit( """ self.random_state_ = check_random_state(self.random_state) _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._initialize_optimizer() self._fit( X=X, Y=Y, @@ -703,7 +705,7 @@ def clear_memory(self, n_objects=5, **kwargs): K.clear_session() sess = tf.Session() K.set_session(sess) - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( activation=self.activation, kernel_initializer=self.kernel_initializer, diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 01bc1c40..8f6c20ec 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -5,7 +5,6 @@ from keras import backend as K from keras import Input from keras import Model -from keras import optimizers from keras.layers import add from keras.layers import concatenate from keras.layers import Dense @@ -36,7 +35,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="selu", - optimizer=SGD(), + optimizer=SGD, metrics=None, batch_size=256, random_state=None, @@ -54,8 +53,7 @@ def __init__( self.num_subsample = num_subsample self.batch_size = batch_size self.hash_file = None - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self._use_zeroth_model = add_zeroth_order_model self.n_hidden = n_hidden self.n_units = n_units @@ -251,7 +249,7 @@ def create_input_lambda(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -282,6 +280,7 @@ def fit( Keyword arguments for the fit function """ _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -369,8 +368,8 @@ def set_tunable_parameters( self.n_units = n_units self.kernel_regularizer = 
l2(reg_strength) self.batch_size = batch_size - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._pairwise_model = None self._zero_order_model = None self._construct_layers( @@ -402,7 +401,7 @@ def clear_memory(self, **kwargs): self._pairwise_model = None self._zero_order_model = None - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index 9d9c3f4c..b73e9b43 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -3,7 +3,6 @@ from keras import backend as K from keras import Input from keras import Model -from keras import optimizers from keras.layers import add from keras.layers import Dense from keras.layers import Lambda @@ -28,7 +27,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -41,8 +40,7 @@ def __init__( self.kernel_regularizer = kernel_regularizer self.kernel_initializer = kernel_initializer self.loss_function = loss_function - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self.n_hidden = n_hidden self.n_units = n_units keys = list(kwargs.keys()) @@ -101,7 +99,7 @@ def construct_model(self): output = self.output_node(merged_inputs) model = Model(inputs=[self.x1, self.x2], outputs=output) model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -147,6 +145,7 @@ def fit( self.logger.debug("Instances created {}".format(X1.shape[0])) self.logger.debug("Creating the model") + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -217,7 +216,7 @@ def clear_memory(self, **kwargs): K.set_session(sess) self._scoring_model = None - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -260,8 +259,8 @@ def set_tunable_parameters( self.n_units = n_units self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._scoring_model = None self._construct_layers( kernel_regularizer=self.kernel_regularizer, diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py index 1aca2dfa..5e8315d0 100644 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ b/csrank/discretechoice/cmpnet_discrete_choice.py @@ -18,7 +18,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -59,8 +59,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : function or string 
Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index 01451fa0..0da4e5f2 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -20,7 +20,7 @@ def __init__( activation="selu", kernel_initializer="lecun_normal", kernel_regularizer=l2(0.01), - optimizer=SGD(), + optimizer=SGD, batch_size=256, random_state=None, **kwargs, @@ -61,8 +61,10 @@ def __init__( Initialization function for the weights of each hidden layer kernel_regularizer : function or string Regularizer to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. batch_size : int Batch size to use for training loss_function : function diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index 0628c7e1..2705dada 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -32,7 +32,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="selu", - optimizer=SGD(), + optimizer=SGD, metrics=["categorical_accuracy"], batch_size=256, random_state=None, @@ -77,8 +77,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : string or function Activation function to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. metrics : list List of evaluation metrics (can be non-differentiable) batch_size : int @@ -262,7 +264,7 @@ def get_score_object(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py index 26a91ed6..7fc5bf0a 100644 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ b/csrank/discretechoice/ranknet_discrete_choice.py @@ -18,7 +18,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -53,8 +53,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. 
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
         metrics : list
             List of metrics to evaluate during training (can be non-differentiable)
         batch_size : int
diff --git a/csrank/learner.py b/csrank/learner.py
index b7dff67b..12be59d7 100644
--- a/csrank/learner.py
+++ b/csrank/learner.py
@@ -4,7 +4,21 @@
 from csrank.tunable import Tunable
 
 
+def filter_dict_by_prefix(source, prefix):
+    result = dict()
+    for key in source.keys():
+        if key.startswith(prefix):
+            key_stripped = key[len(prefix) :]
+            result[key_stripped] = source[key]
+    return result
+
+
 class Learner(Tunable, metaclass=ABCMeta):
+    def _initialize_optimizer(self):
+        optimizer_params = filter_dict_by_prefix(self.__dict__, "optimizer__")
+        optimizer_params.update(filter_dict_by_prefix(self.kwargs, "optimizer__"))
+        self.optimizer_ = self.optimizer(**optimizer_params)
+
     @abstractmethod
     def fit(self, X, Y, **kwargs):
         """
diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py
index 7afc8017..25d013b0 100644
--- a/csrank/objectranking/cmp_net.py
+++ b/csrank/objectranking/cmp_net.py
@@ -20,7 +20,7 @@ def __init__(
         kernel_regularizer=l2(1e-4),
         kernel_initializer="lecun_normal",
         activation="relu",
-        optimizer=SGD(),
+        optimizer=SGD,
         metrics=["binary_accuracy"],
         batch_size=256,
         random_state=None,
@@ -62,8 +62,10 @@ def __init__(
             Regularizer function applied to all the hidden weight matrices.
         activation : function or string
             Type of activation function to use in each hidden layer
-        optimizer : function or string
-            Optimizer to use during stochastic gradient descent
+        optimizer: Class
+            Uninitialized optimizer class following the keras optimizer interface.
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
         metrics : list
             List of metrics to evaluate during training (can be non-differentiable)
 
diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py
index cb0e180b..841e6bff 100644
--- a/csrank/objectranking/fate_object_ranker.py
+++ b/csrank/objectranking/fate_object_ranker.py
@@ -19,7 +19,7 @@ def __init__(
         activation="selu",
         kernel_initializer="lecun_normal",
         kernel_regularizer=l2(0.01),
-        optimizer=SGD(),
+        optimizer=SGD,
         batch_size=256,
         loss_function=hinged_rank_loss,
         metrics=[zero_one_rank_loss_for_scores_ties],
@@ -61,8 +61,10 @@ def __init__(
             Initialization function for the weights of each hidden layer
         kernel_regularizer : function or string
             Regularizer to use in the hidden units
-        optimizer : string or function
-            Stochastic gradient optimizer
+        optimizer: Class
+            Uninitialized optimizer class following the keras optimizer interface.
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
batch_size : int Batch size to use for training loss_function : function diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py index a1554dd8..02ee8bbb 100644 --- a/csrank/objectranking/feta_object_ranker.py +++ b/csrank/objectranking/feta_object_ranker.py @@ -23,7 +23,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="selu", - optimizer=SGD(), + optimizer=SGD, metrics=None, batch_size=256, random_state=None, @@ -67,8 +67,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : string or function Activation function to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. metrics : list List of evaluation metrics (can be non-differentiable) batch_size : int diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index af2eddf0..74c136ab 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -2,7 +2,6 @@ from keras import backend as K from keras import Input -from keras import optimizers from keras.layers import concatenate from keras.layers import Dense from keras.models import Model @@ -34,7 +33,7 @@ def __init__( kernel_regularizer=l2(1e-4), activation="selu", kernel_initializer="lecun_normal", - optimizer=SGD(), + optimizer=SGD, metrics=[zero_one_rank_loss_for_scores_ties], batch_size=256, random_state=None, @@ -70,8 +69,10 @@ def __init__( Type of activation function to use in each hidden layer kernel_initializer : function or string Initialization function for the weights of each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) @@ -94,8 +95,7 @@ def __init__( self.kernel_regularizer = kernel_regularizer self.kernel_initializer = kernel_initializer self.loss_function = loss_function - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self.n_hidden = n_hidden self.n_units = n_units keys = list(kwargs.keys()) @@ -170,6 +170,7 @@ def fit( """ self.random_state_ = check_random_state(self.random_state) _n_instances, _n_objects, self.n_object_features_fit_ = X.shape + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -213,7 +214,7 @@ def construct_model(self): merged = concatenate(outputs) model = Model(inputs=self.input_layer, outputs=merged) model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -274,7 +275,7 @@ def clear_memory(self, **kwargs): sess = tf.Session() K.set_session(sess) self._scoring_model = None - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -317,8 +318,8 @@ def set_tunable_parameters( self.n_units = n_units self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py index 53cd3f4f..dc98c4ac 100644 --- a/csrank/objectranking/rank_net.py +++ b/csrank/objectranking/rank_net.py @@ -20,7 +20,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -54,8 +54,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/tests/test_choice_functions.py b/csrank/tests/test_choice_functions.py index 5702e3f5..46d760db 100644 --- a/csrank/tests/test_choice_functions.py +++ b/csrank/tests/test_choice_functions.py @@ -27,7 +27,12 @@ "Informedness": instance_informedness, "AucScore": auc_score, } -optimizer = SGD(lr=1e-3, momentum=0.9, nesterov=True) +optimizer_common_args = { + "optimizer": SGD, + "optimizer__lr": 1e-3, + "optimizer__momentum": 0.9, + "optimizer__nesterov": True, +} def get_vals(values): @@ -37,7 +42,7 @@ def get_vals(values): choice_functions = { FETA_CHOICE: ( FETAChoiceFunction, - {"add_zeroth_order_model": True, "optimizer": optimizer}, + {"add_zeroth_order_model": True, **optimizer_common_args}, get_vals([0.946, 0.9684, 0.9998]), ), FATE_CHOICE: ( @@ -47,7 +52,7 @@ def get_vals(values): "n_hidden_set_layers": 1, "n_hidden_joint_units": 5, "n_hidden_set_units": 5, - "optimizer": optimizer, + **optimizer_common_args, }, get_vals([0.8185, 0.6070, 0.9924]), ), @@ -63,12 +68,12 @@ def get_vals(values): ), RANKNET_CHOICE: ( RankNetChoiceFunction, - {"optimizer": optimizer}, + optimizer_common_args.copy(), get_vals([0.9522, 0.9866, 1.0]), ), CMPNET_CHOICE: ( CmpNetChoiceFunction, - {"optimizer": optimizer}, + optimizer_common_args.copy(), get_vals([0.8554, 0.8649, 0.966]), ), GLM_CHOICE: (GeneralizedLinearModel, {}, get_vals([0.9567, 0.9955, 1.0])), diff --git a/csrank/tests/test_discrete_choice.py b/csrank/tests/test_discrete_choice.py index 5a2e0223..c0a4b842 100644 --- a/csrank/tests/test_discrete_choice.py +++ b/csrank/tests/test_discrete_choice.py @@ -30,7 +30,12 @@ "CategoricalAccuracy": categorical_accuracy_np, "CategoricalTopK2": topk_categorical_accuracy_np(k=2), } -optimizer = SGD(lr=1e-3, momentum=0.9, nesterov=True) +optimizer_common_args = { + "optimizer": SGD, + "optimizer__lr": 1e-3, + "optimizer__momentum": 0.9, + "optimizer__nesterov": True, +} def get_vals(values=[1.0, 1.0]): @@ -40,17 +45,17 @@ def get_vals(values=[1.0, 1.0]): discrete_choice_functions = { FETA_DC: ( FETADiscreteChoiceFunction, - {"n_hidden": 1, "optimizer": optimizer}, + {"n_hidden": 1, **optimizer_common_args}, get_vals([0.978, 1.0]), ), RANKNET_DC: ( RankNetDiscreteChoiceFunction, - {"optimizer": optimizer}, + optimizer_common_args.copy(), get_vals([0.97, 0.996]), ), CMPNET_DC: ( CmpNetDiscreteChoiceFunction, - {"optimizer": optimizer}, + optimizer_common_args.copy(), get_vals([0.994, 1.0]), ), FATE_DC: ( @@ -60,7 +65,7 @@ def get_vals(values=[1.0, 1.0]): "n_hidden_set_layers": 1, "n_hidden_joint_units": 5, "n_hidden_set_units": 5, - "optimizer": optimizer, + **optimizer_common_args, }, get_vals([0.95, 0.998]), ), diff --git a/csrank/tests/test_fate.py b/csrank/tests/test_fate.py index 1242e5e1..bae80817 100644 --- a/csrank/tests/test_fate.py +++ b/csrank/tests/test_fate.py @@ -7,7 +7,7 @@ from csrank import FATENetworkCore from csrank import FATEObjectRanker -from csrank.tests.test_ranking import optimizer +from csrank.tests.test_ranking import optimizer_common_args def test_construction_core(): @@ -33,6 +33,7 @@ def fit(self, *args, **kwargs): pass grc = MockClass() + grc._initialize_optimizer() grc._construct_layers( activation=grc.activation, kernel_initializer=grc.kernel_initializer, @@ -42,7 +43,7 @@ def fit(self, *args, **kwargs): scores = grc.join_input_layers(input_layer, None, n_layers=0, n_objects=n_objects) model = Model(inputs=input_layer, outputs=scores) - 
model.compile(loss="mse", optimizer=grc.optimizer)
+    model.compile(loss="mse", optimizer=grc.optimizer_)
     X = np.random.randn(100, n_objects, n_features)
     y = X.sum(axis=2)
     model.fit(x=X, y=y, verbose=0)
@@ -59,8 +60,8 @@ def fit(self, *args, **kwargs):
     assert grc.batch_size == params["batch_size"]
     rtol = 1e-2
     atol = 1e-4
-    key = "learning_rate" if "learning_rate" in grc.optimizer.get_config() else "lr"
-    learning_rate = grc.optimizer.get_config().get(key, 0.0)
+    key = "learning_rate" if "learning_rate" in grc.optimizer_.get_config() else "lr"
+    learning_rate = grc.optimizer_.get_config().get(key, 0.0)
     assert np.isclose(
         learning_rate, params["learning_rate"], rtol=rtol, atol=atol, equal_nan=False
     )
@@ -88,7 +89,7 @@ def trivial_ranking_problem_generator():
         n_hidden_joint_units=5,
         n_hidden_set_units=5,
         kernel_regularizer=l2(1e-4),
-        optimizer=optimizer,
+        **optimizer_common_args,
     )
     fate.fit_generator(
         generator=trivial_ranking_problem_generator(),
diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py
index 69186f73..8aa9a126 100644
--- a/csrank/tests/test_ranking.py
+++ b/csrank/tests/test_ranking.py
@@ -20,7 +20,12 @@
 from csrank.objectranking import *
 from csrank.objectranking.fate_object_ranker import FATEObjectRanker
 
-optimizer = SGD(lr=1e-3, momentum=0.9, nesterov=True)
+optimizer_common_args = {
+    "optimizer": SGD,
+    "optimizer__lr": 1e-3,
+    "optimizer__momentum": 0.9,
+    "optimizer__nesterov": True,
+}
 
 object_rankers = {
     FATELINEAR_RANKER: (
@@ -31,12 +36,12 @@
     FETALINEAR_RANKER: (FETALinearObjectRanker, {}, (0.0, 1.0)),
     FETA_RANKER: (
         FETAObjectRanker,
-        {"add_zeroth_order_model": True, "optimizer": optimizer},
+        {"add_zeroth_order_model": True, **optimizer_common_args},
         (0.0, 1.0),
     ),
-    RANKNET: (RankNet, {"optimizer": optimizer}, (0.0, 1.0)),
-    CMPNET: (CmpNet, {"optimizer": optimizer}, (0.0, 1.0)),
-    LISTNET: (ListNet, {"n_top": 3, "optimizer": optimizer}, (0.0, 1.0)),
+    RANKNET: (RankNet, optimizer_common_args.copy(), (0.0, 1.0)),
+    CMPNET: (CmpNet, optimizer_common_args.copy(), (0.0, 1.0)),
+    LISTNET: (ListNet, {"n_top": 3, **optimizer_common_args}, (0.0, 1.0)),
     ERR: (ExpectedRankRegression, {}, (0.0, 1.0)),
     RANKSVM: (RankSVM, {}, (0.0, 1.0)),
     FATE_RANKER: (
@@ -46,7 +51,6 @@
             "n_hidden_set_layers": 1,
             "n_hidden_joint_units": 5,
             "n_hidden_set_units": 5,
-            "optimizer": optimizer,
         },
         (0.0, 1.0),
     ),
@@ -79,16 +83,6 @@ def check_params_tunable(tunable_obj, params, rtol=1e-2, atol=1e-4):
             )
         else:
             assert value == expected
-        elif key == "learning_rate" and hasattr(tunable_obj, "optimizer"):
-            key = (
-                "learning_rate"
-                if "learning_rate" in tunable_obj.optimizer.get_config()
-                else "lr"
-            )
-            learning_rate = tunable_obj.optimizer.get_config().get(key, 0.0)
-            assert np.isclose(
-                learning_rate, value, rtol=rtol, atol=atol, equal_nan=False
-            )
         elif key == "reg_strength" and hasattr(tunable_obj, "kernel_regularizer"):
             config = tunable_obj.kernel_regularizer.get_config()
             val1 = np.isclose(
diff --git a/docs/notebooks/FATE-Net-DC.ipynb b/docs/notebooks/FATE-Net-DC.ipynb
index 5997dfaa..7dad5bff 100644
--- a/docs/notebooks/FATE-Net-DC.ipynb
+++ b/docs/notebooks/FATE-Net-DC.ipynb
@@ -140,7 +140,10 @@
     "from csrank.losses import smooth_rank_loss\n",
     "fate = FATEObjectRanker(\n",
     "    loss_function=smooth_rank_loss,\n",
-    "    optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))"
+    "    optimizer=SGD,\n",
+    "    optimizer__lr=1e-4,\n",
+    "    optimizer__nesterov=True,\n",
+    "    optimizer__momentum=0.9)"
    ]
   },
  {
diff --git a/docs/notebooks/FATE-Net-Ranking.ipynb 
b/docs/notebooks/FATE-Net-Ranking.ipynb index 22a12b2b..f2674972 100644 --- a/docs/notebooks/FATE-Net-Ranking.ipynb +++ b/docs/notebooks/FATE-Net-Ranking.ipynb @@ -132,7 +132,10 @@ "from csrank.losses import smooth_rank_loss\n", "fate = FATEObjectRanker(\n", " loss_function=smooth_rank_loss,\n", - " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" + " optimizer=SGD,\n", + " optimizer__lr=1e-4,\n", + " optimizer__nesterov=True,\n", + " optimizer__momentum=0.9)" ] }, { diff --git a/docs/notebooks/Rank-Net-Choice.ipynb b/docs/notebooks/Rank-Net-Choice.ipynb index d038a760..e882456c 100644 --- a/docs/notebooks/Rank-Net-Choice.ipynb +++ b/docs/notebooks/Rank-Net-Choice.ipynb @@ -124,7 +124,7 @@ "outputs": [], "source": [ "ranknet = RankNetChoiceFunction(\n", - " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" + " optimizer=SGD, optimizer__lr=1e-4, optimizer__nesterov=True, optimizer__momentum=0.9)" ] }, { From 702bbcc53f7755648ee5224bf85b299dedd7c1c9 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Fri, 26 Jun 2020 15:20:14 +0200 Subject: [PATCH 2/2] Pin keras to <2.4 Newer keras versions delegate to tf.keras and therefore need tf2. See https://github.com/keras-team/keras/releases/tag/2.4.0. --- requirements-dev.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index e496cb1e..3ba551cc 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,7 +9,7 @@ psycopg2-binary>=2.7 docopt>=0.6.0 joblib>=0.9.4 tqdm>=4.11.2 -keras>=2.3 +keras>=2.3,<2.4 pymc3>=3.8 theano>=1.0 # Pick either CPU or GPU version of tensorflow: diff --git a/setup.py b/setup.py index 102745b0..f2610f0b 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ "docopt>=0.6.0", "joblib>=0.9.4", "tqdm>=4.11.2", - "keras>=2.3", + "keras>=2.3,<2.4", # 2.4 delegates to tf, needs tf2 # Pick either CPU or GPU version of tensorflow: "tensorflow>=1.5,<2.0", # tensorflow-gpu>=1.0.1"
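
A minimal usage sketch of the convention this series introduces, for
reviewers. It assumes csrank's FATEObjectRanker and keras' SGD, both taken
from the notebook diffs above; any learner touched by these patches is used
the same way, and the keyword values shown are just the ones from the
notebooks.

    from keras.optimizers import SGD

    from csrank import FATEObjectRanker

    # Before this series a pre-initialized optimizer was passed in, e.g.
    #   FATEObjectRanker(optimizer=SGD(lr=1e-4, momentum=0.9, nesterov=True))
    # which scikit-learn could not deepcopy. Now the uninitialized class is
    # passed, with its constructor arguments given as optimizer__{kwarg}
    # parameters in the scikit-learn/skorch style:
    fate = FATEObjectRanker(
        optimizer=SGD,
        optimizer__lr=1e-4,
        optimizer__momentum=0.9,
        optimizer__nesterov=True,
    )
    # Learner._initialize_optimizer() strips the "optimizer__" prefix and
    # instantiates the optimizer as needed, exposing it as the fitted
    # attribute optimizer_, so the constructor arguments themselves stay
    # plain, deepcopy-able values.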