diff --git a/HISTORY.rst b/HISTORY.rst index 87372ef4..7ed49678 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -9,6 +9,10 @@ Unreleased particular, the parameters nesterov, momentum and lr are now set to the default values set by keras. +* All optimizers must now be passed in uninitialized. Optimizer parameters can + be set by passing `optimizer__{kwarg}` parameters to the learner. This + follows the scikit-learn and skorch standard. + 1.2.1 (2020-06-08) ------------------ diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py index 4eec3496..507527e3 100644 --- a/csrank/choicefunction/cmpnet_choice.py +++ b/csrank/choicefunction/cmpnet_choice.py @@ -19,7 +19,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -60,8 +60,10 @@ def __init__( Regularizer function applied to all the hidden weight matrices. activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py index 3acb7932..3f06dcb0 100644 --- a/csrank/choicefunction/fate_choice.py +++ b/csrank/choicefunction/fate_choice.py @@ -21,7 +21,7 @@ def __init__( activation="selu", kernel_initializer="lecun_normal", kernel_regularizer=l2(0.01), - optimizer=SGD(), + optimizer=SGD, batch_size=256, metrics=None, random_state=None, @@ -63,8 +63,10 @@ def __init__( Initialization function for the weights of each hidden layer kernel_regularizer : function or string Regularizer to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. batch_size : int Batch size to use for training loss_function : function diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index 7e08f145..ae44171b 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -34,7 +34,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="selu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -79,8 +79,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : string or function Activation function to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of evaluation metrics (can be non-differentiable) batch_size : int @@ -218,7 +220,7 @@ def create_input_lambda(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py index 6ee9095d..f55964bb 100644 --- a/csrank/choicefunction/ranknet_choice.py +++ b/csrank/choicefunction/ranknet_choice.py @@ -19,7 +19,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -53,8 +53,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 7d3e38dd..47a0ad5d 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -4,7 +4,6 @@ from keras import backend as K from keras import Input from keras import Model -from keras import optimizers from keras.layers import concatenate from keras.layers import Dense from keras.optimizers import SGD @@ -29,7 +28,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -47,8 +46,7 @@ def __init__( self.kernel_initializer = kernel_initializer self.loss_function = loss_function - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self.n_hidden = n_hidden self.n_units = n_units @@ -97,6 +95,7 @@ def construct_model(self): model: keras :class:`Model` Neural network to learn the CmpNet utility score """ + self._initialize_optimizer() x1x2 = concatenate([self.x1, self.x2]) x2x1 = concatenate([self.x2, self.x1]) self.logger.debug("Creating the model") @@ -110,7 +109,7 @@ def construct_model(self): merged_output = concatenate([N_g, N_l]) model = Model(inputs=[self.x1, self.x2], outputs=merged_output) model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -212,7 +211,7 @@ def clear_memory(self, **kwargs): sess = tf.Session() K.set_session(sess) - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -255,8 +254,8 @@ def set_tunable_parameters( self.n_units = n_units 
self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index 86e62dce..75ec200e 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -1,6 +1,5 @@ import logging -from keras import optimizers import keras.backend as K from keras.layers import Dense from keras.layers import Input @@ -29,7 +28,7 @@ def __init__( activation="selu", kernel_initializer="lecun_normal", kernel_regularizer=l2(0.01), - optimizer=SGD(), + optimizer=SGD, batch_size=256, random_state=None, **kwargs, @@ -50,8 +49,10 @@ def __init__( Initialization function for the weights of each hidden layer kernel_regularizer : function or string Regularizer to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
batch_size : int Batch size to use for training random_state : int or object @@ -69,8 +70,7 @@ def __init__( self.kernel_initializer = kernel_initializer self.kernel_regularizer = kernel_regularizer self.batch_size = batch_size - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self.joint_layers = None self.scorer = None keys = list(kwargs.keys()) @@ -78,6 +78,7 @@ def __init__( if key not in allowed_dense_kwargs: del kwargs[key] self.kwargs = kwargs + self._initialize_optimizer() self._construct_layers( activation=self.activation, kernel_initializer=self.kernel_initializer, @@ -167,8 +168,8 @@ def set_tunable_parameters( self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size # Hack to fix memory leak: - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._construct_layers( activation=self.activation, @@ -474,7 +475,7 @@ def construct_model(self, n_features, n_objects): model = Model(inputs=input_layer, outputs=scores) model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -536,6 +537,7 @@ def fit( """ self.random_state_ = check_random_state(self.random_state) _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._initialize_optimizer() self._fit( X=X, Y=Y, @@ -703,7 +705,7 @@ def clear_memory(self, n_objects=5, **kwargs): K.clear_session() sess = tf.Session() K.set_session(sess) - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( activation=self.activation, kernel_initializer=self.kernel_initializer, diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 01bc1c40..8f6c20ec 100644 
--- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -5,7 +5,6 @@ from keras import backend as K from keras import Input from keras import Model -from keras import optimizers from keras.layers import add from keras.layers import concatenate from keras.layers import Dense @@ -36,7 +35,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="selu", - optimizer=SGD(), + optimizer=SGD, metrics=None, batch_size=256, random_state=None, @@ -54,8 +53,7 @@ def __init__( self.num_subsample = num_subsample self.batch_size = batch_size self.hash_file = None - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self._use_zeroth_model = add_zeroth_order_model self.n_hidden = n_hidden self.n_units = n_units @@ -251,7 +249,7 @@ def create_input_lambda(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -282,6 +280,7 @@ def fit( Keyword arguments for the fit function """ _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -369,8 +368,8 @@ def set_tunable_parameters( self.n_units = n_units self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._pairwise_model = None self._zero_order_model = None self._construct_layers( @@ -402,7 +401,7 @@ def clear_memory(self, **kwargs): self._pairwise_model = None self._zero_order_model = None - 
self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index 9d9c3f4c..b73e9b43 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -3,7 +3,6 @@ from keras import backend as K from keras import Input from keras import Model -from keras import optimizers from keras.layers import add from keras.layers import Dense from keras.layers import Lambda @@ -28,7 +27,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -41,8 +40,7 @@ def __init__( self.kernel_regularizer = kernel_regularizer self.kernel_initializer = kernel_initializer self.loss_function = loss_function - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self.n_hidden = n_hidden self.n_units = n_units keys = list(kwargs.keys()) @@ -101,7 +99,7 @@ def construct_model(self): output = self.output_node(merged_inputs) model = Model(inputs=[self.x1, self.x2], outputs=output) model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -147,6 +145,7 @@ def fit( self.logger.debug("Instances created {}".format(X1.shape[0])) self.logger.debug("Creating the model") + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -217,7 +216,7 @@ def clear_memory(self, **kwargs): K.set_session(sess) self._scoring_model = None - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( 
kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -260,8 +259,8 @@ def set_tunable_parameters( self.n_units = n_units self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._scoring_model = None self._construct_layers( kernel_regularizer=self.kernel_regularizer, diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py index 1aca2dfa..5e8315d0 100644 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ b/csrank/discretechoice/cmpnet_discrete_choice.py @@ -18,7 +18,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -59,8 +59,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index 01451fa0..0da4e5f2 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -20,7 +20,7 @@ def __init__( activation="selu", kernel_initializer="lecun_normal", kernel_regularizer=l2(0.01), - optimizer=SGD(), + optimizer=SGD, batch_size=256, random_state=None, **kwargs, @@ -61,8 +61,10 @@ def __init__( Initialization function for the weights of each hidden layer kernel_regularizer : function or string Regularizer to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. batch_size : int Batch size to use for training loss_function : function diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index 0628c7e1..2705dada 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -32,7 +32,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="selu", - optimizer=SGD(), + optimizer=SGD, metrics=["categorical_accuracy"], batch_size=256, random_state=None, @@ -77,8 +77,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : string or function Activation function to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of evaluation metrics (can be non-differentiable) batch_size : int @@ -262,7 +264,7 @@ def get_score_object(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py index 26a91ed6..7fc5bf0a 100644 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ b/csrank/discretechoice/ranknet_discrete_choice.py @@ -18,7 +18,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -53,8 +53,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/learner.py b/csrank/learner.py index b7dff67b..12be59d7 100644 --- a/csrank/learner.py +++ b/csrank/learner.py @@ -4,7 +4,21 @@ from csrank.tunable import Tunable +def filter_dict_by_prefix(source, prefix): + result = dict() + for key in source.keys(): + if key.startswith(prefix): + key_stripped = key[len(prefix) :] + result[key_stripped] = source[key] + return result + + class Learner(Tunable, metaclass=ABCMeta): + def _initialize_optimizer(self): + optimizer_params = filter_dict_by_prefix(self.__dict__, "optimizer__") + optimizer_params.update(filter_dict_by_prefix(self.kwargs, "optimizer__")) + self.optimizer_ = self.optimizer(**optimizer_params) + @abstractmethod def fit(self, X, Y, **kwargs): """ diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py index 7afc8017..25d013b0 100644 --- a/csrank/objectranking/cmp_net.py +++ b/csrank/objectranking/cmp_net.py @@ -20,7 +20,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -62,8 +62,10 @@ def __init__( Regularizer function applied to all the hidden weight matrices. activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py index cb0e180b..841e6bff 100644 --- a/csrank/objectranking/fate_object_ranker.py +++ b/csrank/objectranking/fate_object_ranker.py @@ -19,7 +19,7 @@ def __init__( activation="selu", kernel_initializer="lecun_normal", kernel_regularizer=l2(0.01), - optimizer=SGD(), + optimizer=SGD, batch_size=256, loss_function=hinged_rank_loss, metrics=[zero_one_rank_loss_for_scores_ties], @@ -61,8 +61,10 @@ def __init__( Initialization function for the weights of each hidden layer kernel_regularizer : function or string Regularizer to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. batch_size : int Batch size to use for training loss_function : function diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py index a1554dd8..02ee8bbb 100644 --- a/csrank/objectranking/feta_object_ranker.py +++ b/csrank/objectranking/feta_object_ranker.py @@ -23,7 +23,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="selu", - optimizer=SGD(), + optimizer=SGD, metrics=None, batch_size=256, random_state=None, @@ -67,8 +67,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : string or function Activation function to use in the hidden units - optimizer : string or function - Stochastic gradient optimizer + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of evaluation metrics (can be non-differentiable) batch_size : int diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index af2eddf0..74c136ab 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -2,7 +2,6 @@ from keras import backend as K from keras import Input -from keras import optimizers from keras.layers import concatenate from keras.layers import Dense from keras.models import Model @@ -34,7 +33,7 @@ def __init__( kernel_regularizer=l2(1e-4), activation="selu", kernel_initializer="lecun_normal", - optimizer=SGD(), + optimizer=SGD, metrics=[zero_one_rank_loss_for_scores_ties], batch_size=256, random_state=None, @@ -70,8 +69,10 @@ def __init__( Type of activation function to use in each hidden layer kernel_initializer : function or string Initialization function for the weights of each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
metrics : list List of metrics to evaluate during training (can be non-differentiable) @@ -94,8 +95,7 @@ def __init__( self.kernel_regularizer = kernel_regularizer self.kernel_initializer = kernel_initializer self.loss_function = loss_function - self.optimizer = optimizers.get(optimizer) - self._optimizer_config = self.optimizer.get_config() + self.optimizer = optimizer self.n_hidden = n_hidden self.n_units = n_units keys = list(kwargs.keys()) @@ -170,6 +170,7 @@ def fit( """ self.random_state_ = check_random_state(self.random_state) _n_instances, _n_objects, self.n_object_features_fit_ = X.shape + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -213,7 +214,7 @@ def construct_model(self): merged = concatenate(outputs) model = Model(inputs=self.input_layer, outputs=merged) model.compile( - loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics + loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics ) return model @@ -274,7 +275,7 @@ def clear_memory(self, **kwargs): sess = tf.Session() K.set_session(sess) self._scoring_model = None - self.optimizer = self.optimizer.from_config(self._optimizer_config) + self._initialize_optimizer() self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -317,8 +318,8 @@ def set_tunable_parameters( self.n_units = n_units self.kernel_regularizer = l2(reg_strength) self.batch_size = batch_size - self.optimizer = self.optimizer.from_config(self._optimizer_config) - K.set_value(self.optimizer.lr, learning_rate) + self._initialize_optimizer() + K.set_value(self.optimizer_.lr, learning_rate) self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py index 53cd3f4f..dc98c4ac 100644 --- a/csrank/objectranking/rank_net.py 
+++ b/csrank/objectranking/rank_net.py @@ -20,7 +20,7 @@ def __init__( kernel_regularizer=l2(1e-4), kernel_initializer="lecun_normal", activation="relu", - optimizer=SGD(), + optimizer=SGD, metrics=["binary_accuracy"], batch_size=256, random_state=None, @@ -54,8 +54,10 @@ def __init__( Initialization function for the weights of each hidden layer activation : function or string Type of activation function to use in each hidden layer - optimizer : function or string - Optimizer to use during stochastic gradient descent + optimizer: Class + Uninitialized optimizer class following the keras optimizer interface. + optimizer__{kwarg} + Arguments to be passed to the optimizer on initialization, such as optimizer__lr. metrics : list List of metrics to evaluate during training (can be non-differentiable) batch_size : int diff --git a/csrank/tests/test_choice_functions.py b/csrank/tests/test_choice_functions.py index 5702e3f5..46d760db 100644 --- a/csrank/tests/test_choice_functions.py +++ b/csrank/tests/test_choice_functions.py @@ -27,7 +27,12 @@ "Informedness": instance_informedness, "AucScore": auc_score, } -optimizer = SGD(lr=1e-3, momentum=0.9, nesterov=True) +optimizer_common_args = { + "optimizer": SGD, + "optimizer__lr": 1e-3, + "optimizer__momentum": 0.9, + "optimizer__nesterov": True, +} def get_vals(values): @@ -37,7 +42,7 @@ def get_vals(values): choice_functions = { FETA_CHOICE: ( FETAChoiceFunction, - {"add_zeroth_order_model": True, "optimizer": optimizer}, + {"add_zeroth_order_model": True, **optimizer_common_args}, get_vals([0.946, 0.9684, 0.9998]), ), FATE_CHOICE: ( @@ -47,7 +52,7 @@ def get_vals(values): "n_hidden_set_layers": 1, "n_hidden_joint_units": 5, "n_hidden_set_units": 5, - "optimizer": optimizer, + **optimizer_common_args, }, get_vals([0.8185, 0.6070, 0.9924]), ), @@ -63,12 +68,12 @@ def get_vals(values): ), RANKNET_CHOICE: ( RankNetChoiceFunction, - {"optimizer": optimizer}, + optimizer_common_args.copy(), get_vals([0.9522, 0.9866, 1.0]), ), 
CMPNET_CHOICE: ( CmpNetChoiceFunction, - {"optimizer": optimizer}, + optimizer_common_args.copy(), get_vals([0.8554, 0.8649, 0.966]), ), GLM_CHOICE: (GeneralizedLinearModel, {}, get_vals([0.9567, 0.9955, 1.0])), diff --git a/csrank/tests/test_discrete_choice.py b/csrank/tests/test_discrete_choice.py index 5a2e0223..c0a4b842 100644 --- a/csrank/tests/test_discrete_choice.py +++ b/csrank/tests/test_discrete_choice.py @@ -30,7 +30,12 @@ "CategoricalAccuracy": categorical_accuracy_np, "CategoricalTopK2": topk_categorical_accuracy_np(k=2), } -optimizer = SGD(lr=1e-3, momentum=0.9, nesterov=True) +optimizer_common_args = { + "optimizer": SGD, + "optimizer__lr": 1e-3, + "optimizer__momentum": 0.9, + "optimizer__nesterov": True, +} def get_vals(values=[1.0, 1.0]): @@ -40,17 +45,17 @@ def get_vals(values=[1.0, 1.0]): discrete_choice_functions = { FETA_DC: ( FETADiscreteChoiceFunction, - {"n_hidden": 1, "optimizer": optimizer}, + {"n_hidden": 1, **optimizer_common_args}, get_vals([0.978, 1.0]), ), RANKNET_DC: ( RankNetDiscreteChoiceFunction, - {"optimizer": optimizer}, + optimizer_common_args.copy(), get_vals([0.97, 0.996]), ), CMPNET_DC: ( CmpNetDiscreteChoiceFunction, - {"optimizer": optimizer}, + optimizer_common_args.copy(), get_vals([0.994, 1.0]), ), FATE_DC: ( @@ -60,7 +65,7 @@ def get_vals(values=[1.0, 1.0]): "n_hidden_set_layers": 1, "n_hidden_joint_units": 5, "n_hidden_set_units": 5, - "optimizer": optimizer, + **optimizer_common_args, }, get_vals([0.95, 0.998]), ), diff --git a/csrank/tests/test_fate.py b/csrank/tests/test_fate.py index 1242e5e1..bae80817 100644 --- a/csrank/tests/test_fate.py +++ b/csrank/tests/test_fate.py @@ -7,7 +7,7 @@ from csrank import FATENetworkCore from csrank import FATEObjectRanker -from csrank.tests.test_ranking import optimizer +from csrank.tests.test_ranking import optimizer_common_args def test_construction_core(): @@ -33,6 +33,7 @@ def fit(self, *args, **kwargs): pass grc = MockClass() + grc._initialize_optimizer() 
grc._construct_layers( activation=grc.activation, kernel_initializer=grc.kernel_initializer, @@ -42,7 +43,7 @@ def fit(self, *args, **kwargs): scores = grc.join_input_layers(input_layer, None, n_layers=0, n_objects=n_objects) model = Model(inputs=input_layer, outputs=scores) - model.compile(loss="mse", optimizer=grc.optimizer) + model.compile(loss="mse", optimizer=grc.optimizer_) X = np.random.randn(100, n_objects, n_features) y = X.sum(axis=2) model.fit(x=X, y=y, verbose=0) @@ -59,8 +60,8 @@ def fit(self, *args, **kwargs): assert grc.batch_size == params["batch_size"] rtol = 1e-2 atol = 1e-4 - key = "learning_rate" if "learning_rate" in grc.optimizer.get_config() else "lr" - learning_rate = grc.optimizer.get_config().get(key, 0.0) + key = "learning_rate" if "learning_rate" in grc.optimizer_.get_config() else "lr" + learning_rate = grc.optimizer_.get_config().get(key, 0.0) assert np.isclose( learning_rate, params["learning_rate"], rtol=rtol, atol=atol, equal_nan=False ) @@ -88,7 +89,7 @@ def trivial_ranking_problem_generator(): n_hidden_joint_units=5, n_hidden_set_units=5, kernel_regularizer=l2(1e-4), - optimizer=optimizer, + **optimizer_common_args, ) fate.fit_generator( generator=trivial_ranking_problem_generator(), diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py index 69186f73..8aa9a126 100644 --- a/csrank/tests/test_ranking.py +++ b/csrank/tests/test_ranking.py @@ -20,7 +20,12 @@ from csrank.objectranking import * from csrank.objectranking.fate_object_ranker import FATEObjectRanker -optimizer = SGD(lr=1e-3, momentum=0.9, nesterov=True) +optimizer_common_args = { + "optimizer": SGD, + "optimizer__lr": 1e-3, + "optimizer__momentum": 0.9, + "optimizer__nesterov": True, +} object_rankers = { FATELINEAR_RANKER: ( @@ -31,12 +36,12 @@ FETALINEAR_RANKER: (FETALinearObjectRanker, {}, (0.0, 1.0)), FETA_RANKER: ( FETAObjectRanker, - {"add_zeroth_order_model": True, "optimizer": optimizer}, + {"add_zeroth_order_model": True, 
**optimizer_common_args}, (0.0, 1.0), ), - RANKNET: (RankNet, {"optimizer": optimizer}, (0.0, 1.0)), - CMPNET: (CmpNet, {"optimizer": optimizer}, (0.0, 1.0)), - LISTNET: (ListNet, {"n_top": 3, "optimizer": optimizer}, (0.0, 1.0)), + RANKNET: (RankNet, optimizer_common_args.copy(), (0.0, 1.0)), + CMPNET: (CmpNet, optimizer_common_args.copy(), (0.0, 1.0),), + LISTNET: (ListNet, {"n_top": 3, **optimizer_common_args}, (0.0, 1.0)), ERR: (ExpectedRankRegression, {}, (0.0, 1.0)), RANKSVM: (RankSVM, {}, (0.0, 1.0)), FATE_RANKER: ( @@ -46,7 +51,6 @@ "n_hidden_set_layers": 1, "n_hidden_joint_units": 5, "n_hidden_set_units": 5, - "optimizer": optimizer, }, (0.0, 1.0), ), @@ -79,16 +83,6 @@ def check_params_tunable(tunable_obj, params, rtol=1e-2, atol=1e-4): ) else: assert value == expected - elif key == "learning_rate" and hasattr(tunable_obj, "optimizer"): - key = ( - "learning_rate" - if "learning_rate" in tunable_obj.optimizer.get_config() - else "lr" - ) - learning_rate = tunable_obj.optimizer.get_config().get(key, 0.0) - assert np.isclose( - learning_rate, value, rtol=rtol, atol=atol, equal_nan=False - ) elif key == "reg_strength" and hasattr(tunable_obj, "kernel_regularizer"): config = tunable_obj.kernel_regularizer.get_config() val1 = np.isclose( diff --git a/docs/notebooks/FATE-Net-DC.ipynb b/docs/notebooks/FATE-Net-DC.ipynb index 5997dfaa..7dad5bff 100644 --- a/docs/notebooks/FATE-Net-DC.ipynb +++ b/docs/notebooks/FATE-Net-DC.ipynb @@ -140,7 +140,10 @@ "from csrank.losses import smooth_rank_loss\n", "fate = FATEObjectRanker(\n", " loss_function=smooth_rank_loss,\n", - " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" + " optimizer=SGD,\n", + " optimizer__lr=1e-4,\n", + " optimizer__nesterov=True,\n", + " optimizer__momentum=0.9)" ] }, { diff --git a/docs/notebooks/FATE-Net-Ranking.ipynb b/docs/notebooks/FATE-Net-Ranking.ipynb index 22a12b2b..f2674972 100644 --- a/docs/notebooks/FATE-Net-Ranking.ipynb +++ b/docs/notebooks/FATE-Net-Ranking.ipynb @@ -132,7 +132,10 
@@ "from csrank.losses import smooth_rank_loss\n", "fate = FATEObjectRanker(\n", " loss_function=smooth_rank_loss,\n", - " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" + " optimizer=SGD,\n", + " optimizer__lr=1e-4,\n", + " optimizer__nesterov=True,\n", + " optimizer__momentum=0.9)" ] }, { diff --git a/docs/notebooks/Rank-Net-Choice.ipynb b/docs/notebooks/Rank-Net-Choice.ipynb index d038a760..e882456c 100644 --- a/docs/notebooks/Rank-Net-Choice.ipynb +++ b/docs/notebooks/Rank-Net-Choice.ipynb @@ -124,7 +124,7 @@ "outputs": [], "source": [ "ranknet = RankNetChoiceFunction(\n", - " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" + " optimizer=SGD, optimizer__lr=1e-4, optimizer__nesterov=True, optimizer__momentum=0.9)" ] }, { diff --git a/requirements-dev.txt b/requirements-dev.txt index e496cb1e..3ba551cc 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,7 +9,7 @@ psycopg2-binary>=2.7 docopt>=0.6.0 joblib>=0.9.4 tqdm>=4.11.2 -keras>=2.3 +keras>=2.3,<2.4 pymc3>=3.8 theano>=1.0 # Pick either CPU or GPU version of tensorflow: diff --git a/setup.py b/setup.py index 102745b0..f2610f0b 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ "docopt>=0.6.0", "joblib>=0.9.4", "tqdm>=4.11.2", - "keras>=2.3", + "keras>=2.3,<2.4", # 2.4 delegates to tf, needs tf2 # Pick either CPU or GPU version of tensorflow: "tensorflow>=1.5,<2.0", # tensorflow-gpu>=1.0.1"