From 1ef33f9059566c1ab42fcc05af5a53b391d90ecf Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Tue, 12 May 2020 15:05:16 +0200 Subject: [PATCH 1/6] Make the Learner inherit from BaseEstimator That will give some of the scikit-learn estimator API (such as set_params and get_params) for free. --- csrank/learner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/csrank/learner.py b/csrank/learner.py index 12be59d7..a7fb92b8 100644 --- a/csrank/learner.py +++ b/csrank/learner.py @@ -1,6 +1,8 @@ from abc import ABCMeta from abc import abstractmethod +from sklearn.base import BaseEstimator + from csrank.tunable import Tunable @@ -13,7 +15,7 @@ def filter_dict_by_prefix(source, prefix): return result -class Learner(Tunable, metaclass=ABCMeta): +class Learner(Tunable, BaseEstimator, metaclass=ABCMeta): def _initialize_optimizer(self): optimizer_params = filter_dict_by_prefix(self.__dict__, "optimizer__") optimizer_params.update(filter_dict_by_prefix(self.kwargs, "optimizer__")) From 0ae5162396fd42665c3a4a8393a6e298d6981300 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 2 May 2020 17:21:56 +0200 Subject: [PATCH 2/6] Improve the FETALinear documentation --- csrank/core/feta_linear.py | 79 +++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/csrank/core/feta_linear.py b/csrank/core/feta_linear.py index 4f0c875f..8566cfa6 100644 --- a/csrank/core/feta_linear.py +++ b/csrank/core/feta_linear.py @@ -13,6 +13,21 @@ class FETALinearCore(Learner): + """Core Learner implementing the First Evaluate then Aggregate approach. + + This implements a linear variant of the FETA approach introduced in + [PfGuH18]. The idea is to first evaluate each object in each sub-context of + fixed size with a linear function approximator and then to aggregate these + evaluations. + + References + ---------- + + .. [PfGuH18] Pfannschmidt, K., Gupta, P., & Hüllermeier, E. (2018). Deep + architectures for learning context-dependent ranking functions. arXiv + preprint arXiv:1803.05796. https://arxiv.org/pdf/1803.05796.pdf + """ + def __init__( self, learning_rate=1e-3, @@ -23,6 +38,25 @@ def __init__( random_state=None, **kwargs, ): + """ + Parameters + ---------- + learning_rate : float + The learning rate used by the gradient descent optimizer. + batch_size : int + The size of the mini-batches used to train the Neural Network. + loss_function + The loss function to minimize when training the Neural Network. See + the functions offered in the keras.losses module for more details. + epochs_drop: int + The amount of training epochs after which the learning rate is + decreased by a factor of `drop`. + drop: float + The factor by which to decrease the learning rate every + `epochs_drop` epochs. + random_state: np.RandomState + The random state to use in this object. + """ self.learning_rate = learning_rate self.batch_size = batch_size self.random_state = random_state @@ -90,6 +124,18 @@ def _construct_model_(self, n_objects): ) def step_decay(self, epoch): + """Update the current learning rate. + + Computes the current learning rate based on the initial learning rate, + the current epoch and the decay speed set by the `epochs_drop` and + `drop` hyperparameters. + + Parameters + ---------- + + epoch: int + The current epoch. + """ step = math.floor((1 + epoch) / self.epochs_drop) self.current_lr = self.learning_rate * math.pow(self.drop, step) self.optimizer = tf.train.GradientDescentOptimizer(self.current_lr).minimize( @@ -99,6 +145,22 @@ def step_decay(self, epoch): def fit( self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd ): + """ + Fit the preference learning algorithm on the provided set of queries X + and preferences Y of those objects. The provided queries and + corresponding preferences are of a fixed size (numpy arrays). + + Parameters + ---------- + X : array-like, shape (n_samples, n_objects, n_features) + Feature vectors of the objects + Y : array-like, shape (n_samples, n_objects) + Preferences of the objects in form of rankings or choices + epochs: int + The amount of epochs to train for. The training loop will try to + predict the target variables and adjust its parameters by gradient + descent `epochs` times. + """ self.random_state_ = check_random_state(self.random_state) # Global Variables Initializer n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape @@ -146,6 +208,18 @@ def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): self.logger.info("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) def _predict_scores_fixed(self, X, **kwargs): + """Predict the scores for a given collection of sets of objects of same size. + + Parameters + ---------- + X : array-like, shape (n_samples, n_objects, n_features) + + + Returns + ------- + Y : array-like, shape (n_samples, n_objects) + Returns the scores of each of the objects for each of the samples. + """ n_instances, n_objects, n_features = X.shape assert n_features == self.n_object_features_fit_ outputs = [list() for _ in range(n_objects)] @@ -168,7 +242,10 @@ def set_tunable_parameters( self, learning_rate=1e-3, batch_size=128, epochs_drop=300, drop=0.1, **point ): """ - Set tunable parameters of the FETA-network to the values provided. + Set tunable hyperparameters of the FETA-network to the values provided. + + This can be used for automatic hyperparameter optimization. See + csrank.tuning for more information. Parameters ---------- From e61f7f267fafdbcd6ffea57043d9c052db931a2f Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 27 Jun 2020 18:03:17 +0200 Subject: [PATCH 3/6] Convert default parameter lists to tuples Tuples are immutable and the scikit-learn estimator API requires estimator parameters to be immutable (to make cloning work properly). Since we do not use the parameters in a mutable way, this doesn't make a difference. --- csrank/choicefunction/cmpnet_choice.py | 2 +- csrank/choicefunction/fate_choice.py | 2 +- csrank/choicefunction/feta_choice.py | 6 ++++-- csrank/choicefunction/ranknet_choice.py | 2 +- csrank/core/cmpnet_core.py | 6 ++++-- csrank/core/fate_network.py | 4 +++- csrank/core/feta_network.py | 6 ++++-- csrank/core/ranknet_core.py | 6 ++++-- csrank/discretechoice/cmpnet_discrete_choice.py | 2 +- csrank/discretechoice/fate_discrete_choice.py | 2 +- csrank/discretechoice/feta_discrete_choice.py | 6 ++++-- csrank/discretechoice/ranknet_discrete_choice.py | 2 +- csrank/objectranking/cmp_net.py | 2 +- csrank/objectranking/fate_object_ranker.py | 2 +- csrank/objectranking/feta_object_ranker.py | 2 +- csrank/objectranking/list_net.py | 6 ++++-- csrank/objectranking/rank_net.py | 2 +- 17 files changed, 37 insertions(+), 23 deletions(-) diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py index 507527e3..887b21a8 100644 --- a/csrank/choicefunction/cmpnet_choice.py +++ b/csrank/choicefunction/cmpnet_choice.py @@ -20,7 +20,7 @@ def __init__( kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py index 3f06dcb0..ab6348f5 100644 --- a/csrank/choicefunction/fate_choice.py +++ b/csrank/choicefunction/fate_choice.py @@ -23,7 +23,7 @@ def __init__( kernel_regularizer=l2(0.01), optimizer=SGD, batch_size=256, - metrics=None, + metrics=(), random_state=None, **kwargs, ): diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index ae44171b..2c06c93f 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -35,7 +35,7 @@ def __init__( kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -220,7 +220,9 @@ def create_input_lambda(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py index f55964bb..f3909dea 100644 --- a/csrank/choicefunction/ranknet_choice.py +++ b/csrank/choicefunction/ranknet_choice.py @@ -20,7 +20,7 @@ def __init__( kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 47a0ad5d..34b466b8 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -29,7 +29,7 @@ def __init__( kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -109,7 +109,9 @@ def construct_model(self): merged_output = concatenate([N_g, N_l]) model = Model(inputs=[self.x1, self.x2], outputs=merged_output) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index 75ec200e..e4afe40c 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -475,7 +475,9 @@ def construct_model(self, n_features, n_objects): model = Model(inputs=input_layer, outputs=scores) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 8f6c20ec..2e4bcfa1 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -36,7 +36,7 @@ def __init__( kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=None, + metrics=(), batch_size=256, random_state=None, **kwargs, @@ -249,7 +249,9 @@ def create_input_lambda(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index b73e9b43..aec6b8ca 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -28,7 +28,7 @@ def __init__( kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -99,7 +99,9 @@ def construct_model(self): output = self.output_node(merged_inputs) model = Model(inputs=[self.x1, self.x2], outputs=output) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py index 5e8315d0..bfa00f0b 100644 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ b/csrank/discretechoice/cmpnet_discrete_choice.py @@ -19,7 +19,7 @@ def __init__( kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index 0da4e5f2..7a48817a 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -14,7 +14,7 @@ def __init__( n_hidden_set_layers=2, n_hidden_set_units=2, loss_function="categorical_hinge", - metrics=["categorical_accuracy"], + metrics=("categorical_accuracy",), n_hidden_joint_layers=32, n_hidden_joint_units=32, activation="selu", diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index 2705dada..d86cee84 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -33,7 +33,7 @@ def __init__( kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=["categorical_accuracy"], + metrics=("categorical_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -264,7 +264,9 @@ def get_score_object(i): model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py index 7fc5bf0a..ea3e638c 100644 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ b/csrank/discretechoice/ranknet_discrete_choice.py @@ -19,7 +19,7 @@ def __init__( kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py index 25d013b0..6f2da278 100644 --- a/csrank/objectranking/cmp_net.py +++ b/csrank/objectranking/cmp_net.py @@ -21,7 +21,7 @@ def __init__( kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py index 841e6bff..4c30728d 100644 --- a/csrank/objectranking/fate_object_ranker.py +++ b/csrank/objectranking/fate_object_ranker.py @@ -22,7 +22,7 @@ def __init__( optimizer=SGD, batch_size=256, loss_function=hinged_rank_loss, - metrics=[zero_one_rank_loss_for_scores_ties], + metrics=(zero_one_rank_loss_for_scores_ties,), random_state=None, **kwargs, ): diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py index 02ee8bbb..4a10c647 100644 --- a/csrank/objectranking/feta_object_ranker.py +++ b/csrank/objectranking/feta_object_ranker.py @@ -24,7 +24,7 @@ def __init__( kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=None, + metrics=(), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index 74c136ab..33142616 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -34,7 +34,7 @@ def __init__( activation="selu", kernel_initializer="lecun_normal", optimizer=SGD, - metrics=[zero_one_rank_loss_for_scores_ties], + metrics=(zero_one_rank_loss_for_scores_ties,), batch_size=256, random_state=None, **kwargs, @@ -214,7 +214,9 @@ def construct_model(self): merged = concatenate(outputs) model = Model(inputs=self.input_layer, outputs=merged) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py index dc98c4ac..381bd30e 100644 --- a/csrank/objectranking/rank_net.py +++ b/csrank/objectranking/rank_net.py @@ -21,7 +21,7 @@ def __init__( kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, From 393aaee9c69f0b7cb71f2cb95b829bc2e31e5a19 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 27 Jun 2020 18:10:16 +0200 Subject: [PATCH 4/6] Do not override regularizer default parameters In the spirit of 2291f86fb5cf439653ca0db5621bed68d4bf1427. --- csrank/choicefunction/cmpnet_choice.py | 2 +- csrank/choicefunction/fate_choice.py | 2 +- csrank/choicefunction/feta_choice.py | 2 +- csrank/choicefunction/ranknet_choice.py | 2 +- csrank/core/cmpnet_core.py | 2 +- csrank/core/fate_network.py | 2 +- csrank/core/feta_network.py | 2 +- csrank/core/ranknet_core.py | 2 +- csrank/discretechoice/cmpnet_discrete_choice.py | 2 +- csrank/discretechoice/fate_discrete_choice.py | 2 +- csrank/discretechoice/feta_discrete_choice.py | 2 +- csrank/discretechoice/ranknet_discrete_choice.py | 2 +- csrank/objectranking/cmp_net.py | 2 +- csrank/objectranking/fate_object_ranker.py | 2 +- csrank/objectranking/feta_object_ranker.py | 2 +- csrank/objectranking/list_net.py | 2 +- csrank/objectranking/rank_net.py | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py index 887b21a8..a687102a 100644 --- a/csrank/choicefunction/cmpnet_choice.py +++ b/csrank/choicefunction/cmpnet_choice.py @@ -16,7 +16,7 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py index ab6348f5..a0569b95 100644 --- a/csrank/choicefunction/fate_choice.py +++ b/csrank/choicefunction/fate_choice.py @@ -20,7 +20,7 @@ def __init__( loss_function=binary_crossentropy, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, metrics=(), diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index 2c06c93f..c34f42ff 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -31,7 +31,7 @@ def __init__( num_subsample=5, loss_function=binary_crossentropy, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py index f3909dea..7731c9f3 100644 --- a/csrank/choicefunction/ranknet_choice.py +++ b/csrank/choicefunction/ranknet_choice.py @@ -16,7 +16,7 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 34b466b8..f54ce7fb 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -25,7 +25,7 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index e4afe40c..301b74b4 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -27,7 +27,7 @@ def __init__( n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, random_state=None, diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 2e4bcfa1..f63eade7 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -32,7 +32,7 @@ def __init__( num_subsample=5, loss_function=hinged_rank_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index aec6b8ca..d3819119 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -24,7 +24,7 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py index bfa00f0b..64b6e32c 100644 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ b/csrank/discretechoice/cmpnet_discrete_choice.py @@ -15,7 +15,7 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index 7a48817a..e29ca3dd 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -19,7 +19,7 @@ def __init__( n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, random_state=None, diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index d86cee84..f70d473d 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -29,7 +29,7 @@ def __init__( num_subsample=5, loss_function="categorical_hinge", batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py index ea3e638c..0df1a3e4 100644 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ b/csrank/discretechoice/ranknet_discrete_choice.py @@ -15,7 +15,7 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py index 6f2da278..dc4deb96 100644 --- a/csrank/objectranking/cmp_net.py +++ b/csrank/objectranking/cmp_net.py @@ -17,7 +17,7 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py index 4c30728d..ca07f737 100644 --- a/csrank/objectranking/fate_object_ranker.py +++ b/csrank/objectranking/fate_object_ranker.py @@ -18,7 +18,7 @@ def __init__( n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, loss_function=hinged_rank_loss, diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py index 4a10c647..e341ff16 100644 --- a/csrank/objectranking/feta_object_ranker.py +++ b/csrank/objectranking/feta_object_ranker.py @@ -20,7 +20,7 @@ def __init__( num_subsample=5, loss_function=hinged_rank_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index 33142616..896fd888 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -30,7 +30,7 @@ def __init__( n_units=8, loss_function=plackett_luce_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), activation="selu", kernel_initializer="lecun_normal", optimizer=SGD, diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py index 381bd30e..fba9aee6 100644 --- a/csrank/objectranking/rank_net.py +++ b/csrank/objectranking/rank_net.py @@ -17,7 +17,7 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, From 1a08dcaec66db6b1d11475bd9fac17c3b99e79ae Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 27 Jun 2020 19:04:53 +0200 Subject: [PATCH 5/6] Initialize loss functions lazily Scikit-learn requires unmodified storage of estimator parameters. Instead, parameter validation and initialization should be deferred to fit, which we do here by moving it into the model creation. --- csrank/discretechoice/generalized_nested_logit.py | 7 ++++--- csrank/discretechoice/mixed_logit_model.py | 7 ++++--- csrank/discretechoice/multinomial_logit_model.py | 7 ++++--- csrank/discretechoice/nested_logit_model.py | 7 ++++--- csrank/discretechoice/paired_combinatorial_logit.py | 7 ++++--- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/csrank/discretechoice/generalized_nested_logit.py b/csrank/discretechoice/generalized_nested_logit.py index 3df68d86..083d9e12 100644 --- a/csrank/discretechoice/generalized_nested_logit.py +++ b/csrank/discretechoice/generalized_nested_logit.py @@ -92,7 +92,7 @@ def __init__( self.n_nests = n_nests self.alpha = alpha - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.random_state = random_state known_regularization_functions = {"l1", "l2"} @@ -257,6 +257,7 @@ def construct_model(self, X, Y): model : pymc3 Model :class:`pm.Model` """ self.random_state_ = check_random_state(self.random_state) + self.loss_function_ = likelihood_dict.get(self.loss_function, None) if np.prod(X.shape) > self.threshold: upper_bound = int(self.threshold / np.prod(X.shape[1:])) indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False) @@ -280,7 +281,7 @@ def construct_model(self, X, Y): lambda_k = pm.Uniform("lambda_k", self.alpha, 1.0, shape=self.n_nests) self.p = self.get_probabilities(utility, lambda_k, alpha_ik) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -403,7 +404,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None diff --git a/csrank/discretechoice/mixed_logit_model.py b/csrank/discretechoice/mixed_logit_model.py index 21ce0b80..86efc9f0 100644 --- a/csrank/discretechoice/mixed_logit_model.py +++ b/csrank/discretechoice/mixed_logit_model.py @@ -72,7 +72,7 @@ def __init__(self, n_mixtures=4, loss_function="", regularization="l2", **kwargs [3] Daniel McFadden and Kenneth Train. „Mixed MNL models for discrete response“. In: Journal of applied Econometrics 15.5 (2000), pp. 447–470 """ self.logger = logging.getLogger(MixedLogitModel.__name__) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -155,6 +155,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -163,7 +164,7 @@ def construct_model(self, X, Y): utility = tt.dot(self.Xt, weights_dict["weights"]) self.p = tt.mean(ttu.softmax(utility, axis=1), axis=2) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -257,7 +258,7 @@ def set_tunable_parameters( Dictionary containing parameter values which are not tuned for the network """ if loss_function in likelihood_dict: - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.n_mixtures = n_mixtures self.regularization = regularization self.model = None diff --git a/csrank/discretechoice/multinomial_logit_model.py b/csrank/discretechoice/multinomial_logit_model.py index d6fe4f5f..fb52f095 100644 --- a/csrank/discretechoice/multinomial_logit_model.py +++ b/csrank/discretechoice/multinomial_logit_model.py @@ -64,7 +64,7 @@ def __init__(self, loss_function="", regularization="l2", **kwargs): [2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986 """ self.logger = logging.getLogger(MultinomialLogitModel.__name__) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -151,6 +151,7 @@ def construct_model(self, X, Y): print_dictionary(self.model_configuration) ) ) + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -162,7 +163,7 @@ def construct_model(self, X, Y): self.p = ttu.softmax(utility, axis=1) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -257,7 +258,7 @@ def set_tunable_parameters(self, loss_function=None, regularization="l1", **poin raise ValueError( f"Loss function {loss_function} is unknown. Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None diff --git a/csrank/discretechoice/nested_logit_model.py b/csrank/discretechoice/nested_logit_model.py index 328a9ef8..326620dc 100644 --- a/csrank/discretechoice/nested_logit_model.py +++ b/csrank/discretechoice/nested_logit_model.py @@ -91,7 +91,7 @@ def __init__( self.n_nests = n_nests self.alpha = alpha self.random_state = random_state - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -313,6 +313,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) if np.prod(X.shape) > self.threshold: upper_bound = int(self.threshold / np.prod(X.shape[1:])) indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False) @@ -339,7 +340,7 @@ def construct_model(self, X, Y): self.p = self.get_probabilities(utility, lambda_k, utility_k) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -464,7 +465,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.cluster_model = None self.features_nests = None self.model = None diff --git a/csrank/discretechoice/paired_combinatorial_logit.py b/csrank/discretechoice/paired_combinatorial_logit.py index 7b19ff90..0cc4d084 100644 --- a/csrank/discretechoice/paired_combinatorial_logit.py +++ b/csrank/discretechoice/paired_combinatorial_logit.py @@ -91,7 +91,7 @@ def __init__( self.logger = logging.getLogger(PairedCombinatorialLogit.__name__) self.alpha = alpha self.random_state = random_state - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -260,6 +260,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -269,7 +270,7 @@ def construct_model(self, X, Y): utility = tt.dot(self.Xt, weights_dict["weights"]) self.p = self.get_probabilities(utility, lambda_k) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -380,7 +381,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None From 217c838d8ab69a5d317dc78182ba0b35d33b32b3 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 27 Jun 2020 19:08:36 +0200 Subject: [PATCH 6/6] Rename use_zeroth_model Scikit-learn mandates that all estimator arguments should be stored in an instance attribute of the same name. --- csrank/choicefunction/feta_choice.py | 12 +++++------ csrank/core/feta_network.py | 20 +++++++++---------- csrank/discretechoice/feta_discrete_choice.py | 20 +++++++++---------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index c34f42ff..daf2ae3c 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -119,7 +119,7 @@ def _construct_layers(self, **kwargs): # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), *kwargs @@ -131,7 +131,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -144,7 +144,7 @@ def _construct_layers(self, **kwargs): self.output_node = Dense( 1, activation="linear", kernel_regularizer=self.kernel_regularizer ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="linear", kernel_regularizer=self.kernel_regularizer ) @@ -169,7 +169,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -184,7 +184,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -214,7 +214,7 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores = add([scores, zeroth_order_scores]) scores = Activation("sigmoid")(scores) model = Model(inputs=self.input_layer, outputs=scores) diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index f63eade7..d9119528 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -54,7 +54,7 @@ def __init__( self.batch_size = batch_size self.hash_file = None self.optimizer = optimizer - self._use_zeroth_model = add_zeroth_order_model + self.add_zeroth_order_model = add_zeroth_order_model self.n_hidden = n_hidden self.n_units = n_units keys = list(kwargs.keys()) @@ -80,7 +80,7 @@ def _construct_layers(self, **kwargs): # X = Input(shape=(None, n_features)) self.logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), **kwargs @@ -92,7 +92,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -105,14 +105,14 @@ def _construct_layers(self, **kwargs): self.output_node = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) @property def zero_order_model(self): - if self._zero_order_model is None and self._use_zeroth_model: + if self._zero_order_model is None and self.add_zeroth_order_model: self.logger.info("Creating zeroth model") inp = Input(shape=(self.n_object_features_fit_,)) @@ -153,7 +153,7 @@ def pairwise_model(self): def _predict_pair(self, a, b, only_pairwise=False, **kwargs): # TODO: Is this working correctly? pairwise = self.pairwise_model.predict([a, b], **kwargs) - if not only_pairwise and self._use_zeroth_model: + if not only_pairwise and self.add_zeroth_order_model: utility_a = self.zero_order_model.predict([a]) utility_b = self.zero_order_model.predict([b]) return pairwise + (utility_a, utility_b) @@ -173,7 +173,7 @@ def _predict_scores_using_pairs(self, X, **kwd): scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1) del result del pairs - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features)) scores_zero = scores_zero.reshape(n_instances, n_objects) scores = scores + scores_zero @@ -199,7 +199,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -214,7 +214,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -244,7 +244,7 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores = add([scores, zeroth_order_scores]) model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index f70d473d..3145ecf8 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -116,7 +116,7 @@ def _construct_layers(self, **kwargs): # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), *kwargs @@ -128,7 +128,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -144,7 +144,7 @@ def _construct_layers(self, **kwargs): kernel_regularizer=self.kernel_regularizer, name="score", ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="linear", @@ -178,7 +178,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -193,7 +193,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -223,7 +223,7 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: def get_score_object(i): return Lambda(lambda x: x[:, i, None]) @@ -242,9 +242,9 @@ def get_score_object(i): scores.append(self.weighted_sum(concat_scores[i])) scores = concatenate(scores) - # if self._use_zeroth_model: + # if self.add_zeroth_order_model: # scores = add([scores, zeroth_order_scores]) - # if self._use_zeroth_model: + # if self.add_zeroth_order_model: # def expand_dims(): # return Lambda(lambda x: x[..., None]) # @@ -259,7 +259,7 @@ def get_score_object(i): # kernel_regularizer=self.kernel_regularizer, use_bias=False) # scores = weighted_sum(concat_scores) # scores = squeeze_dims()(scores) - if not self._use_zeroth_model: + if not self.add_zeroth_order_model: scores = Activation("sigmoid")(scores) model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") @@ -301,7 +301,7 @@ def _predict_scores_using_pairs(self, X, **kwd): scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1) del result del pairs - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features)) scores_zero = scores_zero.reshape(n_instances, n_objects) model = self._create_weighted_model(n_objects)