From 5bbf63c9db249eb71e287fee6b3aa69e7c96dd5a Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 2 May 2020 17:19:05 +0200 Subject: [PATCH 01/16] Determine data dimensionality lazily in FETALinear Currently we require the data dimensionality to be passed into the FETALinear constructor. It is then checked that it matches the dimensionality of the data in `fit`. This is somewhat redundant and not compatible with the scikit-learn estimator API. We were already creating the model in the `fit` function. It was also created when updating the hyperparameters, but that is redundant. In the `fit` function we can simply derive the dimensionality from the data. --- csrank/choicefunction/fetalinear_choice.py | 8 ------- csrank/core/feta_linear.py | 23 +++++++++---------- .../fetalinear_discrete_choice.py | 8 ------- .../objectranking/fetalinear_object_ranker.py | 8 ------- 4 files changed, 11 insertions(+), 36 deletions(-) diff --git a/csrank/choicefunction/fetalinear_choice.py b/csrank/choicefunction/fetalinear_choice.py index abba6253..5dcc5cd5 100644 --- a/csrank/choicefunction/fetalinear_choice.py +++ b/csrank/choicefunction/fetalinear_choice.py @@ -10,8 +10,6 @@ class FETALinearChoiceFunction(FETALinearCore, ChoiceFunctions): def __init__( self, - n_object_features, - n_objects, loss_function=binary_crossentropy, learning_rate=5e-3, batch_size=256, @@ -40,10 +38,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. 
batch_size : int @@ -56,8 +50,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, learning_rate=learning_rate, batch_size=batch_size, loss_function=loss_function, diff --git a/csrank/core/feta_linear.py b/csrank/core/feta_linear.py index 40dd11a1..e16aba03 100644 --- a/csrank/core/feta_linear.py +++ b/csrank/core/feta_linear.py @@ -15,8 +15,6 @@ class FETALinearCore(Learner): def __init__( self, - n_object_features, - n_objects, learning_rate=1e-3, batch_size=256, loss_function=binary_crossentropy, @@ -28,9 +26,7 @@ def __init__( self.learning_rate = learning_rate self.batch_size = batch_size self.random_state = random_state - self.n_object_features = n_object_features self.loss_function = loss_function - self.n_objects = n_objects self.epochs_drop = epochs_drop self.drop = drop self.current_lr = None @@ -43,20 +39,24 @@ def __init__( self.W_last = None def _construct_model_(self, n_objects): - self.X = tf.placeholder("float32", [None, n_objects, self.n_object_features]) + self.X = tf.placeholder( + "float32", [None, n_objects, self.n_object_features_fit_] + ) self.Y = tf.placeholder("float32", [None, n_objects]) - std = 1 / np.sqrt(self.n_object_features) + std = 1 / np.sqrt(self.n_object_features_fit_) self.b1 = tf.Variable( self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32 ) self.W1 = tf.Variable( self.random_state_.normal( - loc=0, scale=std, size=2 * self.n_object_features + loc=0, scale=std, size=2 * self.n_object_features_fit_ ), dtype=tf.float32, ) self.W2 = tf.Variable( - self.random_state_.normal(loc=0, scale=std, size=self.n_object_features), + self.random_state_.normal( + loc=0, scale=std, size=self.n_object_features_fit_ + ), dtype=tf.float32, ) self.b2 = tf.Variable( @@ -101,9 +101,8 @@ def fit( ): self.random_state_ = check_random_state(self.random_state) # Global Variables Initializer - n_instances, n_objects, n_features = X.shape - assert 
n_features == self.n_object_features - self._construct_model_(n_objects) + n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._construct_model_(self.n_objects_fit_) init = tf.global_variables_initializer() with tf.Session() as tf_session: @@ -148,7 +147,7 @@ def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): def _predict_scores_fixed(self, X, **kwargs): n_instances, n_objects, n_features = X.shape - assert n_features == self.n_object_features + assert n_features == self.n_object_features_fit_ outputs = [list() for _ in range(n_objects)] for i, j in combinations(range(n_objects), 2): x1 = X[:, i] diff --git a/csrank/discretechoice/fetalinear_discrete_choice.py b/csrank/discretechoice/fetalinear_discrete_choice.py index c09c1504..9215a7d6 100644 --- a/csrank/discretechoice/fetalinear_discrete_choice.py +++ b/csrank/discretechoice/fetalinear_discrete_choice.py @@ -9,8 +9,6 @@ class FETALinearDiscreteChoiceFunction(FETALinearCore, DiscreteObjectChooser): def __init__( self, - n_object_features, - n_objects, loss_function=categorical_hinge, learning_rate=5e-3, batch_size=256, @@ -39,10 +37,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. 
batch_size : int @@ -55,8 +49,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, learning_rate=learning_rate, batch_size=batch_size, loss_function=loss_function, diff --git a/csrank/objectranking/fetalinear_object_ranker.py b/csrank/objectranking/fetalinear_object_ranker.py index 75216616..17641279 100644 --- a/csrank/objectranking/fetalinear_object_ranker.py +++ b/csrank/objectranking/fetalinear_object_ranker.py @@ -8,8 +8,6 @@ class FETALinearObjectRanker(FETALinearCore, ObjectRanker): def __init__( self, - n_object_features, - n_objects, loss_function=hinged_rank_loss, learning_rate=5e-3, batch_size=256, @@ -38,10 +36,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. batch_size : int @@ -54,8 +48,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, learning_rate=learning_rate, batch_size=batch_size, loss_function=loss_function, From afc46e8ef2757b2d8a0b42e91b453eb0a2c26833 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 15:31:21 +0200 Subject: [PATCH 02/16] Determine data dimensionality lazily in FATELinear --- csrank/choicefunction/fatelinear_choice.py | 8 ------- csrank/core/fate_linear.py | 23 +++++++++---------- .../fatelinear_discrete_choice.py | 8 ------- .../objectranking/fatelinear_object_ranker.py | 8 ------- 4 files changed, 11 insertions(+), 36 deletions(-) diff --git a/csrank/choicefunction/fatelinear_choice.py b/csrank/choicefunction/fatelinear_choice.py index bce47b35..31191f9f 100644 --- a/csrank/choicefunction/fatelinear_choice.py +++ b/csrank/choicefunction/fatelinear_choice.py @@ -10,8 +10,6 @@ class FATELinearChoiceFunction(FATELinearCore, ChoiceFunctions): def __init__( 
self, - n_object_features, - n_objects, n_hidden_set_units=2, loss_function=binary_crossentropy, learning_rate=1e-3, @@ -41,10 +39,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. batch_size : int @@ -57,8 +51,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, n_hidden_set_units=n_hidden_set_units, learning_rate=learning_rate, batch_size=batch_size, diff --git a/csrank/core/fate_linear.py b/csrank/core/fate_linear.py index 30de1dd7..c645d284 100644 --- a/csrank/core/fate_linear.py +++ b/csrank/core/fate_linear.py @@ -14,8 +14,6 @@ class FATELinearCore(Learner): def __init__( self, - n_object_features, - n_objects, n_hidden_set_units=32, learning_rate=1e-3, batch_size=256, @@ -29,9 +27,7 @@ def __init__( self.learning_rate = learning_rate self.batch_size = batch_size self.random_state = random_state - self.n_object_features = n_object_features self.loss_function = loss_function - self.n_objects = n_objects self.epochs_drop = epochs_drop self.drop = drop self.current_lr = None @@ -42,16 +38,20 @@ def __init__( self.optimizer = None def _construct_model_(self, n_objects): - self.X = tf.placeholder("float32", [None, n_objects, self.n_object_features]) + self.X = tf.placeholder( + "float32", [None, n_objects, self.n_object_features_fit_] + ) self.Y = tf.placeholder("float32", [None, n_objects]) - std = 1 / np.sqrt(self.n_object_features) + std = 1 / np.sqrt(self.n_object_features_fit_) self.b1 = tf.Variable( self.random_state_.normal(loc=0, scale=std, size=self.n_hidden_set_units), dtype=tf.float32, ) self.W1 = tf.Variable( self.random_state_.normal( - loc=0, scale=std, size=(self.n_object_features, self.n_hidden_set_units) + loc=0, + scale=std, + size=(self.n_object_features_fit_, 
self.n_hidden_set_units), ), dtype=tf.float32, ) @@ -59,7 +59,7 @@ def _construct_model_(self, n_objects): self.random_state_.normal( loc=0, scale=std, - size=(self.n_object_features + self.n_hidden_set_units), + size=(self.n_object_features_fit_ + self.n_hidden_set_units), ), dtype=tf.float32, ) @@ -95,9 +95,8 @@ def fit( ): self.random_state_ = check_random_state(self.random_state) # Global Variables Initializer - n_instances, n_objects, n_features = X.shape - assert n_features == self.n_object_features - self._construct_model_(n_objects) + n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._construct_model_(self.n_objects_fit_) init = tf.global_variables_initializer() with tf.Session() as tf_session: @@ -141,7 +140,7 @@ def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): def _predict_scores_fixed(self, X, **kwargs): n_instances, n_objects, n_features = X.shape - assert n_features == self.n_object_features + assert n_features == self.n_object_features_fit_ rep = np.mean(np.dot(X, self.weight1), axis=1) + self.bias1 rep = np.tile(rep[:, np.newaxis, :], (1, n_objects, 1)) X_n = np.concatenate((X, rep), axis=2) diff --git a/csrank/discretechoice/fatelinear_discrete_choice.py b/csrank/discretechoice/fatelinear_discrete_choice.py index 015f6345..9e874af0 100644 --- a/csrank/discretechoice/fatelinear_discrete_choice.py +++ b/csrank/discretechoice/fatelinear_discrete_choice.py @@ -9,8 +9,6 @@ class FATELinearDiscreteChoiceFunction(FATELinearCore, DiscreteObjectChooser): def __init__( self, - n_object_features, - n_objects, n_hidden_set_units=2, loss_function=categorical_hinge, learning_rate=1e-3, @@ -40,10 +38,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. 
batch_size : int @@ -56,8 +50,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, n_hidden_set_units=n_hidden_set_units, learning_rate=learning_rate, batch_size=batch_size, diff --git a/csrank/objectranking/fatelinear_object_ranker.py b/csrank/objectranking/fatelinear_object_ranker.py index 092d9d11..dac946fb 100644 --- a/csrank/objectranking/fatelinear_object_ranker.py +++ b/csrank/objectranking/fatelinear_object_ranker.py @@ -8,8 +8,6 @@ class FATELinearObjectRanker(FATELinearCore, ObjectRanker): def __init__( self, - n_object_features, - n_objects, n_hidden_set_units=2, loss_function=hinged_rank_loss, learning_rate=1e-3, @@ -39,10 +37,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. batch_size : int @@ -55,8 +49,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, n_hidden_set_units=n_hidden_set_units, learning_rate=learning_rate, batch_size=batch_size, From d3d0386a098673d6182c0f9aafbf593db13d1a39 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 15:49:57 +0200 Subject: [PATCH 03/16] Determine data dimensionality lazily in FATE --- README.rst | 2 +- csrank/choicefunction/fate_choice.py | 4 ---- csrank/core/fate_network.py | 16 ++++++---------- csrank/objectranking/fate_object_ranker.py | 4 ---- csrank/objectranking/feta_object_ranker.py | 8 -------- csrank/tests/test_fate.py | 3 +-- docs/intro.rst | 2 +- docs/notebooks/FATE-Net-DC.ipynb | 1 - docs/notebooks/FATE-Net-Ranking.ipynb | 1 - 9 files changed, 9 insertions(+), 32 deletions(-) diff --git a/README.rst b/README.rst index 058f52d4..a4909442 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ method: .. 
code-block:: python - fate = cs.FATEChoiceFunction(n_object_features=2) + fate = cs.FATEChoiceFunction() fate.fit(X_train, Y_train) Predictions can then be obtained using: diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py index a4d3c194..3e9ad830 100644 --- a/csrank/choicefunction/fate_choice.py +++ b/csrank/choicefunction/fate_choice.py @@ -13,7 +13,6 @@ class FATEChoiceFunction(FATENetwork, ChoiceFunctions): def __init__( self, - n_object_features, n_hidden_set_layers=2, n_hidden_set_units=2, n_hidden_joint_layers=32, @@ -50,8 +49,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object n_hidden_set_layers : int Number of set layers. n_hidden_set_units : int @@ -82,7 +79,6 @@ def __init__( self.loss_function = loss_function self.metrics = metrics super().__init__( - n_object_features=n_object_features, n_hidden_set_layers=n_hidden_set_layers, n_hidden_set_units=n_hidden_set_units, n_hidden_joint_layers=n_hidden_joint_layers, diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index 518a7a4e..58a18d8a 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -186,17 +186,13 @@ def set_tunable_parameters( class FATENetwork(FATENetworkCore): - def __init__( - self, n_object_features, n_hidden_set_layers=1, n_hidden_set_units=1, **kwargs - ): + def __init__(self, n_hidden_set_layers=1, n_hidden_set_units=1, **kwargs): """ Create a FATE-network architecture. Training and prediction complexity is linear in the number of objects. Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object n_hidden_set_layers : int Number of hidden set layers. 
n_hidden_set_units : int @@ -209,7 +205,6 @@ def __init__( self.n_hidden_set_layers = n_hidden_set_layers self.n_hidden_set_units = n_hidden_set_units - self.n_object_features = n_object_features self.model = None self.set_layer = None self._create_set_layers( @@ -271,7 +266,7 @@ def _bucket_frequencies(X, min_bucket_size=32): def _construct_models(self, buckets): models = dict() - n_features = self.n_object_features + n_features = self.n_object_features_fit_ for n_objects in buckets.keys(): model = self.construct_model(n_features, n_objects) @@ -501,6 +496,7 @@ def fit( Keyword arguments for the fit function """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape self._fit( X=X, Y=Y, @@ -598,14 +594,14 @@ def _get_context_representation(self, X, kwargs): "Test Set instances {} objects {} features {}".format(*X.shape) ) input_layer_scorer = Input( - shape=(n_objects, self.n_object_features), name="input_node" + shape=(n_objects, self.n_object_features_fit_), name="input_node" ) if self.n_hidden_set_layers >= 1: self.set_layer(input_layer_scorer) fr = self.set_layer.cached_models[n_objects].predict(X, **kwargs) del self.set_layer.cached_models[n_objects] X_n = np.empty( - (fr.shape[0], n_objects, fr.shape[1] + self.n_object_features), + (fr.shape[0], n_objects, fr.shape[1] + self.n_object_features_fit_), dtype="float", ) for i in range(n_objects): @@ -681,7 +677,7 @@ def clear_memory(self, n_objects=5, **kwargs): kernel_regularizer=self.kernel_regularizer, **self.kwargs ) - self.model = self.construct_model(self.n_object_features, n_objects) + self.model = self.construct_model(self.n_object_features_fit_, n_objects) self.model.load_weights(self.hash_file) else: self.logger.info("Cannot clear the memory") diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py index 3d564c22..8e0f5dda 100644 --- a/csrank/objectranking/fate_object_ranker.py 
+++ b/csrank/objectranking/fate_object_ranker.py @@ -12,7 +12,6 @@ class FATEObjectRanker(FATENetwork, ObjectRanker): def __init__( self, - n_object_features, n_hidden_set_layers=2, n_hidden_set_units=2, n_hidden_joint_layers=32, @@ -48,8 +47,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object n_hidden_set_layers : int Number of set layers. n_hidden_set_units : int @@ -80,7 +77,6 @@ def __init__( self.loss_function = loss_function self.metrics = metrics super().__init__( - n_object_features=n_object_features, n_hidden_set_layers=n_hidden_set_layers, n_hidden_set_units=n_hidden_set_units, n_hidden_joint_layers=n_hidden_joint_layers, diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py index db81aa66..b6e38b00 100644 --- a/csrank/objectranking/feta_object_ranker.py +++ b/csrank/objectranking/feta_object_ranker.py @@ -13,8 +13,6 @@ class FETAObjectRanker(FETANetwork, ObjectRanker): def __init__( self, - n_objects, - n_object_features, n_hidden=2, n_units=8, add_zeroth_order_model=False, @@ -49,10 +47,6 @@ def __init__( Parameters ---------- - n_objects : int - Number of objects to be ranked - n_object_features : int - Dimensionality of the feature space of each object n_hidden : int Number of hidden layers n_units : int @@ -85,8 +79,6 @@ def __init__( Keyword arguments for the hidden units """ super().__init__( - n_objects=n_objects, - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, add_zeroth_order_model=add_zeroth_order_model, diff --git a/csrank/tests/test_fate.py b/csrank/tests/test_fate.py index 453403e9..9122ad7e 100644 --- a/csrank/tests/test_fate.py +++ b/csrank/tests/test_fate.py @@ -32,7 +32,7 @@ def predict(self, *args, **kwargs): def fit(self, *args, **kwargs): pass - grc = MockClass(n_objects=n_objects, n_features=n_features) + grc = MockClass() grc._construct_layers( activation=grc.activation, 
kernel_initializer=grc.kernel_initializer, @@ -87,7 +87,6 @@ def trivial_ranking_problem_generator(): yield x, y_true fate = FATEObjectRanker( - n_object_features=1, n_hidden_joint_layers=1, n_hidden_set_layers=1, n_hidden_joint_units=5, diff --git a/docs/intro.rst b/docs/intro.rst index da7b09b2..b270a0ba 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -52,7 +52,7 @@ method: .. code-block:: python - fate = cs.FATEChoiceFunction(n_object_features=2) + fate = cs.FATEChoiceFunction() fate.fit(X_train, Y_train) Predictions can then be obtained using: diff --git a/docs/notebooks/FATE-Net-DC.ipynb b/docs/notebooks/FATE-Net-DC.ipynb index 11234602..5997dfaa 100644 --- a/docs/notebooks/FATE-Net-DC.ipynb +++ b/docs/notebooks/FATE-Net-DC.ipynb @@ -139,7 +139,6 @@ "from csrank import FATEObjectRanker\n", "from csrank.losses import smooth_rank_loss\n", "fate = FATEObjectRanker(\n", - " n_object_features=n_features,\n", " loss_function=smooth_rank_loss,\n", " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" ] diff --git a/docs/notebooks/FATE-Net-Ranking.ipynb b/docs/notebooks/FATE-Net-Ranking.ipynb index eacef606..22a12b2b 100644 --- a/docs/notebooks/FATE-Net-Ranking.ipynb +++ b/docs/notebooks/FATE-Net-Ranking.ipynb @@ -131,7 +131,6 @@ "from csrank import FATEObjectRanker\n", "from csrank.losses import smooth_rank_loss\n", "fate = FATEObjectRanker(\n", - " n_object_features=n_features,\n", " loss_function=smooth_rank_loss,\n", " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" ] From 77ede4606e53b31b2856dc91ad40ead71bdbcd32 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 16:33:13 +0200 Subject: [PATCH 04/16] Determine data dimensionality lazily in FETA --- csrank/choicefunction/feta_choice.py | 20 +++---- csrank/core/feta_network.py | 54 ++++++++++--------- csrank/discretechoice/fate_discrete_choice.py | 4 -- csrank/discretechoice/feta_discrete_choice.py | 26 ++++----- 4 files changed, 45 insertions(+), 59 deletions(-) diff --git 
a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index eb171b50..4389bb74 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -24,8 +24,6 @@ class FETAChoiceFunction(FETANetwork, ChoiceFunctions): def __init__( self, - n_objects, - n_object_features, n_hidden=2, n_units=8, add_zeroth_order_model=False, @@ -61,10 +59,6 @@ def __init__( Parameters ---------- - n_objects : int - Number of objects in each query set - n_object_features : int - Dimensionality of the feature space of each object n_hidden : int Number of hidden layers n_units : int @@ -97,8 +91,6 @@ def __init__( Keyword arguments for the hidden units """ super().__init__( - n_objects=n_objects, - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, add_zeroth_order_model=add_zeroth_order_model, @@ -119,7 +111,9 @@ def __init__( self.logger = logging.getLogger(FETAChoiceFunction.__name__) def _construct_layers(self, **kwargs): - self.input_layer = Input(shape=(self.n_objects, self.n_object_features)) + self.input_layer = Input( + shape=(self.n_objects_fit_, self.n_object_features_fit_) + ) # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: @@ -177,7 +171,7 @@ def create_input_lambda(i): self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] - for i in range(self.n_objects): + for i in range(self.n_objects_fit_): x = create_input_lambda(i)(self.input_layer) inputs.append(x) for hidden in self.hidden_layers_zeroth: @@ -186,8 +180,8 @@ def create_input_lambda(i): zeroth_order_scores = concatenate(zeroth_order_outputs) self.logger.debug("0th order model finished") self.logger.debug("Create 1st order model") - outputs = [list() for _ in range(self.n_objects)] - for i, j in combinations(range(self.n_objects), 2): + outputs = [list() for _ in range(self.n_objects_fit_)] + for i, j in combinations(range(self.n_objects_fit_), 2): if 
self._use_zeroth_model: x1 = inputs[i] x2 = inputs[j] @@ -296,7 +290,7 @@ def fit( self.threshold = 0.5 def sub_sampling(self, X, Y): - if self._n_objects <= self.max_number_of_objects: + if self.n_objects_fit_ <= self.max_number_of_objects: return X, Y n_objects = self.max_number_of_objects bucket_size = int(X.shape[1] / n_objects) diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 6088acac..ae7deb64 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -26,8 +26,6 @@ class FETANetwork(Learner): def __init__( self, - n_objects, - n_object_features, n_hidden=2, n_units=8, add_zeroth_order_model=False, @@ -52,10 +50,8 @@ def __init__( self.activation = activation self.loss_function = loss_function self.metrics = metrics - self._n_objects = n_objects self.max_number_of_objects = max_number_of_objects self.num_subsample = num_subsample - self.n_object_features = n_object_features self.batch_size = batch_size self.hash_file = None self.optimizer = optimizers.get(optimizer) @@ -68,24 +64,20 @@ def __init__( if key not in allowed_dense_kwargs: del kwargs[key] self.kwargs = kwargs - self._construct_layers( - kernel_regularizer=self.kernel_regularizer, - kernel_initializer=self.kernel_initializer, - activation=self.activation, - **self.kwargs - ) self._pairwise_model = None self.model = None self._zero_order_model = None @property def n_objects(self): - if self._n_objects > self.max_number_of_objects: + if self.n_objects_fit_ > self.max_number_of_objects: return self.max_number_of_objects - return self._n_objects + return self.n_objects_fit_ def _construct_layers(self, **kwargs): - self.input_layer = Input(shape=(self.n_objects, self.n_object_features)) + self.input_layer = Input( + shape=(self.n_objects_fit_, self.n_object_features_fit_) + ) # Todo: Variable sized input # X = Input(shape=(None, n_features)) self.logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) @@ -124,7 +116,7 @@ def 
_construct_layers(self, **kwargs): def zero_order_model(self): if self._zero_order_model is None and self._use_zeroth_model: self.logger.info("Creating zeroth model") - inp = Input(shape=(self.n_object_features,)) + inp = Input(shape=(self.n_object_features_fit_,)) x = inp for hidden in self.hidden_layers_zeroth: @@ -139,8 +131,8 @@ def zero_order_model(self): def pairwise_model(self): if self._pairwise_model is None: self.logger.info("Creating pairwise model") - x1 = Input(shape=(self.n_object_features,)) - x2 = Input(shape=(self.n_object_features,)) + x1 = Input(shape=(self.n_object_features_fit_,)) + x2 = Input(shape=(self.n_object_features_fit_,)) x1x2 = concatenate([x1, x2]) x2x1 = concatenate([x2, x1]) @@ -213,7 +205,7 @@ def create_input_lambda(i): self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] - for i in range(self.n_objects): + for i in range(self.n_objects_fit_): x = create_input_lambda(i)(self.input_layer) inputs.append(x) for hidden in self.hidden_layers_zeroth: @@ -222,8 +214,8 @@ def create_input_lambda(i): zeroth_order_scores = concatenate(zeroth_order_outputs) self.logger.debug("0th order model finished") self.logger.debug("Create 1st order model") - outputs = [list() for _ in range(self.n_objects)] - for i, j in combinations(range(self.n_objects), 2): + outputs = [list() for _ in range(self.n_objects_fit_)] + for i, j in combinations(range(self.n_objects_fit_), 2): if self._use_zeroth_model: x1 = inputs[i] x2 = inputs[j] @@ -289,6 +281,14 @@ def fit( **kwd : Keyword arguments for the fit function """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._construct_layers( + kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + activation=self.activation, + **self.kwargs + ) + self.logger.debug("Enter fit function...") self.random_state_ = check_random_state(self.random_state) @@ -310,18 +310,20 @@ def fit( self.model.save_weights(self.hash_file) def 
sub_sampling(self, X, Y): - if self._n_objects > self.max_number_of_objects: - bucket_size = int(self._n_objects / self.max_number_of_objects) - idx = self.random_state_.randint(bucket_size, size=(len(X), self.n_objects)) + if self.n_objects_fit_ > self.max_number_of_objects: + bucket_size = int(self.n_objects_fit_ / self.max_number_of_objects) + idx = self.random_state_.randint( + bucket_size, size=(len(X), self.n_objects_fit_) + ) # TODO: subsampling multiple rankings - idx += np.arange(start=0, stop=self._n_objects, step=bucket_size)[ - : self.n_objects + idx += np.arange(start=0, stop=self.n_objects_fit_, step=bucket_size)[ + : self.n_objects_fit_ ] X = X[np.arange(X.shape[0])[:, None], idx] Y = Y[np.arange(X.shape[0])[:, None], idx] tmp_sort = Y.argsort(axis=-1) Y = np.empty_like(Y) - Y[np.arange(len(X))[:, None], tmp_sort] = np.arange(self.n_objects) + Y[np.arange(len(X))[:, None], tmp_sort] = np.arange(self.n_objects_fit_) return X, Y def _predict_scores_fixed(self, X, **kwargs): @@ -329,7 +331,7 @@ def _predict_scores_fixed(self, X, **kwargs): self.logger.info( "For Test instances {} objects {} features {}".format(*X.shape) ) - if self.n_objects != n_objects: + if self.n_objects_fit_ != n_objects: scores = self._predict_scores_using_pairs(X, **kwargs) else: scores = self.model.predict(X, **kwargs) diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index 3d3c5aec..4842c0f3 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -11,7 +11,6 @@ class FATEDiscreteChoiceFunction(FATENetwork, DiscreteObjectChooser): def __init__( self, - n_object_features, n_hidden_set_layers=2, n_hidden_set_units=2, loss_function="categorical_hinge", @@ -48,8 +47,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object n_hidden_set_layers : int Number of set layers. 
n_hidden_set_units : int @@ -80,7 +77,6 @@ def __init__( self.loss_function = loss_function self.metrics = metrics super().__init__( - n_object_features=n_object_features, n_hidden_set_layers=n_hidden_set_layers, n_hidden_set_units=n_hidden_set_units, n_hidden_joint_layers=n_hidden_joint_layers, diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index c40710eb..afbff700 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -22,8 +22,6 @@ class FETADiscreteChoiceFunction(FETANetwork, DiscreteObjectChooser): def __init__( self, - n_objects, - n_object_features, n_hidden=2, n_units=8, add_zeroth_order_model=False, @@ -59,10 +57,6 @@ def __init__( Parameters ---------- - n_objects : int - Number of objects in each query set - n_object_features : int - Dimensionality of the feature space of each object n_hidden : int Number of hidden layers n_units : int @@ -95,8 +89,6 @@ def __init__( Keyword arguments for the hidden units """ super().__init__( - n_objects=n_objects, - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, add_zeroth_order_model=add_zeroth_order_model, @@ -116,7 +108,9 @@ def __init__( self.logger = logging.getLogger(FETADiscreteChoiceFunction.__name__) def _construct_layers(self, **kwargs): - self.input_layer = Input(shape=(self.n_objects, self.n_object_features)) + self.input_layer = Input( + shape=(self.n_objects_fit_, self.n_object_features_fit_) + ) # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: @@ -186,7 +180,7 @@ def create_input_lambda(i): self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] - for i in range(self.n_objects): + for i in range(self.n_objects_fit_): x = create_input_lambda(i)(self.input_layer) inputs.append(x) for hidden in self.hidden_layers_zeroth: @@ -195,8 +189,8 @@ def create_input_lambda(i): zeroth_order_scores = 
concatenate(zeroth_order_outputs) self.logger.debug("0th order model finished") self.logger.debug("Create 1st order model") - outputs = [list() for _ in range(self.n_objects)] - for i, j in combinations(range(self.n_objects), 2): + outputs = [list() for _ in range(self.n_objects_fit_)] + for i, j in combinations(range(self.n_objects_fit_), 2): if self._use_zeroth_model: x1 = inputs[i] x2 = inputs[j] @@ -239,10 +233,10 @@ def get_score_object(i): get_score_object(i)(zeroth_order_scores), ] ) - for i in range(self.n_objects) + for i in range(self.n_objects_fit_) ] scores = [] - for i in range(self.n_objects): + for i in range(self.n_objects_fit_): scores.append(self.weighted_sum(concat_scores[i])) scores = concatenate(scores) @@ -259,7 +253,7 @@ def get_score_object(i): # zeroth_order_scores = expand_dims()(zeroth_order_scores) # concat_scores = concatenate([scores, zeroth_order_scores], axis=-1) # weighted_sum = Conv1D(name='weighted_sum', filters=1, kernel_size=(1), strides=1, activation='linear', - # kernel_initializer=self.kernel_initializer, input_shape=(self.n_objects, 2), + # kernel_initializer=self.kernel_initializer, input_shape=(self.n_objects_fit_, 2), # kernel_regularizer=self.kernel_regularizer, use_bias=False) # scores = weighted_sum(concat_scores) # scores = squeeze_dims()(scores) @@ -313,7 +307,7 @@ def _predict_scores_using_pairs(self, X, **kwd): return scores def _create_zeroth_order_model(self): - inp = Input(shape=(self.n_object_features,)) + inp = Input(shape=(self.n_object_features_fit_,)) x = inp for hidden in self.hidden_layers_zeroth: From cad28c69de7e1a323a843c6de568f32039a05950 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 16:45:42 +0200 Subject: [PATCH 05/16] Determine data dimensionality lazily in ListNet --- csrank/objectranking/list_net.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index 
076d0c07..0d53b167 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -26,8 +26,7 @@ class ListNet(Learner, ObjectRanker): def __init__( self, - n_object_features, - n_top, + n_top=1, n_hidden=2, n_units=8, loss_function=plackett_luce_loss, @@ -53,8 +52,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_top : int Size of the top-k-subrankings to consider for training hash_file: str @@ -90,8 +87,6 @@ def __init__( [1] Z. Cao, T. Qin, T. Liu, M. Tsai and H. Li. "Learning to Rank: From Pairwise Approach to Listwise Approach." ICML, 2007. """ self.logger = logging.getLogger(ListNet.__name__) - self.n_object_features = n_object_features - self.n_objects = n_top self.n_top = n_top self.batch_normalization = batch_normalization self.activation = activation @@ -108,12 +103,6 @@ def __init__( if key not in allowed_dense_kwargs: del kwargs[key] self.kwargs = kwargs - self._construct_layers( - kernel_regularizer=self.kernel_regularizer, - kernel_initializer=self.kernel_initializer, - activation=self.activation, - **self.kwargs - ) self.threshold_instances = int(1e10) self.batch_size = batch_size @@ -123,7 +112,7 @@ def __init__( self._scoring_model = None def _construct_layers(self, **kwargs): - self.input_layer = Input(shape=(self.n_top, self.n_object_features)) + self.input_layer = Input(shape=(self.n_top, self.n_object_features_fit_)) self.output_node = Dense( 1, activation="linear", kernel_regularizer=self.kernel_regularizer ) @@ -180,7 +169,13 @@ def fit( Keyword arguments for the fit function """ self.random_state_ = check_random_state(self.random_state) - self.n_objects = X.shape[1] + _n_instances, _n_objects, self.n_object_features_fit_ = X.shape + self._construct_layers( + kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + activation=self.activation, + **self.kwargs + ) self.logger.debug("Creating top-k dataset") X, Y = 
self._create_topk(X, Y) self.logger.debug("Finished creating the dataset") @@ -236,7 +231,7 @@ def scoring_model(self): """ if self._scoring_model is None: self.logger.info("Creating scoring model") - inp = Input(shape=(self.n_object_features,)) + inp = Input(shape=(self.n_object_features_fit_,)) x = inp for hidden_layer in self.hidden_layers: x = hidden_layer(x) From a47247b4c19407a873fef51c96c492e17c422809 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 16:55:44 +0200 Subject: [PATCH 06/16] Determine data dimensionality lazily in NestedLogitModel --- csrank/discretechoice/nested_logit_model.py | 33 ++++++++++----------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/csrank/discretechoice/nested_logit_model.py b/csrank/discretechoice/nested_logit_model.py index e286a67a..a94ab78b 100644 --- a/csrank/discretechoice/nested_logit_model.py +++ b/csrank/discretechoice/nested_logit_model.py @@ -34,8 +34,6 @@ class NestedLogitModel(DiscreteObjectChooser, Learner): def __init__( self, - n_object_features, - n_objects, n_nests=None, loss_function="", regularization="l1", @@ -66,12 +64,10 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space - n_objects: int - Number of objects in each query set n_nests : int range : [2,n_objects/2] - The number of nests/subsets in which the objects are divided + The number of nests/subsets in which the objects are divided. + This may not surpass half the amount of objects this model will + be trained on. loss_function : string , {‘categorical_crossentropy’, ‘binary_crossentropy’, ’categorical_hinge’} Loss function to be used for the discrete choice decision from the query set regularization : string, {‘l1’, ‘l2’}, string @@ -92,12 +88,7 @@ def __init__( [3] Kenneth Train and Daniel McFadden. „The goods/leisure tradeoff and disaggregate work trip mode choice models“. In: Transportation research 12.5 (1978), pp. 
349–353 """ self.logger = logging.getLogger(NestedLogitModel.__name__) - self.n_object_features = n_object_features - self.n_objects = n_objects - if n_nests is None: - self.n_nests = int(n_objects / 2) - else: - self.n_nests = n_nests + self.n_nests = n_nests self.alpha = alpha self.random_state = random_state self.loss_function = likelihood_dict.get(loss_function, None) @@ -334,8 +325,8 @@ def construct_model(self, X, Y): self.Yt = theano.shared(Y) self.y_nests = theano.shared(y_nests) shapes = { - "weights": self.n_object_features, - "weights_k": self.n_object_features, + "weights": self.n_object_features_fit_, + "weights_k": self.n_object_features_fit_, } weights_dict = create_weight_dictionary(self.model_configuration, shapes) @@ -400,6 +391,9 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + if self.n_nests is None: + self.n_nests = int(self.n_objects_fit_ / 2) self.random_state_ = check_random_state(self.random_state) self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) @@ -408,12 +402,15 @@ def _predict_scores_fixed(self, X, **kwargs): y_nests = self.create_nests(X) mean_trace = dict(pm.summary(self.trace)["mean"]) weights = np.array( - [mean_trace["weights[{}]".format(i)] for i in range(self.n_object_features)] + [ + mean_trace["weights[{}]".format(i)] + for i in range(self.n_object_features_fit_) + ] ) weights_k = np.array( [ mean_trace["weights_k[{}]".format(i)] - for i in range(self.n_object_features) + for i in range(self.n_object_features_fit_) ] ) lambda_k = np.array( @@ -456,7 +453,7 @@ def set_tunable_parameters( if alpha is not None: self.alpha = alpha if n_nests is None: - self.n_nests = int(self.n_objects / 2) + self.n_nests = int(self.n_objects_fit_ / 2) else: self.n_nests = n_nests self.regularization = regularization From 249378902cf2c0ac94eb40404f0a750fea639a5d Mon Sep 
17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 17:15:02 +0200 Subject: [PATCH 07/16] Determine data dimensionality lazily in GeneralizedNestedLogitModel --- .../generalized_nested_logit.py | 37 ++++++++++--------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/csrank/discretechoice/generalized_nested_logit.py b/csrank/discretechoice/generalized_nested_logit.py index d826b2f7..20ae573c 100644 --- a/csrank/discretechoice/generalized_nested_logit.py +++ b/csrank/discretechoice/generalized_nested_logit.py @@ -34,8 +34,6 @@ class GeneralizedNestedLogitModel(DiscreteObjectChooser, Learner): def __init__( self, - n_object_features, - n_objects, n_nests=None, loss_function="None", regularization="l2", @@ -64,12 +62,12 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_objects: int Number of objects in each query set n_nests : int range : [2,n_objects/2] - The number of nests/subsets in which the objects are divided + The number of nests/subsets in which the objects are divided. + This may not surpass half the amount of objects this model will + be trained on. 
loss_function : string , {‘categorical_crossentropy’, ‘binary_crossentropy’, ’categorical_hinge’} Loss function to be used for the discrete choice decision from the query set regularization : string, {‘l1’, ‘l2’}, string @@ -92,12 +90,7 @@ def __init__( """ self.logger = logging.getLogger(GeneralizedNestedLogitModel.__name__) - self.n_object_features = n_object_features - self.n_objects = n_objects - if n_nests is None: - self.n_nests = n_objects + int(n_objects / 2) - else: - self.n_nests = n_nests + self.n_nests = n_nests self.alpha = alpha self.loss_function = likelihood_dict.get(loss_function, None) @@ -274,8 +267,8 @@ def construct_model(self, X, Y): self.Xt = theano.shared(X) self.Yt = theano.shared(Y) shapes = { - "weights": self.n_object_features, - "weights_ik": (self.n_object_features, self.n_nests), + "weights": self.n_object_features_fit_, + "weights_ik": (self.n_object_features_fit_, self.n_nests), } weights_dict = create_weight_dictionary(self.model_configuration, shapes) @@ -339,19 +332,28 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + if self.n_nests is None: + # TODO this looks like a bug to me, but it was already done this way + # before (moved out of __init__). The `n_objects` summand probably + # should be removed. 
+ self.n_nests = self.n_objects_fit_ + int(self.n_objects_fit_ / 2) self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) def _predict_scores_fixed(self, X, **kwargs): mean_trace = dict(pm.summary(self.trace)["mean"]) weights = np.array( - [mean_trace["weights[{}]".format(i)] for i in range(self.n_object_features)] + [ + mean_trace["weights[{}]".format(i)] + for i in range(self.n_object_features_fit_) + ] ) lambda_k = np.array( [mean_trace["lambda_k[{}]".format(i)] for i in range(self.n_nests)] ) - weights_ik = np.zeros((self.n_object_features, self.n_nests)) - for i, k in product(range(self.n_object_features), range(self.n_nests)): + weights_ik = np.zeros((self.n_object_features_fit_, self.n_nests)) + for i, k in product(range(self.n_object_features_fit_), range(self.n_nests)): weights_ik[i][k] = mean_trace["weights_ik[{},{}]".format(i, k)] alpha_ik = np.dot(X, weights_ik) alpha_ik = npu.softmax(alpha_ik, axis=2) @@ -389,8 +391,9 @@ def set_tunable_parameters( """ if alpha is not None: self.alpha = alpha + # TODO see the comment for n_nests above if n_nests is None: - self.n_nests = self.n_objects + int(self.n_objects / 2) + self.n_nests = self.n_objects_fit_ + int(self.n_objects_fit_ / 2) else: self.n_nests = n_nests if loss_function in likelihood_dict.keys(): From a2712f9806a918f60c679ad4f0accf0a75f7a422 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 17:27:57 +0200 Subject: [PATCH 08/16] Determine data dimensionality lazily in PairedCombinatorialLogit --- .../paired_combinatorial_logit.py | 24 +++++++++---------- csrank/tests/test_ranking.py | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/csrank/discretechoice/paired_combinatorial_logit.py b/csrank/discretechoice/paired_combinatorial_logit.py index b8262843..4502d095 100644 --- a/csrank/discretechoice/paired_combinatorial_logit.py +++ b/csrank/discretechoice/paired_combinatorial_logit.py @@ -34,8 +34,6 @@ class
PairedCombinatorialLogit(DiscreteObjectChooser, Learner): def __init__( self, - n_object_features, - n_objects, loss_function="", regularization="l2", alpha=5e-2, @@ -67,8 +65,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_objects: int Number of objects in each query set n_nests : int range : [2,n_objects/2] @@ -93,10 +89,6 @@ def __init__( [3] Chaushie Chu. „A paired combinatorial logit model for travel demand analysis“. In: Proceedings of the fifth world conference on transportation research. Vol. 4.1989, pp. 295–309 """ self.logger = logging.getLogger(PairedCombinatorialLogit.__name__) - self.n_object_features = n_object_features - self.n_objects = n_objects - self.nests_indices = np.array(list(combinations(np.arange(n_objects), 2))) - self.n_nests = len(self.nests_indices) self.alpha = alpha self.random_state = random_state self.loss_function = likelihood_dict.get(loss_function, None) @@ -194,7 +186,7 @@ def get_probabilities(self, utility, lambda_k): Choice probabilities :math:`P_i` of the objects :math:`x_i \\in Q` in the query sets """ - n_objects = self.n_objects + n_objects = self.n_objects_fit_ nests_indices = self.nests_indices n_nests = self.n_nests lambdas = tt.ones((n_objects, n_objects), dtype=np.float) @@ -220,7 +212,7 @@ def get_probabilities(self, utility, lambda_k): return p def _get_probabilities_np(self, utility, lambda_k): - n_objects = self.n_objects + n_objects = self.n_objects_fit_ nests_indices = self.nests_indices n_nests = self.n_nests temp_lambdas = np.ones((n_objects, n_objects), lambda_k.dtype) @@ -269,7 +261,7 @@ def construct_model(self, X, Y): with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) - shapes = {"weights": self.n_object_features} + shapes = {"weights": self.n_object_features_fit_} weights_dict = create_weight_dictionary(self.model_configuration, shapes) lambda_k = pm.Uniform("lambda_k", self.alpha, 1.0, shape=self.n_nests) 
utility = tt.dot(self.Xt, weights_dict["weights"]) @@ -330,13 +322,21 @@ def fit( Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self.nests_indices = np.array( + list(combinations(np.arange(self.n_objects_fit_), 2)) + ) + self.n_nests = len(self.nests_indices) self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) def _predict_scores_fixed(self, X, **kwargs): mean_trace = dict(pm.summary(self.trace)["mean"]) weights = np.array( - [mean_trace["weights[{}]".format(i)] for i in range(self.n_object_features)] + [ + mean_trace["weights[{}]".format(i)] + for i in range(self.n_object_features_fit_) + ] ) lambda_k = np.array( [mean_trace["lambda_k[{}]".format(i)] for i in range(self.n_nests)] diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py index f7b6509e..b719d83a 100644 --- a/csrank/tests/test_ranking.py +++ b/csrank/tests/test_ranking.py @@ -70,8 +70,8 @@ def check_params_tunable(tunable_obj, params, rtol=1e-2, atol=1e-4): isinstance(tunable_obj, PairedCombinatorialLogit) and key == "n_nests" ): - tunable_obj.n_nests == tunable_obj.n_objects * ( - tunable_obj.n_objects - 1 + tunable_obj.n_nests == tunable_obj.n_objects_fit_ * ( + tunable_obj.n_objects_fit_ - 1 ) / 2 else: assert np.isclose( From 9560cb078e0e89d30311224e98962f55afdec0fa Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:25:59 +0200 Subject: [PATCH 09/16] Determine data dimensionality lazily in RankNet --- csrank/choicefunction/ranknet_choice.py | 10 +-------- csrank/core/ranknet_core.py | 22 +++++++++---------- .../discretechoice/ranknet_discrete_choice.py | 10 +-------- csrank/objectranking/rank_net.py | 10 +-------- docs/notebooks/Rank-Net-Choice.ipynb | 1 - 5 files changed, 14 insertions(+), 39 deletions(-) diff --git 
a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py index ee42b504..1a5fa142 100644 --- a/csrank/choicefunction/ranknet_choice.py +++ b/csrank/choicefunction/ranknet_choice.py @@ -12,7 +12,6 @@ class RankNetChoiceFunction(RankNetCore, ChoiceFunctions): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -40,8 +39,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -74,7 +71,6 @@ def __init__( [2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581). """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -89,11 +85,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(RankNetChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") self.threshold = 0.5 def construct_model(self): diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index 6c7b5203..c6fbd8b8 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -21,7 +21,6 @@ class RankNetCore(Learner): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -36,7 +35,6 @@ def __init__( **kwargs ): self.logger = logging.getLogger(RankNetCore.__name__) - self.n_object_features = n_object_features self.batch_normalization = batch_normalization self.activation = activation self.metrics = metrics @@ -58,17 +56,11 @@ def __init__( self.model = None self.hash_file = None self.random_state = random_state - self._construct_layers( - kernel_regularizer=self.kernel_regularizer, - kernel_initializer=self.kernel_initializer, - activation=self.activation, - **self.kwargs - ) def 
_construct_layers(self, **kwargs): self.logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) - self.x1 = Input(shape=(self.n_object_features,)) - self.x2 = Input(shape=(self.n_object_features,)) + self.x1 = Input(shape=(self.n_object_features_fit_,)) + self.x2 = Input(shape=(self.n_object_features_fit_,)) self.output_node = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) @@ -149,11 +141,19 @@ def fit( Keyword arguments for the fit function """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape X1, X2, Y_single = self._convert_instances_(X, Y) self.logger.debug("Instances created {}".format(X1.shape[0])) self.logger.debug("Creating the model") + self._construct_layers( + kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + activation=self.activation, + **self.kwargs + ) + # Model with input as two objects and output as probability of x1>x2 self.model = self.construct_model() self.logger.debug("Finished Creating the model, now fitting started") @@ -182,7 +182,7 @@ def scoring_model(self): """ if self._scoring_model is None: self.logger.info("creating scoring model") - inp = Input(shape=(self.n_object_features,)) + inp = Input(shape=(self.n_object_features_fit_,)) x = inp for hidden_layer in self.hidden_layers: x = hidden_layer(x) diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py index c09f328e..3d0c60b1 100644 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ b/csrank/discretechoice/ranknet_discrete_choice.py @@ -11,7 +11,6 @@ class RankNetDiscreteChoiceFunction(RankNetCore, DiscreteObjectChooser): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -40,8 +39,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : 
int Number of hidden layers used in the scoring network n_units : int @@ -74,7 +71,6 @@ def __init__( [2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581). """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -89,11 +85,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(RankNetDiscreteChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def construct_model(self): return super().construct_model() diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py index c82a5726..aa04766b 100644 --- a/csrank/objectranking/rank_net.py +++ b/csrank/objectranking/rank_net.py @@ -13,7 +13,6 @@ class RankNet(RankNetCore, ObjectRanker): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -40,8 +39,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -76,7 +73,6 @@ def __init__( """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -91,11 +87,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(RankNet.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def construct_model(self): return super().construct_model() diff --git a/docs/notebooks/Rank-Net-Choice.ipynb b/docs/notebooks/Rank-Net-Choice.ipynb index 342aac22..d038a760 100644 --- a/docs/notebooks/Rank-Net-Choice.ipynb +++ b/docs/notebooks/Rank-Net-Choice.ipynb @@ -124,7 +124,6 @@ "outputs": [], "source": [ "ranknet = RankNetChoiceFunction(\n", - " 
n_object_features=n_features,\n", " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" ] }, From 795f6a349c9257ad0640e70ef0c72c28f30021a5 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:16:51 +0200 Subject: [PATCH 10/16] Determine data dimensionality lazily in CmpNet --- csrank/choicefunction/cmpnet_choice.py | 10 +--------- csrank/core/cmpnet_core.py | 20 +++++++++---------- .../discretechoice/cmpnet_discrete_choice.py | 10 +--------- csrank/objectranking/cmp_net.py | 10 +--------- 4 files changed, 13 insertions(+), 37 deletions(-) diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py index d30e81a8..5c069ff3 100644 --- a/csrank/choicefunction/cmpnet_choice.py +++ b/csrank/choicefunction/cmpnet_choice.py @@ -12,7 +12,6 @@ class CmpNetChoiceFunction(CmpNetCore, ChoiceFunctions): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -49,8 +48,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -80,7 +77,6 @@ def __init__( """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -95,11 +91,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(CmpNetChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") self.threshold = 0.5 def _convert_instances_(self, X, Y): diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 9aaca074..ef70dd27 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -22,7 +22,6 @@ class CmpNetCore(Learner): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -37,7 +36,6 @@ def __init__( **kwargs ): 
self.logger = logging.getLogger("CmpNet") - self.n_object_features = n_object_features self.batch_normalization = batch_normalization self.activation = activation self.hash_file = None @@ -62,20 +60,15 @@ def __init__( self.threshold_instances = int(1e10) self.random_state = random_state self.model = None - self._construct_layers( - kernel_regularizer=self.kernel_regularizer, - kernel_initializer=self.kernel_initializer, - activation=self.activation, - **self.kwargs - ) def _construct_layers(self, **kwargs): self.output_node = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) - self.x1 = Input(shape=(self.n_object_features,)) - self.x2 = Input(shape=(self.n_object_features,)) + + self.x1 = Input(shape=(self.n_object_features_fit_,)) + self.x2 = Input(shape=(self.n_object_features_fit_,)) if self.batch_normalization: self.hidden_layers = [ NormalizedDense(self.n_units, name="hidden_{}".format(x), **kwargs) @@ -157,9 +150,16 @@ def fit( Keyword arguments for the fit function """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape x1, x2, y_double = self._convert_instances_(X, Y) self.logger.debug("Instances created {}".format(x1.shape[0])) + self._construct_layers( + kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + activation=self.activation, + **self.kwargs + ) self.model = self.construct_model() self.logger.debug("Finished Creating the model, now fitting started") diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py index e5440688..457b26a5 100644 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ b/csrank/discretechoice/cmpnet_discrete_choice.py @@ -11,7 +11,6 @@ class CmpNetDiscreteChoiceFunction(CmpNetCore, DiscreteObjectChooser): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -46,8 +45,6 @@ 
def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -78,7 +75,6 @@ def __init__( [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco Scarselli. 2011. SortNet: Learning to Rank by a Neural Preference Function. IEEE Trans. Neural Networks 22, 9 (2011), 1368–1380. https://doi.org/10.1109/TNN.2011.2160875 """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -93,11 +89,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(CmpNetDiscreteChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def _convert_instances_(self, X, Y): self.logger.debug("Creating the Dataset") diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py index ad218992..d86e01ea 100644 --- a/csrank/objectranking/cmp_net.py +++ b/csrank/objectranking/cmp_net.py @@ -13,7 +13,6 @@ class CmpNet(CmpNetCore, ObjectRanker): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -50,8 +49,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -83,7 +80,6 @@ def __init__( [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco Scarselli. 2011. SortNet: Learning to Rank by a Neural Preference Function. IEEE Trans. Neural Networks 22, 9 (2011), 1368–1380. 
https://doi.org/10.1109/TNN.2011.2160875 """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -98,11 +94,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(CmpNet.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def _convert_instances_(self, X, Y): self.logger.debug("Creating the Dataset") From 393bf1968a0bfea6cec68dba14c9eb9173574381 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:14:11 +0200 Subject: [PATCH 11/16] Determine data dimensionality lazily in PairwiseSVM --- csrank/choicefunction/pairwise_choice.py | 13 +++---------- csrank/core/pairwise_svm.py | 7 ++----- csrank/discretechoice/pairwise_discrete_choice.py | 13 +++---------- csrank/objectranking/rank_svm.py | 13 +++---------- 4 files changed, 11 insertions(+), 35 deletions(-) diff --git a/csrank/choicefunction/pairwise_choice.py b/csrank/choicefunction/pairwise_choice.py index f6e5448a..890c30e6 100644 --- a/csrank/choicefunction/pairwise_choice.py +++ b/csrank/choicefunction/pairwise_choice.py @@ -10,7 +10,6 @@ class PairwiseSVMChoiceFunction(PairwiseSVM, ChoiceFunctions): def __init__( self, - n_object_features, C=1.0, tol=1e-4, normalize=True, @@ -30,8 +29,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space C : float, optional Penalty parameter of the error term tol : float, optional @@ -54,7 +51,6 @@ def __init__( """ super().__init__( - n_object_features=n_object_features, C=C, tol=tol, normalize=normalize, @@ -63,11 +59,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(PairwiseSVMChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") self.threshold = 0.5 def _convert_instances_(self, X, Y): 
@@ -80,7 +72,7 @@ def _convert_instances_(self, X, Y): y_single, ) = generate_complete_pairwise_dataset(X, Y) del garbage - assert x_train.shape[1] == self.n_object_features + assert x_train.shape[1] == self.n_object_features_fit_ self.logger.debug( "Finished the Dataset with instances {}".format(x_train.shape[0]) ) @@ -107,6 +99,7 @@ def fit(self, X, Y, tune_size=0.1, thin_thresholds=1, verbose=0, **kwd): Keyword arguments for the fit function """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape if tune_size > 0: X_train, X_val, Y_train, Y_val = train_test_split( X, Y, test_size=tune_size, random_state=self.random_state diff --git a/csrank/core/pairwise_svm.py b/csrank/core/pairwise_svm.py index 23bbffba..d63197f1 100644 --- a/csrank/core/pairwise_svm.py +++ b/csrank/core/pairwise_svm.py @@ -13,7 +13,6 @@ class PairwiseSVM(Learner): def __init__( self, - n_object_features, C=1.0, tol=1e-4, normalize=True, @@ -25,8 +24,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space C : float, optional Penalty parameter of the error term tol : float, optional @@ -45,7 +42,6 @@ def __init__( [1] Joachims, T. (2002, July). "Optimizing search engines using clickthrough data.", Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 133-142). ACM. 
""" self.normalize = normalize - self.n_object_features = n_object_features self.C = C self.tol = tol self.logger = logging.getLogger("RankSVM") @@ -71,6 +67,7 @@ def fit(self, X, Y, **kwargs): """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape x_train, y_single = self._convert_instances_(X, Y) if x_train.shape[0] > self.threshold_instances: self.model = LogisticRegression( @@ -101,7 +98,7 @@ def fit(self, X, Y, **kwargs): self.logger.debug("Fitting Complete") def _predict_scores_fixed(self, X, **kwargs): - assert X.shape[-1] == self.n_object_features + assert X.shape[-1] == self.n_object_features_fit_ self.logger.info( "For Test instances {} objects {} features {}".format(*X.shape) ) diff --git a/csrank/discretechoice/pairwise_discrete_choice.py b/csrank/discretechoice/pairwise_discrete_choice.py index ed6efdc4..5200007c 100644 --- a/csrank/discretechoice/pairwise_discrete_choice.py +++ b/csrank/discretechoice/pairwise_discrete_choice.py @@ -8,7 +8,6 @@ class PairwiseSVMDiscreteChoiceFunction(PairwiseSVM, DiscreteObjectChooser): def __init__( self, - n_object_features, C=1.0, tol=1e-4, normalize=True, @@ -28,8 +27,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space C : float, optional Penalty parameter of the error term tol : float, optional @@ -51,7 +48,6 @@ def __init__( [2] Sebastián Maldonado, Ricardo Montoya, and Richard Weber. „Advanced conjoint analysis using feature selection via support vector machines“. In: European Journal of Operational Research 241.2 (2015), pp. 564 –574. 
""" super().__init__( - n_object_features=n_object_features, C=C, tol=tol, normalize=normalize, @@ -60,11 +56,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(PairwiseSVMDiscreteChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def _convert_instances_(self, X, Y): self.logger.debug("Creating the Dataset") @@ -76,13 +68,14 @@ def _convert_instances_(self, X, Y): y_single, ) = generate_complete_pairwise_dataset(X, Y) del garbage - assert x_train.shape[1] == self.n_object_features + assert x_train.shape[1] == self.n_object_features_fit_ self.logger.debug( "Finished the Dataset with instances {}".format(x_train.shape[0]) ) return x_train, y_single def fit(self, X, Y, **kwd): + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape super().fit(X, Y, **kwd) def _predict_scores_fixed(self, X, **kwargs): diff --git a/csrank/objectranking/rank_svm.py b/csrank/objectranking/rank_svm.py index de26bc24..f59d5a45 100644 --- a/csrank/objectranking/rank_svm.py +++ b/csrank/objectranking/rank_svm.py @@ -10,7 +10,6 @@ class RankSVM(ObjectRanker, PairwiseSVM): def __init__( self, - n_object_features, C=1.0, tol=1e-4, normalize=True, @@ -30,8 +29,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space C : float, optional Penalty parameter of the error term tol : float, optional @@ -50,7 +47,6 @@ def __init__( [1] Joachims, T. (2002, July). "Optimizing search engines using clickthrough data.", Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 133-142). ACM. 
""" super().__init__( - n_object_features=n_object_features, C=C, tol=tol, normalize=normalize, @@ -59,13 +55,10 @@ def __init__( **kwargs ) self.logger = logging.getLogger(RankSVM.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def fit(self, X, Y, **kwargs): + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape super().fit(X, Y, **kwargs) def _convert_instances_(self, X, Y): @@ -78,7 +71,7 @@ def _convert_instances_(self, X, Y): y_single, ) = generate_complete_pairwise_dataset(X, Y) del garbage - assert x_train.shape[1] == self.n_object_features + assert x_train.shape[1] == self.n_object_features_fit_ self.logger.debug( "Finished the Dataset with instances {}".format(x_train.shape[0]) ) From 4950b3b2780fadf80c8439180ce15bde51283933 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:44:20 +0200 Subject: [PATCH 12/16] Determine data dimensionality lazily in GeneralizedLinearModel --- csrank/choicefunction/generalized_linear_model.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/csrank/choicefunction/generalized_linear_model.py b/csrank/choicefunction/generalized_linear_model.py index 5db21f25..fc68687c 100644 --- a/csrank/choicefunction/generalized_linear_model.py +++ b/csrank/choicefunction/generalized_linear_model.py @@ -30,9 +30,7 @@ class GeneralizedLinearModel(ChoiceFunctions, Learner): - def __init__( - self, n_object_features, regularization="l2", random_state=None, **kwargs - ): + def __init__(self, regularization="l2", random_state=None, **kwargs): """ Create an instance of the GeneralizedLinearModel model for learning the choice function. This model is adapted from the multinomial logit model :class:`csrank.discretechoice.multinomial_logit_model.MultinomialLogitModel`. 
@@ -52,8 +50,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space regularization : string, optional Regularization technique to be used for estimating the weights random_state : int or object @@ -68,7 +64,6 @@ def __init__( [2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986 """ self.logger = logging.getLogger(GeneralizedLinearModel.__name__) - self.n_object_features = n_object_features if regularization in ["l1", "l2"]: self.regularization = regularization else: @@ -156,8 +151,8 @@ def construct_model(self, X, Y): with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) - shapes = {"weights": self.n_object_features} - # shapes = {'weights': (self.n_object_features, 3)} + shapes = {"weights": self.n_object_features_fit_} + # shapes = {'weights': (self.n_object_features_fit_, 3)} weights_dict = create_weight_dictionary(self.model_configuration, shapes) intercept = pm.Normal("intercept", mu=0, sd=10) utility = tt.dot(self.Xt, weights_dict["weights"]) + intercept @@ -274,6 +269,7 @@ def _fit( }, **kwargs ): + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) @@ -281,7 +277,7 @@ def _predict_scores_fixed(self, X, **kwargs): d = dict(pm.summary(self.trace)["mean"]) intercept = 0.0 weights = np.array( - [d["weights[{}]".format(i)] for i in range(self.n_object_features)] + [d["weights[{}]".format(i)] for i in range(self.n_object_features_fit_)] ) if "intercept" in d: intercept = intercept + d["intercept"] From f125f12599fef1cff60f035d8e5fb75640b683ae Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:45:38 +0200 Subject: [PATCH 13/16] Determine data dimensionality lazily in MixedLogitModel --- csrank/discretechoice/mixed_logit_model.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git 
a/csrank/discretechoice/mixed_logit_model.py b/csrank/discretechoice/mixed_logit_model.py index 4501c98d..2a41ea24 100644 --- a/csrank/discretechoice/mixed_logit_model.py +++ b/csrank/discretechoice/mixed_logit_model.py @@ -31,14 +31,7 @@ class MixedLogitModel(DiscreteObjectChooser, Learner): - def __init__( - self, - n_object_features, - n_mixtures=4, - loss_function="", - regularization="l2", - **kwargs - ): + def __init__(self, n_mixtures=4, loss_function="", regularization="l2", **kwargs): """ Create an instance of the Mixed Logit model for learning the discrete choice function. In this model we assume weights of this model to be random due to which this model can learn different variations in choices @@ -59,8 +52,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_mixtures: int (range : [2, inf]) The number of logit models (:math:`R`) which are used to estimate the choice probability loss_function : string , {‘categorical_crossentropy’, ‘binary_crossentropy’, ’categorical_hinge’} @@ -81,7 +72,6 @@ def __init__( [3] Daniel McFadden and Kenneth Train. „Mixed MNL models for discrete response“. In: Journal of applied Econometrics 15.5 (2000), pp. 
447–470 """ self.logger = logging.getLogger(MixedLogitModel.__name__) - self.n_object_features = n_object_features self.loss_function = likelihood_dict.get(loss_function, None) if regularization in ["l1", "l2"]: self.regularization = regularization @@ -166,7 +156,7 @@ def construct_model(self, X, Y): with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) - shapes = {"weights": (self.n_object_features, self.n_mixtures)} + shapes = {"weights": (self.n_object_features_fit_, self.n_mixtures)} weights_dict = create_weight_dictionary(self.model_configuration, shapes) utility = tt.dot(self.Xt, weights_dict["weights"]) self.p = tt.mean(ttu.softmax(utility, axis=1), axis=2) @@ -225,13 +215,14 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) def _predict_scores_fixed(self, X, **kwargs): summary = dict(pm.summary(self.trace)["mean"]) - weights = np.zeros((self.n_object_features, self.n_mixtures)) - for i, k in product(range(self.n_object_features), range(self.n_mixtures)): + weights = np.zeros((self.n_object_features_fit_, self.n_mixtures)) + for i, k in product(range(self.n_object_features_fit_), range(self.n_mixtures)): weights[i][k] = summary["weights[{},{}]".format(i, k)] utility = np.dot(X, weights) p = np.mean(npu.softmax(utility, axis=1), axis=2) From 3e3df20a80268db93bb1ecb0490a63e326f391d3 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:46:01 +0200 Subject: [PATCH 14/16] Determine data dimensionality lazily in MultinomialLogitModel --- csrank/discretechoice/multinomial_logit_model.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/csrank/discretechoice/multinomial_logit_model.py b/csrank/discretechoice/multinomial_logit_model.py index 085b85ab..eb0d5d48 
100644 --- a/csrank/discretechoice/multinomial_logit_model.py +++ b/csrank/discretechoice/multinomial_logit_model.py @@ -29,9 +29,7 @@ class MultinomialLogitModel(DiscreteObjectChooser, Learner): - def __init__( - self, n_object_features, loss_function="", regularization="l2", **kwargs - ): + def __init__(self, loss_function="", regularization="l2", **kwargs): """ Create an instance of the Multinomial Logit model for learning the discrete choice function. The utility score for each object in query set :math:`Q` is defined as :math:`U(x) = w \\cdot x`, where :math:`w` is @@ -50,8 +48,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space loss_function : string , {‘categorical_crossentropy’, ‘binary_crossentropy’, ’categorical_hinge’} Loss function to be used for the discrete choice decision from the query set regularization : string, {‘l1’, ‘l2’}, string @@ -68,7 +64,6 @@ def __init__( [2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986 """ self.logger = logging.getLogger(MultinomialLogitModel.__name__) - self.n_object_features = n_object_features self.loss_function = likelihood_dict.get(loss_function, None) if regularization in ["l1", "l2"]: self.regularization = regularization @@ -157,8 +152,8 @@ def construct_model(self, X, Y): with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) - shapes = {"weights": self.n_object_features} - # shapes = {'weights': (self.n_object_features, 3)} + shapes = {"weights": self.n_object_features_fit_} + # shapes = {'weights': (self.n_object_features_fit_, 3)} weights_dict = create_weight_dictionary(self.model_configuration, shapes) intercept = pm.Normal("intercept", mu=0, sd=10) utility = tt.dot(self.Xt, weights_dict["weights"]) + intercept @@ -219,6 +214,7 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + _n_instances, self.n_objects_fit_, 
self.n_object_features_fit_ = X.shape self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) @@ -226,7 +222,7 @@ def _predict_scores_fixed(self, X, **kwargs): d = dict(pm.summary(self.trace)["mean"]) intercept = 0.0 weights = np.array( - [d["weights[{}]".format(i)] for i in range(self.n_object_features)] + [d["weights[{}]".format(i)] for i in range(self.n_object_features_fit_)] ) if "intercept" in d: intercept = intercept + d["intercept"] From f6db015b50102d3ed04e95ab9a2749b29f5e0e9f Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:46:46 +0200 Subject: [PATCH 15/16] Remove unneeded init argument in ExpectedRankRegression --- csrank/objectranking/expected_rank_regression.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/csrank/objectranking/expected_rank_regression.py b/csrank/objectranking/expected_rank_regression.py index 6f063c86..61155e48 100644 --- a/csrank/objectranking/expected_rank_regression.py +++ b/csrank/objectranking/expected_rank_regression.py @@ -18,7 +18,6 @@ class ExpectedRankRegression(ObjectRanker, Learner): def __init__( self, - n_object_features, alpha=0.0, l1_ratio=0.5, tol=1e-4, @@ -46,8 +45,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space alpha : float, optional Regularization strength l1_ratio : float, optional @@ -68,7 +65,6 @@ def __init__( [1] Kamishima, T., Kazawa, H., & Akaho, S. (2005, November). "Supervised ordering-an empirical survey.", Fifth IEEE International Conference on Data Mining. 
""" self.normalize = normalize - self.n_object_features = n_object_features self.alpha = alpha self.l1_ratio = l1_ratio self.tol = tol @@ -96,7 +92,6 @@ def fit(self, X, Y, **kwargs): self.random_state_ = check_random_state(self.random_state) self.logger.debug("Creating the Dataset") x_train, y_train = complete_linear_regression_dataset(X, Y) - assert x_train.shape[1] == self.n_object_features self.logger.debug("Finished the Dataset") if self.alpha < 1e-3: self.model = LinearRegression( From 7c0eef03dbb7c2ecef902f4fb070d9aef2f6ddb8 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:03:40 +0200 Subject: [PATCH 16/16] Do not pass data dimensionality to learners After the last few commits the learners no longer need that information at initialization time. Instead, they determine it from the data when fitting. --- csrank/tests/test_choice_functions.py | 1 - csrank/tests/test_discrete_choice.py | 1 - csrank/tests/test_ranking.py | 1 - 3 files changed, 3 deletions(-) diff --git a/csrank/tests/test_choice_functions.py b/csrank/tests/test_choice_functions.py index bdddb957..b6158036 100644 --- a/csrank/tests/test_choice_functions.py +++ b/csrank/tests/test_choice_functions.py @@ -92,7 +92,6 @@ def test_choice_function_fixed(trivial_choice_problem, name): x, y = trivial_choice_problem choice_function = choice_functions[name][0] params, accuracies = choice_functions[name][1], choice_functions[name][2] - params["n_objects"], params["n_object_features"] = tuple(x.shape[1:]) learner = choice_function(**params) if name == GLM_CHOICE: learner.fit( diff --git a/csrank/tests/test_discrete_choice.py b/csrank/tests/test_discrete_choice.py index a0ec3889..5a2e0223 100644 --- a/csrank/tests/test_discrete_choice.py +++ b/csrank/tests/test_discrete_choice.py @@ -100,7 +100,6 @@ def test_discrete_choice_function_fixed(trivial_discrete_choice_problem, name): discrete_choice_functions[name][1], discrete_choice_functions[name][2], ) - params["n_objects"], 
params["n_object_features"] = tuple(x.shape[1:]) learner = choice_function(**params) if name in [MNL, NLM, GEV, PCL, MLM]: learner.fit( diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py index b719d83a..d2e213f9 100644 --- a/csrank/tests/test_ranking.py +++ b/csrank/tests/test_ranking.py @@ -110,7 +110,6 @@ def test_object_ranker_fixed(trivial_ranking_problem, ranker_name): np.random.seed(123) x, y = trivial_ranking_problem ranker, params, (loss, acc) = object_rankers[ranker_name] - params["n_objects"], params["n_object_features"] = tuple(x.shape[1:]) ranker = ranker(**params) if "linear" in ranker_name: ranker.fit(x, y, epochs=10, validation_split=0, verbose=False)