From 5bbf63c9db249eb71e287fee6b3aa69e7c96dd5a Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 2 May 2020 17:19:05 +0200 Subject: [PATCH 01/16] Determine data dimensionality lazily in FETALinear Currently we require the data dimensionality to be passed into the FETALinear constructor. It is then checked that it matches the dimensionality of the data in `fit`. This is somewhat redundant and not compatible with the scikit-learn estimator API. We were already creating the model in the `fit` function. It was also created when updating the hyperparameters, but that is redundant. In the `fit` function we can simply derive the dimensionality from the data. --- csrank/choicefunction/fetalinear_choice.py | 8 ------- csrank/core/feta_linear.py | 23 +++++++++---------- .../fetalinear_discrete_choice.py | 8 ------- .../objectranking/fetalinear_object_ranker.py | 8 ------- 4 files changed, 11 insertions(+), 36 deletions(-) diff --git a/csrank/choicefunction/fetalinear_choice.py b/csrank/choicefunction/fetalinear_choice.py index abba6253..5dcc5cd5 100644 --- a/csrank/choicefunction/fetalinear_choice.py +++ b/csrank/choicefunction/fetalinear_choice.py @@ -10,8 +10,6 @@ class FETALinearChoiceFunction(FETALinearCore, ChoiceFunctions): def __init__( self, - n_object_features, - n_objects, loss_function=binary_crossentropy, learning_rate=5e-3, batch_size=256, @@ -40,10 +38,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. 
batch_size : int @@ -56,8 +50,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, learning_rate=learning_rate, batch_size=batch_size, loss_function=loss_function, diff --git a/csrank/core/feta_linear.py b/csrank/core/feta_linear.py index 40dd11a1..e16aba03 100644 --- a/csrank/core/feta_linear.py +++ b/csrank/core/feta_linear.py @@ -15,8 +15,6 @@ class FETALinearCore(Learner): def __init__( self, - n_object_features, - n_objects, learning_rate=1e-3, batch_size=256, loss_function=binary_crossentropy, @@ -28,9 +26,7 @@ def __init__( self.learning_rate = learning_rate self.batch_size = batch_size self.random_state = random_state - self.n_object_features = n_object_features self.loss_function = loss_function - self.n_objects = n_objects self.epochs_drop = epochs_drop self.drop = drop self.current_lr = None @@ -43,20 +39,24 @@ def __init__( self.W_last = None def _construct_model_(self, n_objects): - self.X = tf.placeholder("float32", [None, n_objects, self.n_object_features]) + self.X = tf.placeholder( + "float32", [None, n_objects, self.n_object_features_fit_] + ) self.Y = tf.placeholder("float32", [None, n_objects]) - std = 1 / np.sqrt(self.n_object_features) + std = 1 / np.sqrt(self.n_object_features_fit_) self.b1 = tf.Variable( self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32 ) self.W1 = tf.Variable( self.random_state_.normal( - loc=0, scale=std, size=2 * self.n_object_features + loc=0, scale=std, size=2 * self.n_object_features_fit_ ), dtype=tf.float32, ) self.W2 = tf.Variable( - self.random_state_.normal(loc=0, scale=std, size=self.n_object_features), + self.random_state_.normal( + loc=0, scale=std, size=self.n_object_features_fit_ + ), dtype=tf.float32, ) self.b2 = tf.Variable( @@ -101,9 +101,8 @@ def fit( ): self.random_state_ = check_random_state(self.random_state) # Global Variables Initializer - n_instances, n_objects, n_features = X.shape - assert 
n_features == self.n_object_features - self._construct_model_(n_objects) + n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._construct_model_(self.n_objects_fit_) init = tf.global_variables_initializer() with tf.Session() as tf_session: @@ -148,7 +147,7 @@ def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): def _predict_scores_fixed(self, X, **kwargs): n_instances, n_objects, n_features = X.shape - assert n_features == self.n_object_features + assert n_features == self.n_object_features_fit_ outputs = [list() for _ in range(n_objects)] for i, j in combinations(range(n_objects), 2): x1 = X[:, i] diff --git a/csrank/discretechoice/fetalinear_discrete_choice.py b/csrank/discretechoice/fetalinear_discrete_choice.py index c09c1504..9215a7d6 100644 --- a/csrank/discretechoice/fetalinear_discrete_choice.py +++ b/csrank/discretechoice/fetalinear_discrete_choice.py @@ -9,8 +9,6 @@ class FETALinearDiscreteChoiceFunction(FETALinearCore, DiscreteObjectChooser): def __init__( self, - n_object_features, - n_objects, loss_function=categorical_hinge, learning_rate=5e-3, batch_size=256, @@ -39,10 +37,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. 
batch_size : int @@ -55,8 +49,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, learning_rate=learning_rate, batch_size=batch_size, loss_function=loss_function, diff --git a/csrank/objectranking/fetalinear_object_ranker.py b/csrank/objectranking/fetalinear_object_ranker.py index 75216616..17641279 100644 --- a/csrank/objectranking/fetalinear_object_ranker.py +++ b/csrank/objectranking/fetalinear_object_ranker.py @@ -8,8 +8,6 @@ class FETALinearObjectRanker(FETALinearCore, ObjectRanker): def __init__( self, - n_object_features, - n_objects, loss_function=hinged_rank_loss, learning_rate=5e-3, batch_size=256, @@ -38,10 +36,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. batch_size : int @@ -54,8 +48,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, learning_rate=learning_rate, batch_size=batch_size, loss_function=loss_function, From afc46e8ef2757b2d8a0b42e91b453eb0a2c26833 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 15:31:21 +0200 Subject: [PATCH 02/16] Determine data dimensionality lazily in FATELinear --- csrank/choicefunction/fatelinear_choice.py | 8 ------- csrank/core/fate_linear.py | 23 +++++++++---------- .../fatelinear_discrete_choice.py | 8 ------- .../objectranking/fatelinear_object_ranker.py | 8 ------- 4 files changed, 11 insertions(+), 36 deletions(-) diff --git a/csrank/choicefunction/fatelinear_choice.py b/csrank/choicefunction/fatelinear_choice.py index bce47b35..31191f9f 100644 --- a/csrank/choicefunction/fatelinear_choice.py +++ b/csrank/choicefunction/fatelinear_choice.py @@ -10,8 +10,6 @@ class FATELinearChoiceFunction(FATELinearCore, ChoiceFunctions): def __init__( 
self, - n_object_features, - n_objects, n_hidden_set_units=2, loss_function=binary_crossentropy, learning_rate=1e-3, @@ -41,10 +39,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. batch_size : int @@ -57,8 +51,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, n_hidden_set_units=n_hidden_set_units, learning_rate=learning_rate, batch_size=batch_size, diff --git a/csrank/core/fate_linear.py b/csrank/core/fate_linear.py index 30de1dd7..c645d284 100644 --- a/csrank/core/fate_linear.py +++ b/csrank/core/fate_linear.py @@ -14,8 +14,6 @@ class FATELinearCore(Learner): def __init__( self, - n_object_features, - n_objects, n_hidden_set_units=32, learning_rate=1e-3, batch_size=256, @@ -29,9 +27,7 @@ def __init__( self.learning_rate = learning_rate self.batch_size = batch_size self.random_state = random_state - self.n_object_features = n_object_features self.loss_function = loss_function - self.n_objects = n_objects self.epochs_drop = epochs_drop self.drop = drop self.current_lr = None @@ -42,16 +38,20 @@ def __init__( self.optimizer = None def _construct_model_(self, n_objects): - self.X = tf.placeholder("float32", [None, n_objects, self.n_object_features]) + self.X = tf.placeholder( + "float32", [None, n_objects, self.n_object_features_fit_] + ) self.Y = tf.placeholder("float32", [None, n_objects]) - std = 1 / np.sqrt(self.n_object_features) + std = 1 / np.sqrt(self.n_object_features_fit_) self.b1 = tf.Variable( self.random_state_.normal(loc=0, scale=std, size=self.n_hidden_set_units), dtype=tf.float32, ) self.W1 = tf.Variable( self.random_state_.normal( - loc=0, scale=std, size=(self.n_object_features, self.n_hidden_set_units) + loc=0, + scale=std, + size=(self.n_object_features_fit_, 
self.n_hidden_set_units), ), dtype=tf.float32, ) @@ -59,7 +59,7 @@ def _construct_model_(self, n_objects): self.random_state_.normal( loc=0, scale=std, - size=(self.n_object_features + self.n_hidden_set_units), + size=(self.n_object_features_fit_ + self.n_hidden_set_units), ), dtype=tf.float32, ) @@ -95,9 +95,8 @@ def fit( ): self.random_state_ = check_random_state(self.random_state) # Global Variables Initializer - n_instances, n_objects, n_features = X.shape - assert n_features == self.n_object_features - self._construct_model_(n_objects) + n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._construct_model_(self.n_objects_fit_) init = tf.global_variables_initializer() with tf.Session() as tf_session: @@ -141,7 +140,7 @@ def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): def _predict_scores_fixed(self, X, **kwargs): n_instances, n_objects, n_features = X.shape - assert n_features == self.n_object_features + assert n_features == self.n_object_features_fit_ rep = np.mean(np.dot(X, self.weight1), axis=1) + self.bias1 rep = np.tile(rep[:, np.newaxis, :], (1, n_objects, 1)) X_n = np.concatenate((X, rep), axis=2) diff --git a/csrank/discretechoice/fatelinear_discrete_choice.py b/csrank/discretechoice/fatelinear_discrete_choice.py index 015f6345..9e874af0 100644 --- a/csrank/discretechoice/fatelinear_discrete_choice.py +++ b/csrank/discretechoice/fatelinear_discrete_choice.py @@ -9,8 +9,6 @@ class FATELinearDiscreteChoiceFunction(FATELinearCore, DiscreteObjectChooser): def __init__( self, - n_object_features, - n_objects, n_hidden_set_units=2, loss_function=categorical_hinge, learning_rate=1e-3, @@ -40,10 +38,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. 
batch_size : int @@ -56,8 +50,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, n_hidden_set_units=n_hidden_set_units, learning_rate=learning_rate, batch_size=batch_size, diff --git a/csrank/objectranking/fatelinear_object_ranker.py b/csrank/objectranking/fatelinear_object_ranker.py index 092d9d11..dac946fb 100644 --- a/csrank/objectranking/fatelinear_object_ranker.py +++ b/csrank/objectranking/fatelinear_object_ranker.py @@ -8,8 +8,6 @@ class FATELinearObjectRanker(FATELinearCore, ObjectRanker): def __init__( self, - n_object_features, - n_objects, n_hidden_set_units=2, loss_function=hinged_rank_loss, learning_rate=1e-3, @@ -39,10 +37,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object - n_objects : int - Number of objects in each choice set n_hidden_set_units : int Number of hidden set units. batch_size : int @@ -55,8 +49,6 @@ def __init__( Keyword arguments for the @FATENetwork """ super().__init__( - n_object_features=n_object_features, - n_objects=n_objects, n_hidden_set_units=n_hidden_set_units, learning_rate=learning_rate, batch_size=batch_size, From d3d0386a098673d6182c0f9aafbf593db13d1a39 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 15:49:57 +0200 Subject: [PATCH 03/16] Determine data dimensionality lazily in FATE --- README.rst | 2 +- csrank/choicefunction/fate_choice.py | 4 ---- csrank/core/fate_network.py | 16 ++++++---------- csrank/objectranking/fate_object_ranker.py | 4 ---- csrank/objectranking/feta_object_ranker.py | 8 -------- csrank/tests/test_fate.py | 3 +-- docs/intro.rst | 2 +- docs/notebooks/FATE-Net-DC.ipynb | 1 - docs/notebooks/FATE-Net-Ranking.ipynb | 1 - 9 files changed, 9 insertions(+), 32 deletions(-) diff --git a/README.rst b/README.rst index 058f52d4..a4909442 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ method: .. 
code-block:: python - fate = cs.FATEChoiceFunction(n_object_features=2) + fate = cs.FATEChoiceFunction() fate.fit(X_train, Y_train) Predictions can then be obtained using: diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py index a4d3c194..3e9ad830 100644 --- a/csrank/choicefunction/fate_choice.py +++ b/csrank/choicefunction/fate_choice.py @@ -13,7 +13,6 @@ class FATEChoiceFunction(FATENetwork, ChoiceFunctions): def __init__( self, - n_object_features, n_hidden_set_layers=2, n_hidden_set_units=2, n_hidden_joint_layers=32, @@ -50,8 +49,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object n_hidden_set_layers : int Number of set layers. n_hidden_set_units : int @@ -82,7 +79,6 @@ def __init__( self.loss_function = loss_function self.metrics = metrics super().__init__( - n_object_features=n_object_features, n_hidden_set_layers=n_hidden_set_layers, n_hidden_set_units=n_hidden_set_units, n_hidden_joint_layers=n_hidden_joint_layers, diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index 518a7a4e..58a18d8a 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -186,17 +186,13 @@ def set_tunable_parameters( class FATENetwork(FATENetworkCore): - def __init__( - self, n_object_features, n_hidden_set_layers=1, n_hidden_set_units=1, **kwargs - ): + def __init__(self, n_hidden_set_layers=1, n_hidden_set_units=1, **kwargs): """ Create a FATE-network architecture. Training and prediction complexity is linear in the number of objects. Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object n_hidden_set_layers : int Number of hidden set layers. 
n_hidden_set_units : int @@ -209,7 +205,6 @@ def __init__( self.n_hidden_set_layers = n_hidden_set_layers self.n_hidden_set_units = n_hidden_set_units - self.n_object_features = n_object_features self.model = None self.set_layer = None self._create_set_layers( @@ -271,7 +266,7 @@ def _bucket_frequencies(X, min_bucket_size=32): def _construct_models(self, buckets): models = dict() - n_features = self.n_object_features + n_features = self.n_object_features_fit_ for n_objects in buckets.keys(): model = self.construct_model(n_features, n_objects) @@ -501,6 +496,7 @@ def fit( Keyword arguments for the fit function """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape self._fit( X=X, Y=Y, @@ -598,14 +594,14 @@ def _get_context_representation(self, X, kwargs): "Test Set instances {} objects {} features {}".format(*X.shape) ) input_layer_scorer = Input( - shape=(n_objects, self.n_object_features), name="input_node" + shape=(n_objects, self.n_object_features_fit_), name="input_node" ) if self.n_hidden_set_layers >= 1: self.set_layer(input_layer_scorer) fr = self.set_layer.cached_models[n_objects].predict(X, **kwargs) del self.set_layer.cached_models[n_objects] X_n = np.empty( - (fr.shape[0], n_objects, fr.shape[1] + self.n_object_features), + (fr.shape[0], n_objects, fr.shape[1] + self.n_object_features_fit_), dtype="float", ) for i in range(n_objects): @@ -681,7 +677,7 @@ def clear_memory(self, n_objects=5, **kwargs): kernel_regularizer=self.kernel_regularizer, **self.kwargs ) - self.model = self.construct_model(self.n_object_features, n_objects) + self.model = self.construct_model(self.n_object_features_fit_, n_objects) self.model.load_weights(self.hash_file) else: self.logger.info("Cannot clear the memory") diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py index 3d564c22..8e0f5dda 100644 --- a/csrank/objectranking/fate_object_ranker.py 
+++ b/csrank/objectranking/fate_object_ranker.py @@ -12,7 +12,6 @@ class FATEObjectRanker(FATENetwork, ObjectRanker): def __init__( self, - n_object_features, n_hidden_set_layers=2, n_hidden_set_units=2, n_hidden_joint_layers=32, @@ -48,8 +47,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object n_hidden_set_layers : int Number of set layers. n_hidden_set_units : int @@ -80,7 +77,6 @@ def __init__( self.loss_function = loss_function self.metrics = metrics super().__init__( - n_object_features=n_object_features, n_hidden_set_layers=n_hidden_set_layers, n_hidden_set_units=n_hidden_set_units, n_hidden_joint_layers=n_hidden_joint_layers, diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py index db81aa66..b6e38b00 100644 --- a/csrank/objectranking/feta_object_ranker.py +++ b/csrank/objectranking/feta_object_ranker.py @@ -13,8 +13,6 @@ class FETAObjectRanker(FETANetwork, ObjectRanker): def __init__( self, - n_objects, - n_object_features, n_hidden=2, n_units=8, add_zeroth_order_model=False, @@ -49,10 +47,6 @@ def __init__( Parameters ---------- - n_objects : int - Number of objects to be ranked - n_object_features : int - Dimensionality of the feature space of each object n_hidden : int Number of hidden layers n_units : int @@ -85,8 +79,6 @@ def __init__( Keyword arguments for the hidden units """ super().__init__( - n_objects=n_objects, - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, add_zeroth_order_model=add_zeroth_order_model, diff --git a/csrank/tests/test_fate.py b/csrank/tests/test_fate.py index 453403e9..9122ad7e 100644 --- a/csrank/tests/test_fate.py +++ b/csrank/tests/test_fate.py @@ -32,7 +32,7 @@ def predict(self, *args, **kwargs): def fit(self, *args, **kwargs): pass - grc = MockClass(n_objects=n_objects, n_features=n_features) + grc = MockClass() grc._construct_layers( activation=grc.activation, 
kernel_initializer=grc.kernel_initializer, @@ -87,7 +87,6 @@ def trivial_ranking_problem_generator(): yield x, y_true fate = FATEObjectRanker( - n_object_features=1, n_hidden_joint_layers=1, n_hidden_set_layers=1, n_hidden_joint_units=5, diff --git a/docs/intro.rst b/docs/intro.rst index da7b09b2..b270a0ba 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -52,7 +52,7 @@ method: .. code-block:: python - fate = cs.FATEChoiceFunction(n_object_features=2) + fate = cs.FATEChoiceFunction() fate.fit(X_train, Y_train) Predictions can then be obtained using: diff --git a/docs/notebooks/FATE-Net-DC.ipynb b/docs/notebooks/FATE-Net-DC.ipynb index 11234602..5997dfaa 100644 --- a/docs/notebooks/FATE-Net-DC.ipynb +++ b/docs/notebooks/FATE-Net-DC.ipynb @@ -139,7 +139,6 @@ "from csrank import FATEObjectRanker\n", "from csrank.losses import smooth_rank_loss\n", "fate = FATEObjectRanker(\n", - " n_object_features=n_features,\n", " loss_function=smooth_rank_loss,\n", " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" ] diff --git a/docs/notebooks/FATE-Net-Ranking.ipynb b/docs/notebooks/FATE-Net-Ranking.ipynb index eacef606..22a12b2b 100644 --- a/docs/notebooks/FATE-Net-Ranking.ipynb +++ b/docs/notebooks/FATE-Net-Ranking.ipynb @@ -131,7 +131,6 @@ "from csrank import FATEObjectRanker\n", "from csrank.losses import smooth_rank_loss\n", "fate = FATEObjectRanker(\n", - " n_object_features=n_features,\n", " loss_function=smooth_rank_loss,\n", " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" ] From 77ede4606e53b31b2856dc91ad40ead71bdbcd32 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 16:33:13 +0200 Subject: [PATCH 04/16] Determine data dimensionality lazily in FETA --- csrank/choicefunction/feta_choice.py | 20 +++---- csrank/core/feta_network.py | 54 ++++++++++--------- csrank/discretechoice/fate_discrete_choice.py | 4 -- csrank/discretechoice/feta_discrete_choice.py | 26 ++++----- 4 files changed, 45 insertions(+), 59 deletions(-) diff --git 
a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index eb171b50..4389bb74 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -24,8 +24,6 @@ class FETAChoiceFunction(FETANetwork, ChoiceFunctions): def __init__( self, - n_objects, - n_object_features, n_hidden=2, n_units=8, add_zeroth_order_model=False, @@ -61,10 +59,6 @@ def __init__( Parameters ---------- - n_objects : int - Number of objects in each query set - n_object_features : int - Dimensionality of the feature space of each object n_hidden : int Number of hidden layers n_units : int @@ -97,8 +91,6 @@ def __init__( Keyword arguments for the hidden units """ super().__init__( - n_objects=n_objects, - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, add_zeroth_order_model=add_zeroth_order_model, @@ -119,7 +111,9 @@ def __init__( self.logger = logging.getLogger(FETAChoiceFunction.__name__) def _construct_layers(self, **kwargs): - self.input_layer = Input(shape=(self.n_objects, self.n_object_features)) + self.input_layer = Input( + shape=(self.n_objects_fit_, self.n_object_features_fit_) + ) # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: @@ -177,7 +171,7 @@ def create_input_lambda(i): self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] - for i in range(self.n_objects): + for i in range(self.n_objects_fit_): x = create_input_lambda(i)(self.input_layer) inputs.append(x) for hidden in self.hidden_layers_zeroth: @@ -186,8 +180,8 @@ def create_input_lambda(i): zeroth_order_scores = concatenate(zeroth_order_outputs) self.logger.debug("0th order model finished") self.logger.debug("Create 1st order model") - outputs = [list() for _ in range(self.n_objects)] - for i, j in combinations(range(self.n_objects), 2): + outputs = [list() for _ in range(self.n_objects_fit_)] + for i, j in combinations(range(self.n_objects_fit_), 2): if 
self._use_zeroth_model: x1 = inputs[i] x2 = inputs[j] @@ -296,7 +290,7 @@ def fit( self.threshold = 0.5 def sub_sampling(self, X, Y): - if self._n_objects <= self.max_number_of_objects: + if self.n_objects_fit_ <= self.max_number_of_objects: return X, Y n_objects = self.max_number_of_objects bucket_size = int(X.shape[1] / n_objects) diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 6088acac..ae7deb64 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -26,8 +26,6 @@ class FETANetwork(Learner): def __init__( self, - n_objects, - n_object_features, n_hidden=2, n_units=8, add_zeroth_order_model=False, @@ -52,10 +50,8 @@ def __init__( self.activation = activation self.loss_function = loss_function self.metrics = metrics - self._n_objects = n_objects self.max_number_of_objects = max_number_of_objects self.num_subsample = num_subsample - self.n_object_features = n_object_features self.batch_size = batch_size self.hash_file = None self.optimizer = optimizers.get(optimizer) @@ -68,24 +64,20 @@ def __init__( if key not in allowed_dense_kwargs: del kwargs[key] self.kwargs = kwargs - self._construct_layers( - kernel_regularizer=self.kernel_regularizer, - kernel_initializer=self.kernel_initializer, - activation=self.activation, - **self.kwargs - ) self._pairwise_model = None self.model = None self._zero_order_model = None @property def n_objects(self): - if self._n_objects > self.max_number_of_objects: + if self.n_objects_fit_ > self.max_number_of_objects: return self.max_number_of_objects - return self._n_objects + return self.n_objects_fit_ def _construct_layers(self, **kwargs): - self.input_layer = Input(shape=(self.n_objects, self.n_object_features)) + self.input_layer = Input( + shape=(self.n_objects_fit_, self.n_object_features_fit_) + ) # Todo: Variable sized input # X = Input(shape=(None, n_features)) self.logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) @@ -124,7 +116,7 @@ def 
_construct_layers(self, **kwargs): def zero_order_model(self): if self._zero_order_model is None and self._use_zeroth_model: self.logger.info("Creating zeroth model") - inp = Input(shape=(self.n_object_features,)) + inp = Input(shape=(self.n_object_features_fit_,)) x = inp for hidden in self.hidden_layers_zeroth: @@ -139,8 +131,8 @@ def zero_order_model(self): def pairwise_model(self): if self._pairwise_model is None: self.logger.info("Creating pairwise model") - x1 = Input(shape=(self.n_object_features,)) - x2 = Input(shape=(self.n_object_features,)) + x1 = Input(shape=(self.n_object_features_fit_,)) + x2 = Input(shape=(self.n_object_features_fit_,)) x1x2 = concatenate([x1, x2]) x2x1 = concatenate([x2, x1]) @@ -213,7 +205,7 @@ def create_input_lambda(i): self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] - for i in range(self.n_objects): + for i in range(self.n_objects_fit_): x = create_input_lambda(i)(self.input_layer) inputs.append(x) for hidden in self.hidden_layers_zeroth: @@ -222,8 +214,8 @@ def create_input_lambda(i): zeroth_order_scores = concatenate(zeroth_order_outputs) self.logger.debug("0th order model finished") self.logger.debug("Create 1st order model") - outputs = [list() for _ in range(self.n_objects)] - for i, j in combinations(range(self.n_objects), 2): + outputs = [list() for _ in range(self.n_objects_fit_)] + for i, j in combinations(range(self.n_objects_fit_), 2): if self._use_zeroth_model: x1 = inputs[i] x2 = inputs[j] @@ -289,6 +281,14 @@ def fit( **kwd : Keyword arguments for the fit function """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self._construct_layers( + kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + activation=self.activation, + **self.kwargs + ) + self.logger.debug("Enter fit function...") self.random_state_ = check_random_state(self.random_state) @@ -310,18 +310,20 @@ def fit( self.model.save_weights(self.hash_file) def 
sub_sampling(self, X, Y): - if self._n_objects > self.max_number_of_objects: - bucket_size = int(self._n_objects / self.max_number_of_objects) - idx = self.random_state_.randint(bucket_size, size=(len(X), self.n_objects)) + if self.n_objects_fit_ > self.max_number_of_objects: + bucket_size = int(self.n_objects_fit_ / self.max_number_of_objects) + idx = self.random_state_.randint( + bucket_size, size=(len(X), self.n_objects_fit_) + ) # TODO: subsampling multiple rankings - idx += np.arange(start=0, stop=self._n_objects, step=bucket_size)[ - : self.n_objects + idx += np.arange(start=0, stop=self.n_objects_fit_, step=bucket_size)[ + : self.n_objects_fit_ ] X = X[np.arange(X.shape[0])[:, None], idx] Y = Y[np.arange(X.shape[0])[:, None], idx] tmp_sort = Y.argsort(axis=-1) Y = np.empty_like(Y) - Y[np.arange(len(X))[:, None], tmp_sort] = np.arange(self.n_objects) + Y[np.arange(len(X))[:, None], tmp_sort] = np.arange(self.n_objects_fit_) return X, Y def _predict_scores_fixed(self, X, **kwargs): @@ -329,7 +331,7 @@ def _predict_scores_fixed(self, X, **kwargs): self.logger.info( "For Test instances {} objects {} features {}".format(*X.shape) ) - if self.n_objects != n_objects: + if self.n_objects_fit_ != n_objects: scores = self._predict_scores_using_pairs(X, **kwargs) else: scores = self.model.predict(X, **kwargs) diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index 3d3c5aec..4842c0f3 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -11,7 +11,6 @@ class FATEDiscreteChoiceFunction(FATENetwork, DiscreteObjectChooser): def __init__( self, - n_object_features, n_hidden_set_layers=2, n_hidden_set_units=2, loss_function="categorical_hinge", @@ -48,8 +47,6 @@ def __init__( Parameters ---------- - n_object_features : int - Dimensionality of the feature space of each object n_hidden_set_layers : int Number of set layers. 
n_hidden_set_units : int @@ -80,7 +77,6 @@ def __init__( self.loss_function = loss_function self.metrics = metrics super().__init__( - n_object_features=n_object_features, n_hidden_set_layers=n_hidden_set_layers, n_hidden_set_units=n_hidden_set_units, n_hidden_joint_layers=n_hidden_joint_layers, diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index c40710eb..afbff700 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -22,8 +22,6 @@ class FETADiscreteChoiceFunction(FETANetwork, DiscreteObjectChooser): def __init__( self, - n_objects, - n_object_features, n_hidden=2, n_units=8, add_zeroth_order_model=False, @@ -59,10 +57,6 @@ def __init__( Parameters ---------- - n_objects : int - Number of objects in each query set - n_object_features : int - Dimensionality of the feature space of each object n_hidden : int Number of hidden layers n_units : int @@ -95,8 +89,6 @@ def __init__( Keyword arguments for the hidden units """ super().__init__( - n_objects=n_objects, - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, add_zeroth_order_model=add_zeroth_order_model, @@ -116,7 +108,9 @@ def __init__( self.logger = logging.getLogger(FETADiscreteChoiceFunction.__name__) def _construct_layers(self, **kwargs): - self.input_layer = Input(shape=(self.n_objects, self.n_object_features)) + self.input_layer = Input( + shape=(self.n_objects_fit_, self.n_object_features_fit_) + ) # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: @@ -186,7 +180,7 @@ def create_input_lambda(i): self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] - for i in range(self.n_objects): + for i in range(self.n_objects_fit_): x = create_input_lambda(i)(self.input_layer) inputs.append(x) for hidden in self.hidden_layers_zeroth: @@ -195,8 +189,8 @@ def create_input_lambda(i): zeroth_order_scores = 
concatenate(zeroth_order_outputs) self.logger.debug("0th order model finished") self.logger.debug("Create 1st order model") - outputs = [list() for _ in range(self.n_objects)] - for i, j in combinations(range(self.n_objects), 2): + outputs = [list() for _ in range(self.n_objects_fit_)] + for i, j in combinations(range(self.n_objects_fit_), 2): if self._use_zeroth_model: x1 = inputs[i] x2 = inputs[j] @@ -239,10 +233,10 @@ def get_score_object(i): get_score_object(i)(zeroth_order_scores), ] ) - for i in range(self.n_objects) + for i in range(self.n_objects_fit_) ] scores = [] - for i in range(self.n_objects): + for i in range(self.n_objects_fit_): scores.append(self.weighted_sum(concat_scores[i])) scores = concatenate(scores) @@ -259,7 +253,7 @@ def get_score_object(i): # zeroth_order_scores = expand_dims()(zeroth_order_scores) # concat_scores = concatenate([scores, zeroth_order_scores], axis=-1) # weighted_sum = Conv1D(name='weighted_sum', filters=1, kernel_size=(1), strides=1, activation='linear', - # kernel_initializer=self.kernel_initializer, input_shape=(self.n_objects, 2), + # kernel_initializer=self.kernel_initializer, input_shape=(self.n_objects_fit_, 2), # kernel_regularizer=self.kernel_regularizer, use_bias=False) # scores = weighted_sum(concat_scores) # scores = squeeze_dims()(scores) @@ -313,7 +307,7 @@ def _predict_scores_using_pairs(self, X, **kwd): return scores def _create_zeroth_order_model(self): - inp = Input(shape=(self.n_object_features,)) + inp = Input(shape=(self.n_object_features_fit_,)) x = inp for hidden in self.hidden_layers_zeroth: From cad28c69de7e1a323a843c6de568f32039a05950 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 16:45:42 +0200 Subject: [PATCH 05/16] Determine data dimensionality lazily in ListNet --- csrank/objectranking/list_net.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index 
076d0c07..0d53b167 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -26,8 +26,7 @@ class ListNet(Learner, ObjectRanker): def __init__( self, - n_object_features, - n_top, + n_top=1, n_hidden=2, n_units=8, loss_function=plackett_luce_loss, @@ -53,8 +52,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_top : int Size of the top-k-subrankings to consider for training hash_file: str @@ -90,8 +87,6 @@ def __init__( [1] Z. Cao, T. Qin, T. Liu, M. Tsai and H. Li. "Learning to Rank: From Pairwise Approach to Listwise Approach." ICML, 2007. """ self.logger = logging.getLogger(ListNet.__name__) - self.n_object_features = n_object_features - self.n_objects = n_top self.n_top = n_top self.batch_normalization = batch_normalization self.activation = activation @@ -108,12 +103,6 @@ def __init__( if key not in allowed_dense_kwargs: del kwargs[key] self.kwargs = kwargs - self._construct_layers( - kernel_regularizer=self.kernel_regularizer, - kernel_initializer=self.kernel_initializer, - activation=self.activation, - **self.kwargs - ) self.threshold_instances = int(1e10) self.batch_size = batch_size @@ -123,7 +112,7 @@ def __init__( self._scoring_model = None def _construct_layers(self, **kwargs): - self.input_layer = Input(shape=(self.n_top, self.n_object_features)) + self.input_layer = Input(shape=(self.n_top, self.n_object_features_fit_)) self.output_node = Dense( 1, activation="linear", kernel_regularizer=self.kernel_regularizer ) @@ -180,7 +169,13 @@ def fit( Keyword arguments for the fit function """ self.random_state_ = check_random_state(self.random_state) - self.n_objects = X.shape[1] + _n_instances, _n_objects, self.n_object_features_fit_ = X.shape + self._construct_layers( + kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + activation=self.activation, + **self.kwargs + ) self.logger.debug("Creating top-k dataset") X, Y = 
self._create_topk(X, Y) self.logger.debug("Finished creating the dataset") @@ -236,7 +231,7 @@ def scoring_model(self): """ if self._scoring_model is None: self.logger.info("Creating scoring model") - inp = Input(shape=(self.n_object_features,)) + inp = Input(shape=(self.n_object_features_fit_,)) x = inp for hidden_layer in self.hidden_layers: x = hidden_layer(x) From a47247b4c19407a873fef51c96c492e17c422809 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 16:55:44 +0200 Subject: [PATCH 06/16] Determine data dimensionality lazily in NestedLogitModel --- csrank/discretechoice/nested_logit_model.py | 33 ++++++++++----------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/csrank/discretechoice/nested_logit_model.py b/csrank/discretechoice/nested_logit_model.py index e286a67a..a94ab78b 100644 --- a/csrank/discretechoice/nested_logit_model.py +++ b/csrank/discretechoice/nested_logit_model.py @@ -34,8 +34,6 @@ class NestedLogitModel(DiscreteObjectChooser, Learner): def __init__( self, - n_object_features, - n_objects, n_nests=None, loss_function="", regularization="l1", @@ -66,12 +64,10 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space - n_objects: int - Number of objects in each query set n_nests : int range : [2,n_objects/2] - The number of nests/subsets in which the objects are divided + The number of nests/subsets in which the objects are divided. + This may not surpass half the amount of objects this model will + be trained on. loss_function : string , {‘categorical_crossentropy’, ‘binary_crossentropy’, ’categorical_hinge’} Loss function to be used for the discrete choice decision from the query set regularization : string, {‘l1’, ‘l2’}, string @@ -92,12 +88,7 @@ def __init__( [3] Kenneth Train and Daniel McFadden. „The goods/leisure tradeoff and disaggregate work trip mode choice models“. In: Transportation research 12.5 (1978), pp. 
349–353 """ self.logger = logging.getLogger(NestedLogitModel.__name__) - self.n_object_features = n_object_features - self.n_objects = n_objects - if n_nests is None: - self.n_nests = int(n_objects / 2) - else: - self.n_nests = n_nests + self.n_nests = n_nests self.alpha = alpha self.random_state = random_state self.loss_function = likelihood_dict.get(loss_function, None) @@ -334,8 +325,8 @@ def construct_model(self, X, Y): self.Yt = theano.shared(Y) self.y_nests = theano.shared(y_nests) shapes = { - "weights": self.n_object_features, - "weights_k": self.n_object_features, + "weights": self.n_object_features_fit_, + "weights_k": self.n_object_features_fit_, } weights_dict = create_weight_dictionary(self.model_configuration, shapes) @@ -400,6 +391,9 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + if self.n_nests is None: + self.n_nests = int(self.n_objects_fit_ / 2) self.random_state_ = check_random_state(self.random_state) self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) @@ -408,12 +402,15 @@ def _predict_scores_fixed(self, X, **kwargs): y_nests = self.create_nests(X) mean_trace = dict(pm.summary(self.trace)["mean"]) weights = np.array( - [mean_trace["weights[{}]".format(i)] for i in range(self.n_object_features)] + [ + mean_trace["weights[{}]".format(i)] + for i in range(self.n_object_features_fit_) + ] ) weights_k = np.array( [ mean_trace["weights_k[{}]".format(i)] - for i in range(self.n_object_features) + for i in range(self.n_object_features_fit_) ] ) lambda_k = np.array( @@ -456,7 +453,7 @@ def set_tunable_parameters( if alpha is not None: self.alpha = alpha if n_nests is None: - self.n_nests = int(self.n_objects / 2) + self.n_nests = int(self.n_objects_fit_ / 2) else: self.n_nests = n_nests self.regularization = regularization From 249378902cf2c0ac94eb40404f0a750fea639a5d Mon Sep 
17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 17:15:02 +0200 Subject: [PATCH 07/16] Determine data dimensionality lazily in GeneralizedNestedLogitModel --- .../generalized_nested_logit.py | 37 ++++++++++--------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/csrank/discretechoice/generalized_nested_logit.py b/csrank/discretechoice/generalized_nested_logit.py index d826b2f7..20ae573c 100644 --- a/csrank/discretechoice/generalized_nested_logit.py +++ b/csrank/discretechoice/generalized_nested_logit.py @@ -34,8 +34,6 @@ class GeneralizedNestedLogitModel(DiscreteObjectChooser, Learner): def __init__( self, - n_object_features, - n_objects, n_nests=None, loss_function="None", regularization="l2", @@ -64,12 +62,12 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_objects: int Number of objects in each query set n_nests : int range : [2,n_objects/2] - The number of nests/subsets in which the objects are divided + The number of nests/subsets in which the objects are divided. + This may not surpass half the amount of objects this model will + be trained on. 
loss_function : string , {‘categorical_crossentropy’, ‘binary_crossentropy’, ’categorical_hinge’} Loss function to be used for the discrete choice decision from the query set regularization : string, {‘l1’, ‘l2’}, string @@ -92,12 +90,7 @@ def __init__( """ self.logger = logging.getLogger(GeneralizedNestedLogitModel.__name__) - self.n_object_features = n_object_features - self.n_objects = n_objects - if n_nests is None: - self.n_nests = n_objects + int(n_objects / 2) - else: - self.n_nests = n_nests + self.n_nests = n_nests self.alpha = alpha self.loss_function = likelihood_dict.get(loss_function, None) @@ -274,8 +267,8 @@ def construct_model(self, X, Y): self.Xt = theano.shared(X) self.Yt = theano.shared(Y) shapes = { - "weights": self.n_object_features, - "weights_ik": (self.n_object_features, self.n_nests), + "weights": self.n_object_features_fit_, + "weights_ik": (self.n_object_features_fit_, self.n_nests), } weights_dict = create_weight_dictionary(self.model_configuration, shapes) @@ -339,19 +332,28 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + if self.n_nests is None: + # TODO this looks like a bug to me, but it was already done this way + # before (moved out of __init__). The `n_objects` summand probably + # should be removed. 
+ self.n_nests = self.n_objects_fit_ + int(self.n_objects_fit_ / 2) self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) def _predict_scores_fixed(self, X, **kwargs): mean_trace = dict(pm.summary(self.trace)["mean"]) weights = np.array( - [mean_trace["weights[{}]".format(i)] for i in range(self.n_object_features)] + [ + mean_trace["weights[{}]".format(i)] + for i in range(self.n_object_features_fit_) + ] ) lambda_k = np.array( [mean_trace["lambda_k[{}]".format(i)] for i in range(self.n_nests)] ) - weights_ik = np.zeros((self.n_object_features, self.n_nests)) - for i, k in product(range(self.n_object_features), range(self.n_nests)): + weights_ik = np.zeros((self.n_object_features_fit_, self.n_nests)) + for i, k in product(range(self.n_object_features_fit_), range(self.n_nests)): weights_ik[i][k] = mean_trace["weights_ik[{},{}]".format(i, k)] alpha_ik = np.dot(X, weights_ik) alpha_ik = npu.softmax(alpha_ik, axis=2) @@ -389,8 +391,9 @@ def set_tunable_parameters( """ if alpha is not None: self.alpha = alpha + # TODO see the comment for n_nests above if n_nests is None: - self.n_nests = self.n_objects + int(self.n_objects / 2) + self.n_nests = self.n_objects_fit_ + int(self.n_objects_fit_ / 2) else: self.n_nests = n_nests if loss_function in likelihood_dict.keys(): From a2712f9806a918f60c679ad4f0accf0a75f7a422 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Thu, 14 May 2020 17:27:57 +0200 Subject: [PATCH 08/16] Determine data dimensionality lazily in PairedCombinatorialLogit --- .../paired_combinatorial_logit.py | 24 +++++++++---------- csrank/tests/test_ranking.py | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/csrank/discretechoice/paired_combinatorial_logit.py b/csrank/discretechoice/paired_combinatorial_logit.py index b8262843..4502d095 100644 --- a/csrank/discretechoice/paired_combinatorial_logit.py +++ b/csrank/discretechoice/paired_combinatorial_logit.py @@ -34,8 +34,6 @@ class
PairedCombinatorialLogit(DiscreteObjectChooser, Learner): def __init__( self, - n_object_features, - n_objects, loss_function="", regularization="l2", alpha=5e-2, @@ -67,8 +65,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_objects: int Number of objects in each query set n_nests : int range : [2,n_objects/2] @@ -93,10 +89,6 @@ def __init__( [3] Chaushie Chu. „A paired combinatorial logit model for travel demand analysis“. In: Proceedings of the fifth world conference on transportation research. Vol. 4.1989, pp. 295–309 """ self.logger = logging.getLogger(PairedCombinatorialLogit.__name__) - self.n_object_features = n_object_features - self.n_objects = n_objects - self.nests_indices = np.array(list(combinations(np.arange(n_objects), 2))) - self.n_nests = len(self.nests_indices) self.alpha = alpha self.random_state = random_state self.loss_function = likelihood_dict.get(loss_function, None) @@ -194,7 +186,7 @@ def get_probabilities(self, utility, lambda_k): Choice probabilities :math:`P_i` of the objects :math:`x_i \\in Q` in the query sets """ - n_objects = self.n_objects + n_objects = self.n_objects_fit_ nests_indices = self.nests_indices n_nests = self.n_nests lambdas = tt.ones((n_objects, n_objects), dtype=np.float) @@ -220,7 +212,7 @@ def get_probabilities(self, utility, lambda_k): return p def _get_probabilities_np(self, utility, lambda_k): - n_objects = self.n_objects + n_objects = self.n_objects_fit_ nests_indices = self.nests_indices n_nests = self.n_nests temp_lambdas = np.ones((n_objects, n_objects), lambda_k.dtype) @@ -269,7 +261,7 @@ def construct_model(self, X, Y): with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) - shapes = {"weights": self.n_object_features} + shapes = {"weights": self.n_object_features_fit_} weights_dict = create_weight_dictionary(self.model_configuration, shapes) lambda_k = pm.Uniform("lambda_k", self.alpha, 1.0, shape=self.n_nests) 
utility = tt.dot(self.Xt, weights_dict["weights"]) @@ -330,13 +322,21 @@ def fit( Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape + self.nests_indices = np.array( + list(combinations(np.arange(self.n_objects_fit_), 2)) + ) + self.n_nests = len(self.nests_indices) self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) def _predict_scores_fixed(self, X, **kwargs): mean_trace = dict(pm.summary(self.trace)["mean"]) weights = np.array( - [mean_trace["weights[{}]".format(i)] for i in range(self.n_object_features)] + [ + mean_trace["weights[{}]".format(i)] + for i in range(self.n_object_features_fit_) + ] ) lambda_k = np.array( [mean_trace["lambda_k[{}]".format(i)] for i in range(self.n_nests)] diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py index f7b6509e..b719d83a 100644 --- a/csrank/tests/test_ranking.py +++ b/csrank/tests/test_ranking.py @@ -70,8 +70,8 @@ def check_params_tunable(tunable_obj, params, rtol=1e-2, atol=1e-4): isinstance(tunable_obj, PairedCombinatorialLogit) and key == "n_nests" ): - tunable_obj.n_nests == tunable_obj.n_objects * ( - tunable_obj.n_objects - 1 + tunable_obj.n_nests == tunable_obj.n_objects_fit_ * ( + tunable_obj.n_objects_fit_ - 1 ) / 2 else: assert np.isclose( From 9560cb078e0e89d30311224e98962f55afdec0fa Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:25:59 +0200 Subject: [PATCH 09/16] Determine data dimensionality lazily in RankNet --- csrank/choicefunction/ranknet_choice.py | 10 +-------- csrank/core/ranknet_core.py | 22 +++++++++---------- .../discretechoice/ranknet_discrete_choice.py | 10 +-------- csrank/objectranking/rank_net.py | 10 +-------- docs/notebooks/Rank-Net-Choice.ipynb | 1 - 5 files changed, 14 insertions(+), 39 deletions(-) diff --git 
a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py index ee42b504..1a5fa142 100644 --- a/csrank/choicefunction/ranknet_choice.py +++ b/csrank/choicefunction/ranknet_choice.py @@ -12,7 +12,6 @@ class RankNetChoiceFunction(RankNetCore, ChoiceFunctions): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -40,8 +39,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -74,7 +71,6 @@ def __init__( [2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581). """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -89,11 +85,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(RankNetChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") self.threshold = 0.5 def construct_model(self): diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index 6c7b5203..c6fbd8b8 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -21,7 +21,6 @@ class RankNetCore(Learner): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -36,7 +35,6 @@ def __init__( **kwargs ): self.logger = logging.getLogger(RankNetCore.__name__) - self.n_object_features = n_object_features self.batch_normalization = batch_normalization self.activation = activation self.metrics = metrics @@ -58,17 +56,11 @@ def __init__( self.model = None self.hash_file = None self.random_state = random_state - self._construct_layers( - kernel_regularizer=self.kernel_regularizer, - kernel_initializer=self.kernel_initializer, - activation=self.activation, - **self.kwargs - ) def 
_construct_layers(self, **kwargs): self.logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) - self.x1 = Input(shape=(self.n_object_features,)) - self.x2 = Input(shape=(self.n_object_features,)) + self.x1 = Input(shape=(self.n_object_features_fit_,)) + self.x2 = Input(shape=(self.n_object_features_fit_,)) self.output_node = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) @@ -149,11 +141,19 @@ def fit( Keyword arguments for the fit function """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape X1, X2, Y_single = self._convert_instances_(X, Y) self.logger.debug("Instances created {}".format(X1.shape[0])) self.logger.debug("Creating the model") + self._construct_layers( + kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + activation=self.activation, + **self.kwargs + ) + # Model with input as two objects and output as probability of x1>x2 self.model = self.construct_model() self.logger.debug("Finished Creating the model, now fitting started") @@ -182,7 +182,7 @@ def scoring_model(self): """ if self._scoring_model is None: self.logger.info("creating scoring model") - inp = Input(shape=(self.n_object_features,)) + inp = Input(shape=(self.n_object_features_fit_,)) x = inp for hidden_layer in self.hidden_layers: x = hidden_layer(x) diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py index c09f328e..3d0c60b1 100644 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ b/csrank/discretechoice/ranknet_discrete_choice.py @@ -11,7 +11,6 @@ class RankNetDiscreteChoiceFunction(RankNetCore, DiscreteObjectChooser): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -40,8 +39,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : 
int Number of hidden layers used in the scoring network n_units : int @@ -74,7 +71,6 @@ def __init__( [2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581). """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -89,11 +85,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(RankNetDiscreteChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def construct_model(self): return super().construct_model() diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py index c82a5726..aa04766b 100644 --- a/csrank/objectranking/rank_net.py +++ b/csrank/objectranking/rank_net.py @@ -13,7 +13,6 @@ class RankNet(RankNetCore, ObjectRanker): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -40,8 +39,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -76,7 +73,6 @@ def __init__( """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -91,11 +87,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(RankNet.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def construct_model(self): return super().construct_model() diff --git a/docs/notebooks/Rank-Net-Choice.ipynb b/docs/notebooks/Rank-Net-Choice.ipynb index 342aac22..d038a760 100644 --- a/docs/notebooks/Rank-Net-Choice.ipynb +++ b/docs/notebooks/Rank-Net-Choice.ipynb @@ -124,7 +124,6 @@ "outputs": [], "source": [ "ranknet = RankNetChoiceFunction(\n", - " 
n_object_features=n_features,\n", " optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9))" ] }, From 795f6a349c9257ad0640e70ef0c72c28f30021a5 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:16:51 +0200 Subject: [PATCH 10/16] Determine data dimensionality lazily in CmpNet --- csrank/choicefunction/cmpnet_choice.py | 10 +--------- csrank/core/cmpnet_core.py | 20 +++++++++---------- .../discretechoice/cmpnet_discrete_choice.py | 10 +--------- csrank/objectranking/cmp_net.py | 10 +--------- 4 files changed, 13 insertions(+), 37 deletions(-) diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py index d30e81a8..5c069ff3 100644 --- a/csrank/choicefunction/cmpnet_choice.py +++ b/csrank/choicefunction/cmpnet_choice.py @@ -12,7 +12,6 @@ class CmpNetChoiceFunction(CmpNetCore, ChoiceFunctions): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -49,8 +48,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -80,7 +77,6 @@ def __init__( """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -95,11 +91,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(CmpNetChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") self.threshold = 0.5 def _convert_instances_(self, X, Y): diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 9aaca074..ef70dd27 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -22,7 +22,6 @@ class CmpNetCore(Learner): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -37,7 +36,6 @@ def __init__( **kwargs ): 
self.logger = logging.getLogger("CmpNet") - self.n_object_features = n_object_features self.batch_normalization = batch_normalization self.activation = activation self.hash_file = None @@ -62,20 +60,15 @@ def __init__( self.threshold_instances = int(1e10) self.random_state = random_state self.model = None - self._construct_layers( - kernel_regularizer=self.kernel_regularizer, - kernel_initializer=self.kernel_initializer, - activation=self.activation, - **self.kwargs - ) def _construct_layers(self, **kwargs): self.output_node = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) - self.x1 = Input(shape=(self.n_object_features,)) - self.x2 = Input(shape=(self.n_object_features,)) + + self.x1 = Input(shape=(self.n_object_features_fit_,)) + self.x2 = Input(shape=(self.n_object_features_fit_,)) if self.batch_normalization: self.hidden_layers = [ NormalizedDense(self.n_units, name="hidden_{}".format(x), **kwargs) @@ -157,9 +150,16 @@ def fit( Keyword arguments for the fit function """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape x1, x2, y_double = self._convert_instances_(X, Y) self.logger.debug("Instances created {}".format(x1.shape[0])) + self._construct_layers( + kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + activation=self.activation, + **self.kwargs + ) self.model = self.construct_model() self.logger.debug("Finished Creating the model, now fitting started") diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py index e5440688..457b26a5 100644 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ b/csrank/discretechoice/cmpnet_discrete_choice.py @@ -11,7 +11,6 @@ class CmpNetDiscreteChoiceFunction(CmpNetCore, DiscreteObjectChooser): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -46,8 +45,6 @@ 
def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -78,7 +75,6 @@ def __init__( [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco Scarselli. 2011. SortNet: Learning to Rank by a Neural Preference Function. IEEE Trans. Neural Networks 22, 9 (2011), 1368–1380. https://doi.org/10.1109/TNN.2011.2160875 """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -93,11 +89,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(CmpNetDiscreteChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def _convert_instances_(self, X, Y): self.logger.debug("Creating the Dataset") diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py index ad218992..d86e01ea 100644 --- a/csrank/objectranking/cmp_net.py +++ b/csrank/objectranking/cmp_net.py @@ -13,7 +13,6 @@ class CmpNet(CmpNetCore, ObjectRanker): def __init__( self, - n_object_features, n_hidden=2, n_units=8, loss_function="binary_crossentropy", @@ -50,8 +49,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_hidden : int Number of hidden layers used in the scoring network n_units : int @@ -83,7 +80,6 @@ def __init__( [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco Scarselli. 2011. SortNet: Learning to Rank by a Neural Preference Function. IEEE Trans. Neural Networks 22, 9 (2011), 1368–1380. 
https://doi.org/10.1109/TNN.2011.2160875 """ super().__init__( - n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units, loss_function=loss_function, @@ -98,11 +94,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(CmpNet.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def _convert_instances_(self, X, Y): self.logger.debug("Creating the Dataset") From 393bf1968a0bfea6cec68dba14c9eb9173574381 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:14:11 +0200 Subject: [PATCH 11/16] Determine data dimensionality lazily in PairwiseSVM --- csrank/choicefunction/pairwise_choice.py | 13 +++---------- csrank/core/pairwise_svm.py | 7 ++----- csrank/discretechoice/pairwise_discrete_choice.py | 13 +++---------- csrank/objectranking/rank_svm.py | 13 +++---------- 4 files changed, 11 insertions(+), 35 deletions(-) diff --git a/csrank/choicefunction/pairwise_choice.py b/csrank/choicefunction/pairwise_choice.py index f6e5448a..890c30e6 100644 --- a/csrank/choicefunction/pairwise_choice.py +++ b/csrank/choicefunction/pairwise_choice.py @@ -10,7 +10,6 @@ class PairwiseSVMChoiceFunction(PairwiseSVM, ChoiceFunctions): def __init__( self, - n_object_features, C=1.0, tol=1e-4, normalize=True, @@ -30,8 +29,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space C : float, optional Penalty parameter of the error term tol : float, optional @@ -54,7 +51,6 @@ def __init__( """ super().__init__( - n_object_features=n_object_features, C=C, tol=tol, normalize=normalize, @@ -63,11 +59,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(PairwiseSVMChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") self.threshold = 0.5 def _convert_instances_(self, X, Y): 
@@ -80,7 +72,7 @@ def _convert_instances_(self, X, Y): y_single, ) = generate_complete_pairwise_dataset(X, Y) del garbage - assert x_train.shape[1] == self.n_object_features + assert x_train.shape[1] == self.n_object_features_fit_ self.logger.debug( "Finished the Dataset with instances {}".format(x_train.shape[0]) ) @@ -107,6 +99,7 @@ def fit(self, X, Y, tune_size=0.1, thin_thresholds=1, verbose=0, **kwd): Keyword arguments for the fit function """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape if tune_size > 0: X_train, X_val, Y_train, Y_val = train_test_split( X, Y, test_size=tune_size, random_state=self.random_state diff --git a/csrank/core/pairwise_svm.py b/csrank/core/pairwise_svm.py index 23bbffba..d63197f1 100644 --- a/csrank/core/pairwise_svm.py +++ b/csrank/core/pairwise_svm.py @@ -13,7 +13,6 @@ class PairwiseSVM(Learner): def __init__( self, - n_object_features, C=1.0, tol=1e-4, normalize=True, @@ -25,8 +24,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space C : float, optional Penalty parameter of the error term tol : float, optional @@ -45,7 +42,6 @@ def __init__( [1] Joachims, T. (2002, July). "Optimizing search engines using clickthrough data.", Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 133-142). ACM. 
""" self.normalize = normalize - self.n_object_features = n_object_features self.C = C self.tol = tol self.logger = logging.getLogger("RankSVM") @@ -71,6 +67,7 @@ def fit(self, X, Y, **kwargs): """ self.random_state_ = check_random_state(self.random_state) + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape x_train, y_single = self._convert_instances_(X, Y) if x_train.shape[0] > self.threshold_instances: self.model = LogisticRegression( @@ -101,7 +98,7 @@ def fit(self, X, Y, **kwargs): self.logger.debug("Fitting Complete") def _predict_scores_fixed(self, X, **kwargs): - assert X.shape[-1] == self.n_object_features + assert X.shape[-1] == self.n_object_features_fit_ self.logger.info( "For Test instances {} objects {} features {}".format(*X.shape) ) diff --git a/csrank/discretechoice/pairwise_discrete_choice.py b/csrank/discretechoice/pairwise_discrete_choice.py index ed6efdc4..5200007c 100644 --- a/csrank/discretechoice/pairwise_discrete_choice.py +++ b/csrank/discretechoice/pairwise_discrete_choice.py @@ -8,7 +8,6 @@ class PairwiseSVMDiscreteChoiceFunction(PairwiseSVM, DiscreteObjectChooser): def __init__( self, - n_object_features, C=1.0, tol=1e-4, normalize=True, @@ -28,8 +27,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space C : float, optional Penalty parameter of the error term tol : float, optional @@ -51,7 +48,6 @@ def __init__( [2] Sebastián Maldonado, Ricardo Montoya, and Richard Weber. „Advanced conjoint analysis using feature selection via support vector machines“. In: European Journal of Operational Research 241.2 (2015), pp. 564 –574. 
""" super().__init__( - n_object_features=n_object_features, C=C, tol=tol, normalize=normalize, @@ -60,11 +56,7 @@ def __init__( **kwargs ) self.logger = logging.getLogger(PairwiseSVMDiscreteChoiceFunction.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def _convert_instances_(self, X, Y): self.logger.debug("Creating the Dataset") @@ -76,13 +68,14 @@ def _convert_instances_(self, X, Y): y_single, ) = generate_complete_pairwise_dataset(X, Y) del garbage - assert x_train.shape[1] == self.n_object_features + assert x_train.shape[1] == self.n_object_features_fit_ self.logger.debug( "Finished the Dataset with instances {}".format(x_train.shape[0]) ) return x_train, y_single def fit(self, X, Y, **kwd): + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape super().fit(X, Y, **kwd) def _predict_scores_fixed(self, X, **kwargs): diff --git a/csrank/objectranking/rank_svm.py b/csrank/objectranking/rank_svm.py index de26bc24..f59d5a45 100644 --- a/csrank/objectranking/rank_svm.py +++ b/csrank/objectranking/rank_svm.py @@ -10,7 +10,6 @@ class RankSVM(ObjectRanker, PairwiseSVM): def __init__( self, - n_object_features, C=1.0, tol=1e-4, normalize=True, @@ -30,8 +29,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space C : float, optional Penalty parameter of the error term tol : float, optional @@ -50,7 +47,6 @@ def __init__( [1] Joachims, T. (2002, July). "Optimizing search engines using clickthrough data.", Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 133-142). ACM. 
""" super().__init__( - n_object_features=n_object_features, C=C, tol=tol, normalize=normalize, @@ -59,13 +55,10 @@ def __init__( **kwargs ) self.logger = logging.getLogger(RankSVM.__name__) - self.logger.info( - "Initializing network with object features {}".format( - self.n_object_features - ) - ) + self.logger.info("Initializing network") def fit(self, X, Y, **kwargs): + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape super().fit(X, Y, **kwargs) def _convert_instances_(self, X, Y): @@ -78,7 +71,7 @@ def _convert_instances_(self, X, Y): y_single, ) = generate_complete_pairwise_dataset(X, Y) del garbage - assert x_train.shape[1] == self.n_object_features + assert x_train.shape[1] == self.n_object_features_fit_ self.logger.debug( "Finished the Dataset with instances {}".format(x_train.shape[0]) ) From 4950b3b2780fadf80c8439180ce15bde51283933 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:44:20 +0200 Subject: [PATCH 12/16] Determine data dimensionality lazily in GeneralizedLinearModel --- csrank/choicefunction/generalized_linear_model.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/csrank/choicefunction/generalized_linear_model.py b/csrank/choicefunction/generalized_linear_model.py index 5db21f25..fc68687c 100644 --- a/csrank/choicefunction/generalized_linear_model.py +++ b/csrank/choicefunction/generalized_linear_model.py @@ -30,9 +30,7 @@ class GeneralizedLinearModel(ChoiceFunctions, Learner): - def __init__( - self, n_object_features, regularization="l2", random_state=None, **kwargs - ): + def __init__(self, regularization="l2", random_state=None, **kwargs): """ Create an instance of the GeneralizedLinearModel model for learning the choice function. This model is adapted from the multinomial logit model :class:`csrank.discretechoice.multinomial_logit_model.MultinomialLogitModel`. 
@@ -52,8 +50,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space regularization : string, optional Regularization technique to be used for estimating the weights random_state : int or object @@ -68,7 +64,6 @@ def __init__( [2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986 """ self.logger = logging.getLogger(GeneralizedLinearModel.__name__) - self.n_object_features = n_object_features if regularization in ["l1", "l2"]: self.regularization = regularization else: @@ -156,8 +151,8 @@ def construct_model(self, X, Y): with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) - shapes = {"weights": self.n_object_features} - # shapes = {'weights': (self.n_object_features, 3)} + shapes = {"weights": self.n_object_features_fit_} + # shapes = {'weights': (self.n_object_features_fit_, 3)} weights_dict = create_weight_dictionary(self.model_configuration, shapes) intercept = pm.Normal("intercept", mu=0, sd=10) utility = tt.dot(self.Xt, weights_dict["weights"]) + intercept @@ -274,6 +269,7 @@ def _fit( }, **kwargs ): + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) @@ -281,7 +277,7 @@ def _predict_scores_fixed(self, X, **kwargs): d = dict(pm.summary(self.trace)["mean"]) intercept = 0.0 weights = np.array( - [d["weights[{}]".format(i)] for i in range(self.n_object_features)] + [d["weights[{}]".format(i)] for i in range(self.n_object_features_fit_)] ) if "intercept" in d: intercept = intercept + d["intercept"] From f125f12599fef1cff60f035d8e5fb75640b683ae Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:45:38 +0200 Subject: [PATCH 13/16] Determine data dimensionality lazily in MixedLogitModel --- csrank/discretechoice/mixed_logit_model.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git 
a/csrank/discretechoice/mixed_logit_model.py b/csrank/discretechoice/mixed_logit_model.py index 4501c98d..2a41ea24 100644 --- a/csrank/discretechoice/mixed_logit_model.py +++ b/csrank/discretechoice/mixed_logit_model.py @@ -31,14 +31,7 @@ class MixedLogitModel(DiscreteObjectChooser, Learner): - def __init__( - self, - n_object_features, - n_mixtures=4, - loss_function="", - regularization="l2", - **kwargs - ): + def __init__(self, n_mixtures=4, loss_function="", regularization="l2", **kwargs): """ Create an instance of the Mixed Logit model for learning the discrete choice function. In this model we assume weights of this model to be random due to which this model can learn different variations in choices @@ -59,8 +52,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space n_mixtures: int (range : [2, inf]) The number of logit models (:math:`R`) which are used to estimate the choice probability loss_function : string , {‘categorical_crossentropy’, ‘binary_crossentropy’, ’categorical_hinge’} @@ -81,7 +72,6 @@ def __init__( [3] Daniel McFadden and Kenneth Train. „Mixed MNL models for discrete response“. In: Journal of applied Econometrics 15.5 (2000), pp. 
447–470 """ self.logger = logging.getLogger(MixedLogitModel.__name__) - self.n_object_features = n_object_features self.loss_function = likelihood_dict.get(loss_function, None) if regularization in ["l1", "l2"]: self.regularization = regularization @@ -166,7 +156,7 @@ def construct_model(self, X, Y): with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) - shapes = {"weights": (self.n_object_features, self.n_mixtures)} + shapes = {"weights": (self.n_object_features_fit_, self.n_mixtures)} weights_dict = create_weight_dictionary(self.model_configuration, shapes) utility = tt.dot(self.Xt, weights_dict["weights"]) self.p = tt.mean(ttu.softmax(utility, axis=1), axis=2) @@ -225,13 +215,14 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) def _predict_scores_fixed(self, X, **kwargs): summary = dict(pm.summary(self.trace)["mean"]) - weights = np.zeros((self.n_object_features, self.n_mixtures)) - for i, k in product(range(self.n_object_features), range(self.n_mixtures)): + weights = np.zeros((self.n_object_features_fit_, self.n_mixtures)) + for i, k in product(range(self.n_object_features_fit_), range(self.n_mixtures)): weights[i][k] = summary["weights[{},{}]".format(i, k)] utility = np.dot(X, weights) p = np.mean(npu.softmax(utility, axis=1), axis=2) From 3e3df20a80268db93bb1ecb0490a63e326f391d3 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:46:01 +0200 Subject: [PATCH 14/16] Determine data dimensionality lazily in MultinomialLogitModel --- csrank/discretechoice/multinomial_logit_model.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/csrank/discretechoice/multinomial_logit_model.py b/csrank/discretechoice/multinomial_logit_model.py index 085b85ab..eb0d5d48 
100644 --- a/csrank/discretechoice/multinomial_logit_model.py +++ b/csrank/discretechoice/multinomial_logit_model.py @@ -29,9 +29,7 @@ class MultinomialLogitModel(DiscreteObjectChooser, Learner): - def __init__( - self, n_object_features, loss_function="", regularization="l2", **kwargs - ): + def __init__(self, loss_function="", regularization="l2", **kwargs): """ Create an instance of the Multinomial Logit model for learning the discrete choice function. The utility score for each object in query set :math:`Q` is defined as :math:`U(x) = w \\cdot x`, where :math:`w` is @@ -50,8 +48,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space loss_function : string , {‘categorical_crossentropy’, ‘binary_crossentropy’, ’categorical_hinge’} Loss function to be used for the discrete choice decision from the query set regularization : string, {‘l1’, ‘l2’}, string @@ -68,7 +64,6 @@ def __init__( [2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986 """ self.logger = logging.getLogger(MultinomialLogitModel.__name__) - self.n_object_features = n_object_features self.loss_function = likelihood_dict.get(loss_function, None) if regularization in ["l1", "l2"]: self.regularization = regularization @@ -157,8 +152,8 @@ def construct_model(self, X, Y): with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) - shapes = {"weights": self.n_object_features} - # shapes = {'weights': (self.n_object_features, 3)} + shapes = {"weights": self.n_object_features_fit_} + # shapes = {'weights': (self.n_object_features_fit_, 3)} weights_dict = create_weight_dictionary(self.model_configuration, shapes) intercept = pm.Normal("intercept", mu=0, sd=10) utility = tt.dot(self.Xt, weights_dict["weights"]) + intercept @@ -219,6 +214,7 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + _n_instances, self.n_objects_fit_, 
self.n_object_features_fit_ = X.shape self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) @@ -226,7 +222,7 @@ def _predict_scores_fixed(self, X, **kwargs): d = dict(pm.summary(self.trace)["mean"]) intercept = 0.0 weights = np.array( - [d["weights[{}]".format(i)] for i in range(self.n_object_features)] + [d["weights[{}]".format(i)] for i in range(self.n_object_features_fit_)] ) if "intercept" in d: intercept = intercept + d["intercept"] From f6db015b50102d3ed04e95ab9a2749b29f5e0e9f Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:46:46 +0200 Subject: [PATCH 15/16] Remove unneeded init argument in ExpectedRankRegression --- csrank/objectranking/expected_rank_regression.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/csrank/objectranking/expected_rank_regression.py b/csrank/objectranking/expected_rank_regression.py index 6f063c86..61155e48 100644 --- a/csrank/objectranking/expected_rank_regression.py +++ b/csrank/objectranking/expected_rank_regression.py @@ -18,7 +18,6 @@ class ExpectedRankRegression(ObjectRanker, Learner): def __init__( self, - n_object_features, alpha=0.0, l1_ratio=0.5, tol=1e-4, @@ -46,8 +45,6 @@ def __init__( Parameters ---------- - n_object_features : int - Number of features of the object space alpha : float, optional Regularization strength l1_ratio : float, optional @@ -68,7 +65,6 @@ def __init__( [1] Kamishima, T., Kazawa, H., & Akaho, S. (2005, November). "Supervised ordering-an empirical survey.", Fifth IEEE International Conference on Data Mining. 
""" self.normalize = normalize - self.n_object_features = n_object_features self.alpha = alpha self.l1_ratio = l1_ratio self.tol = tol @@ -96,7 +92,6 @@ def fit(self, X, Y, **kwargs): self.random_state_ = check_random_state(self.random_state) self.logger.debug("Creating the Dataset") x_train, y_train = complete_linear_regression_dataset(X, Y) - assert x_train.shape[1] == self.n_object_features self.logger.debug("Finished the Dataset") if self.alpha < 1e-3: self.model = LinearRegression( From 7c0eef03dbb7c2ecef902f4fb070d9aef2f6ddb8 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Wed, 20 May 2020 20:03:40 +0200 Subject: [PATCH 16/16] Do not pass data dimensionality to learners After the last few commits the learners no longer need that information at initialization time. Instead, they determine it from the data when fitting. --- csrank/tests/test_choice_functions.py | 1 - csrank/tests/test_discrete_choice.py | 1 - csrank/tests/test_ranking.py | 1 - 3 files changed, 3 deletions(-) diff --git a/csrank/tests/test_choice_functions.py b/csrank/tests/test_choice_functions.py index bdddb957..b6158036 100644 --- a/csrank/tests/test_choice_functions.py +++ b/csrank/tests/test_choice_functions.py @@ -92,7 +92,6 @@ def test_choice_function_fixed(trivial_choice_problem, name): x, y = trivial_choice_problem choice_function = choice_functions[name][0] params, accuracies = choice_functions[name][1], choice_functions[name][2] - params["n_objects"], params["n_object_features"] = tuple(x.shape[1:]) learner = choice_function(**params) if name == GLM_CHOICE: learner.fit( diff --git a/csrank/tests/test_discrete_choice.py b/csrank/tests/test_discrete_choice.py index a0ec3889..5a2e0223 100644 --- a/csrank/tests/test_discrete_choice.py +++ b/csrank/tests/test_discrete_choice.py @@ -100,7 +100,6 @@ def test_discrete_choice_function_fixed(trivial_discrete_choice_problem, name): discrete_choice_functions[name][1], discrete_choice_functions[name][2], ) - params["n_objects"], 
params["n_object_features"] = tuple(x.shape[1:]) learner = choice_function(**params) if name in [MNL, NLM, GEV, PCL, MLM]: learner.fit( diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py index b719d83a..d2e213f9 100644 --- a/csrank/tests/test_ranking.py +++ b/csrank/tests/test_ranking.py @@ -110,7 +110,6 @@ def test_object_ranker_fixed(trivial_ranking_problem, ranker_name): np.random.seed(123) x, y = trivial_ranking_problem ranker, params, (loss, acc) = object_rankers[ranker_name] - params["n_objects"], params["n_object_features"] = tuple(x.shape[1:]) ranker = ranker(**params) if "linear" in ranker_name: ranker.fit(x, y, epochs=10, validation_split=0, verbose=False)