kiudee · timokau · Aug 3, 2020 · Aug 1, 2020 · Aug 1, 2020
diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py
@@ -100,13 +100,6 @@ def _convert_instances_(self, X, Y):
         self.logger.debug("Creating the Dataset")
         x1, x2, garbage, y_double, garbage = generate_complete_pairwise_dataset(X, Y)
         del garbage
-        if x1.shape[0] > self.threshold_instances:
-            indices = self.random_state.choice(
-                x1.shape[0], self.threshold_instances, replace=False
-            )
-            x1 = x1[indices, :]
-            x2 = x2[indices, :]
-            y_double = y_double[indices, :]
         self.logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
         return x1, x2, y_double
 

diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py
@@ -97,14 +97,6 @@ def _convert_instances_(self, X, Y):
         self.logger.debug("Creating the Dataset")
         x1, x2, garbage, garbage, y_single = generate_complete_pairwise_dataset(X, Y)
         del garbage
-        if x1.shape[0] > self.threshold_instances:
-            indices = self.random_state.choice(
-                x1.shape[0], self.threshold_instances, replace=False
-            )
-            x1 = x1[indices, :]
-            x2 = x2[indices, :]
-            y_single = y_single[indices]
-            self.logger.debug("Sampling instances")
         self.logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
         return x1, x2, y_single
 

diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py
@@ -54,7 +54,6 @@ def __init__(
             if key not in allowed_dense_kwargs:
                 del kwargs[key]
         self.kwargs = kwargs
-        self.threshold_instances = int(1e10)
         self.random_state = random_state
         self.model = None
 

diff --git a/csrank/core/pairwise_svm.py b/csrank/core/pairwise_svm.py
@@ -16,6 +16,7 @@ def __init__(
         tol=1e-4,
         normalize=True,
         fit_intercept=True,
+        use_logistic_regression=False,
         random_state=None,
         **kwargs,
     ):
@@ -31,6 +32,10 @@ def __init__(
             If True, the data will be normalized before fitting.
         fit_intercept : bool, optional
             If True, the linear model will also fit an intercept.
+        use_logistic_regression : bool, optional
+            If True, fit a Logistic Regression model instead of a Linear
+            Support Vector Machine (default: False). The simpler Logistic
+            Regression model may be preferable for large sample sizes.
         random_state : int, RandomState instance or None, optional
             Seed of the pseudorandom generator or a RandomState instance
         **kwargs
@@ -44,8 +49,8 @@ def __init__(
         self.C = C
         self.tol = tol
         self.logger = logging.getLogger("RankSVM")
+        self.use_logistic_regression = use_logistic_regression
         self.random_state = random_state
-        self.threshold_instances = int(1e10)
         self.fit_intercept = fit_intercept
         self.weights = None
         self.model = None
@@ -68,7 +73,7 @@ def fit(self, X, Y, **kwargs):
         self.random_state_ = check_random_state(self.random_state)
         _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
         x_train, y_single = self._convert_instances_(X, Y)
-        if x_train.shape[0] > self.threshold_instances:
+        if self.use_logistic_regression:
             self.model = LogisticRegression(
                 C=self.C,
                 tol=self.tol,

diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py
@@ -47,7 +47,6 @@ def __init__(
             if key not in allowed_dense_kwargs:
                 del kwargs[key]
         self.kwargs = kwargs
-        self.threshold_instances = int(1e10)
         self.batch_size = batch_size
         self._scoring_model = None
         self.model = None

diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py
@@ -97,13 +97,6 @@ def _convert_instances_(self, X, Y):
         self.logger.debug("Creating the Dataset")
         x1, x2, garbage, y_double, garbage = generate_complete_pairwise_dataset(X, Y)
         del garbage
-        if x1.shape[0] > self.threshold_instances:
-            indices = self.random_state.choice(
-                x1.shape[0], self.threshold_instances, replace=False
-            )
-            x1 = x1[indices, :]
-            x2 = x2[indices, :]
-            y_double = y_double[indices, :]
         self.logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
         return x1, x2, y_double
 

diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py
@@ -96,13 +96,6 @@ def _convert_instances_(self, X, Y):
         self.logger.debug("Creating the Dataset")
         x1, x2, garbage, garbage, y_single = generate_complete_pairwise_dataset(X, Y)
         del garbage
-        if x1.shape[0] > self.threshold_instances:
-            indices = self.random_state.choice(
-                x1.shape[0], self.threshold_instances, replace=False
-            )
-            x1 = x1[indices, :]
-            x2 = x2[indices, :]
-            y_single = y_single[indices]
         self.logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
         return x1, x2, y_single
 

diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py
@@ -104,13 +104,6 @@ def _convert_instances_(self, X, Y):
         self.logger.debug("Creating the Dataset")
         garbage, x1, x2, y_double, garbage = generate_complete_pairwise_dataset(X, Y)
         del garbage
-        if x1.shape[0] > self.threshold_instances:
-            indices = self.random_state_.choice(
-                x1.shape[0], self.threshold_instances, replace=False
-            )
-            x1 = x1[indices, :]
-            x2 = x2[indices, :]
-            y_double = y_double[indices, :]
         self.logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
         return x1, x2, y_double
 

diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py
@@ -105,7 +105,6 @@ def __init__(
                 del kwargs[key]
         self.kwargs = kwargs
 
-        self.threshold_instances = int(1e10)
         self.batch_size = batch_size
         self.random_state = random_state
         self.hash_file = None

diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py
@@ -98,13 +98,6 @@ def _convert_instances_(self, X, Y):
         self.logger.debug("Creating the Dataset")
         garbage, x1, x2, garbage, y_single = generate_complete_pairwise_dataset(X, Y)
         del garbage
-        if x1.shape[0] > self.threshold_instances:
-            indices = self.random_state.choice(
-                x1.shape[0], self.threshold_instances, replace=False
-            )
-            x1 = x1[indices, :]
-            x2 = x2[indices, :]
-            y_single = y_single[indices]
         self.logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
         return x1, x2, y_single