diff --git a/csrank/choicefunction/generalized_linear_model.py b/csrank/choicefunction/generalized_linear_model.py index 6e4b63ae..5db21f25 100644 --- a/csrank/choicefunction/generalized_linear_model.py +++ b/csrank/choicefunction/generalized_linear_model.py @@ -73,7 +73,7 @@ def __init__( self.regularization = regularization else: self.regularization = "l2" - self.random_state = check_random_state(random_state) + self.random_state = random_state self.model = None self.trace = None self.trace_vi = None @@ -224,9 +224,10 @@ def fit( **kwargs : Keyword arguments for the fit function """ + self.random_state_ = check_random_state(self.random_state) if tune_size > 0: X_train, X_val, Y_train, Y_val = train_test_split( - X, Y, test_size=tune_size, random_state=self.random_state + X, Y, test_size=tune_size, random_state=self.random_state_ ) try: self._fit( diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 35b2ab96..9aaca074 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -60,7 +60,7 @@ def __init__( del kwargs[key] self.kwargs = kwargs self.threshold_instances = int(1e10) - self.random_state = check_random_state(random_state) + self.random_state = random_state self.model = None self._construct_layers( kernel_regularizer=self.kernel_regularizer, @@ -156,6 +156,7 @@ def fit( **kwd : Keyword arguments for the fit function """ + self.random_state_ = check_random_state(self.random_state) x1, x2, y_double = self._convert_instances_(X, Y) self.logger.debug("Instances created {}".format(x1.shape[0])) diff --git a/csrank/core/fate_linear.py b/csrank/core/fate_linear.py index 6299bf1c..30de1dd7 100644 --- a/csrank/core/fate_linear.py +++ b/csrank/core/fate_linear.py @@ -28,7 +28,7 @@ def __init__( self.n_hidden_set_units = n_hidden_set_units self.learning_rate = learning_rate self.batch_size = batch_size - self.random_state = check_random_state(random_state) + self.random_state = random_state self.n_object_features = 
n_object_features self.loss_function = loss_function self.n_objects = n_objects @@ -46,17 +46,17 @@ def _construct_model_(self, n_objects): self.Y = tf.placeholder("float32", [None, n_objects]) std = 1 / np.sqrt(self.n_object_features) self.b1 = tf.Variable( - self.random_state.normal(loc=0, scale=std, size=self.n_hidden_set_units), + self.random_state_.normal(loc=0, scale=std, size=self.n_hidden_set_units), dtype=tf.float32, ) self.W1 = tf.Variable( - self.random_state.normal( + self.random_state_.normal( loc=0, scale=std, size=(self.n_object_features, self.n_hidden_set_units) ), dtype=tf.float32, ) self.W2 = tf.Variable( - self.random_state.normal( + self.random_state_.normal( loc=0, scale=std, size=(self.n_object_features + self.n_hidden_set_units), @@ -64,7 +64,7 @@ def _construct_model_(self, n_objects): dtype=tf.float32, ) self.b2 = tf.Variable( - self.random_state.normal(loc=0, scale=std, size=1), dtype=tf.float32 + self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32 ) set_rep = ( @@ -93,6 +93,7 @@ def step_decay(self, epoch): def fit( self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd ): + self.random_state_ = check_random_state(self.random_state) # Global Variables Initializer n_instances, n_objects, n_features = X.shape assert n_features == self.n_object_features @@ -178,7 +179,6 @@ def set_tunable_parameters( self.n_hidden_set_units = n_hidden_set_units self.batch_size = batch_size self.learning_rate = learning_rate - self._construct_model_(self.n_objects) self.epochs_drop = epochs_drop self.drop = drop if len(point) > 0: diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index 418c8efc..518a7a4e 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -60,7 +60,7 @@ def __init__( Keyword arguments for the hidden units """ self.logger = logging.getLogger(FATENetworkCore.__name__) - self.random_state = check_random_state(random_state) + self.random_state = random_state 
self.n_hidden_joint_layers = n_hidden_joint_layers self.n_hidden_joint_units = n_hidden_joint_units @@ -500,6 +500,7 @@ def fit( **kwargs : Keyword arguments for the fit function """ + self.random_state_ = check_random_state(self.random_state) self._fit( X=X, Y=Y, diff --git a/csrank/core/feta_linear.py b/csrank/core/feta_linear.py index 797b9b24..40dd11a1 100644 --- a/csrank/core/feta_linear.py +++ b/csrank/core/feta_linear.py @@ -27,7 +27,7 @@ def __init__( ): self.learning_rate = learning_rate self.batch_size = batch_size - self.random_state = check_random_state(random_state) + self.random_state = random_state self.n_object_features = n_object_features self.loss_function = loss_function self.n_objects = n_objects @@ -47,21 +47,23 @@ def _construct_model_(self, n_objects): self.Y = tf.placeholder("float32", [None, n_objects]) std = 1 / np.sqrt(self.n_object_features) self.b1 = tf.Variable( - self.random_state.normal(loc=0, scale=std, size=1), dtype=tf.float32 + self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32 ) self.W1 = tf.Variable( - self.random_state.normal(loc=0, scale=std, size=2 * self.n_object_features), + self.random_state_.normal( + loc=0, scale=std, size=2 * self.n_object_features + ), dtype=tf.float32, ) self.W2 = tf.Variable( - self.random_state.normal(loc=0, scale=std, size=self.n_object_features), + self.random_state_.normal(loc=0, scale=std, size=self.n_object_features), dtype=tf.float32, ) self.b2 = tf.Variable( - self.random_state.normal(loc=0, scale=std, size=1), dtype=tf.float32 + self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32 ) self.W_out = tf.Variable( - self.random_state.normal(loc=0, scale=std, size=2), + self.random_state_.normal(loc=0, scale=std, size=2), dtype=tf.float32, name="W_out", ) @@ -97,6 +99,7 @@ def step_decay(self, epoch): def fit( self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd ): + self.random_state_ = check_random_state(self.random_state) # Global 
Variables Initializer n_instances, n_objects, n_features = X.shape assert n_features == self.n_object_features @@ -183,7 +186,6 @@ def set_tunable_parameters( """ self.batch_size = batch_size self.learning_rate = learning_rate - self._construct_model_(self.n_objects) self.epochs_drop = epochs_drop self.drop = drop if len(point) > 0: diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 23ebdcf2..6088acac 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -45,7 +45,7 @@ def __init__( **kwargs ): self.logger = logging.getLogger(FETANetwork.__name__) - self.random_state = check_random_state(random_state) + self.random_state = random_state self.kernel_regularizer = kernel_regularizer self.kernel_initializer = kernel_initializer self.batch_normalization = batch_normalization @@ -290,6 +290,7 @@ def fit( Keyword arguments for the fit function """ self.logger.debug("Enter fit function...") + self.random_state_ = check_random_state(self.random_state) X, Y = self.sub_sampling(X, Y) self.model = self.construct_model() @@ -311,7 +312,7 @@ def fit( def sub_sampling(self, X, Y): if self._n_objects > self.max_number_of_objects: bucket_size = int(self._n_objects / self.max_number_of_objects) - idx = self.random_state.randint(bucket_size, size=(len(X), self.n_objects)) + idx = self.random_state_.randint(bucket_size, size=(len(X), self.n_objects)) # TODO: subsampling multiple rankings idx += np.arange(start=0, stop=self._n_objects, step=bucket_size)[ : self.n_objects diff --git a/csrank/core/pairwise_svm.py b/csrank/core/pairwise_svm.py index 03f1d313..23bbffba 100644 --- a/csrank/core/pairwise_svm.py +++ b/csrank/core/pairwise_svm.py @@ -49,7 +49,7 @@ def __init__( self.C = C self.tol = tol self.logger = logging.getLogger("RankSVM") - self.random_state = check_random_state(random_state) + self.random_state = random_state self.threshold_instances = int(1e10) self.fit_intercept = fit_intercept self.weights = None @@ -70,13 +70,14 @@ 
def fit(self, X, Y, **kwargs): Keyword arguments for the fit function """ + self.random_state_ = check_random_state(self.random_state) x_train, y_single = self._convert_instances_(X, Y) if x_train.shape[0] > self.threshold_instances: self.model = LogisticRegression( C=self.C, tol=self.tol, fit_intercept=self.fit_intercept, - random_state=self.random_state, + random_state=self.random_state_, ) self.logger.info("Logistic Regression model ") else: @@ -84,7 +85,7 @@ def fit(self, X, Y, **kwargs): C=self.C, tol=self.tol, fit_intercept=self.fit_intercept, - random_state=self.random_state, + random_state=self.random_state_, ) self.logger.info("Linear SVC model ") diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index c47a7bba..6c7b5203 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -57,7 +57,7 @@ def __init__( self._scoring_model = None self.model = None self.hash_file = None - self.random_state = check_random_state(random_state) + self.random_state = random_state self._construct_layers( kernel_regularizer=self.kernel_regularizer, kernel_initializer=self.kernel_initializer, @@ -148,6 +148,7 @@ def fit( **kwd : Keyword arguments for the fit function """ + self.random_state_ = check_random_state(self.random_state) X1, X2, Y_single = self._convert_instances_(X, Y) self.logger.debug("Instances created {}".format(X1.shape[0])) diff --git a/csrank/discretechoice/baseline.py b/csrank/discretechoice/baseline.py index 1b258aec..8c4a4a7a 100644 --- a/csrank/discretechoice/baseline.py +++ b/csrank/discretechoice/baseline.py @@ -15,15 +15,15 @@ def __init__(self, random_state=None, **kwargs): """ self.logger = logging.getLogger(RandomBaselineDC.__name__) - self.random_state = check_random_state(random_state) + self.random_state = random_state self.model = None def fit(self, X, Y, **kwd): - pass + self.random_state_ = check_random_state(self.random_state) def _predict_scores_fixed(self, X, **kwargs): n_instances, n_objects, 
n_features = X.shape - return self.random_state.rand(n_instances, n_objects) + return self.random_state_.rand(n_instances, n_objects) def predict_scores(self, X, **kwargs): return super().predict_scores(X, **kwargs) diff --git a/csrank/discretechoice/generalized_nested_logit.py b/csrank/discretechoice/generalized_nested_logit.py index 4cd066b8..d826b2f7 100644 --- a/csrank/discretechoice/generalized_nested_logit.py +++ b/csrank/discretechoice/generalized_nested_logit.py @@ -101,7 +101,7 @@ def __init__( self.alpha = alpha self.loss_function = likelihood_dict.get(loss_function, None) - self.random_state = check_random_state(random_state) + self.random_state = random_state if regularization in ["l1", "l2"]: self.regularization = regularization else: @@ -261,9 +261,10 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.random_state_ = check_random_state(self.random_state) if np.prod(X.shape) > self.threshold: upper_bound = int(self.threshold / np.prod(X.shape[1:])) - indices = self.random_state.choice(X.shape[0], upper_bound, replace=False) + indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False) X = X[indices, :, :] Y = Y[indices, :] self.logger.info( diff --git a/csrank/discretechoice/nested_logit_model.py b/csrank/discretechoice/nested_logit_model.py index 9aa74091..e286a67a 100644 --- a/csrank/discretechoice/nested_logit_model.py +++ b/csrank/discretechoice/nested_logit_model.py @@ -99,7 +99,7 @@ def __init__( else: self.n_nests = n_nests self.alpha = alpha - self.random_state = check_random_state(random_state) + self.random_state = random_state self.loss_function = likelihood_dict.get(loss_function, None) if regularization in ["l1", "l2"]: self.regularization = regularization @@ -192,11 +192,12 @@ def create_nests(self, X): (n_instances, n_objects) Values for each object implying the nest it belongs to. 
For example for :math:`2` nests the value 0 implies that object is allocated to nest 1 and value 1 implies it is allocated to nest 2. """ + self.random_state_ = check_random_state(self.random_state) n, n_obj, n_dim = X.shape objects = X.reshape(n * n_obj, n_dim) if self.cluster_model is None: self.cluster_model = MiniBatchKMeans( - n_clusters=self.n_nests, random_state=self.random_state + n_clusters=self.n_nests, random_state=self.random_state_ ).fit(objects) self.features_nests = self.cluster_model.cluster_centers_ prediction = self.cluster_model.labels_ @@ -321,7 +322,7 @@ def construct_model(self, X, Y): """ if np.prod(X.shape) > self.threshold: upper_bound = int(self.threshold / np.prod(X.shape[1:])) - indices = self.random_state.choice(X.shape[0], upper_bound, replace=False) + indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False) X = X[indices, :, :] Y = Y[indices, :] self.logger.info( @@ -399,6 +400,7 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + self.random_state_ = check_random_state(self.random_state) self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) diff --git a/csrank/discretechoice/paired_combinatorial_logit.py b/csrank/discretechoice/paired_combinatorial_logit.py index 81d029b5..b8262843 100644 --- a/csrank/discretechoice/paired_combinatorial_logit.py +++ b/csrank/discretechoice/paired_combinatorial_logit.py @@ -98,7 +98,7 @@ def __init__( self.nests_indices = np.array(list(combinations(np.arange(n_objects), 2))) self.n_nests = len(self.nests_indices) self.alpha = alpha - self.random_state = check_random_state(random_state) + self.random_state = random_state self.loss_function = likelihood_dict.get(loss_function, None) if regularization in ["l1", "l2"]: self.regularization = regularization @@ -329,6 +329,7 @@ def fit( **kwargs : Keyword arguments for the fit function of :meth:`pymc3.fit`or :meth:`pymc3.sample` """ + self.random_state_ =
check_random_state(self.random_state) self.construct_model(X, Y) fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs) diff --git a/csrank/objectranking/baseline.py b/csrank/objectranking/baseline.py index fe424eac..c69cb6fc 100644 --- a/csrank/objectranking/baseline.py +++ b/csrank/objectranking/baseline.py @@ -15,15 +15,15 @@ def __init__(self, random_state=None, **kwargs): """ self.logger = logging.getLogger(RandomBaselineRanker.__name__) - self.random_state = check_random_state(random_state) + self.random_state = random_state self.model = None def fit(self, X, Y, **kwd): - pass + self.random_state_ = check_random_state(self.random_state) def _predict_scores_fixed(self, X, **kwargs): n_instances, n_objects, n_features = X.shape - return self.random_state.rand(n_instances, n_objects) + return self.random_state_.rand(n_instances, n_objects) def predict_scores(self, X, **kwargs): return super().predict_scores(X, **kwargs) diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py index 59511b10..ad218992 100644 --- a/csrank/objectranking/cmp_net.py +++ b/csrank/objectranking/cmp_net.py @@ -109,7 +109,7 @@ def _convert_instances_(self, X, Y): garbage, x1, x2, y_double, garbage = generate_complete_pairwise_dataset(X, Y) del garbage if x1.shape[0] > self.threshold_instances: - indices = self.random_state.choice( + indices = self.random_state_.choice( x1.shape[0], self.threshold_instances, replace=False ) x1 = x1[indices, :] diff --git a/csrank/objectranking/expected_rank_regression.py b/csrank/objectranking/expected_rank_regression.py index f34c1e88..6f063c86 100644 --- a/csrank/objectranking/expected_rank_regression.py +++ b/csrank/objectranking/expected_rank_regression.py @@ -74,7 +74,7 @@ def __init__( self.tol = tol self.logger = logging.getLogger(ExpectedRankRegression.__name__) self.fit_intercept = fit_intercept - self.random_state = check_random_state(random_state) + self.random_state = random_state self.weights = None def 
fit(self, X, Y, **kwargs): @@ -93,6 +93,7 @@ def fit(self, X, Y, **kwargs): **kwargs Keyword arguments for the fit function """ + self.random_state_ = check_random_state(self.random_state) self.logger.debug("Creating the Dataset") x_train, y_train = complete_linear_regression_dataset(X, Y) assert x_train.shape[1] == self.n_object_features @@ -110,7 +111,7 @@ def fit(self, X, Y, **kwargs): normalize=self.normalize, tol=self.tol, fit_intercept=self.fit_intercept, - random_state=self.random_state, + random_state=self.random_state_, ) self.logger.info("Elastic Net") else: @@ -119,7 +120,7 @@ def fit(self, X, Y, **kwargs): normalize=self.normalize, tol=self.tol, fit_intercept=self.fit_intercept, - random_state=self.random_state, + random_state=self.random_state_, ) self.logger.info("Ridge") self.logger.debug("Finished Creating the model, now fitting started") diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index d6b8aa24..076d0c07 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -117,7 +117,7 @@ def __init__( self.threshold_instances = int(1e10) self.batch_size = batch_size - self.random_state = check_random_state(random_state) + self.random_state = random_state self.hash_file = None self.model = None self._scoring_model = None @@ -179,6 +179,7 @@ def fit( **kwd Keyword arguments for the fit function """ + self.random_state_ = check_random_state(self.random_state) self.n_objects = X.shape[1] self.logger.debug("Creating top-k dataset") X, Y = self._create_topk(X, Y) diff --git a/csrank/tuning.py b/csrank/tuning.py index a1318813..de5cbf01 100644 --- a/csrank/tuning.py +++ b/csrank/tuning.py @@ -160,7 +160,7 @@ def __init__( else: self._fit_params = fit_params - self.random_state = check_random_state(random_state) + self.random_state = random_state self.model = None self.opt = None @@ -240,6 +240,7 @@ def fit( **kwargs ): start = datetime.now() + self.random_state_ = 
check_random_state(self.random_state) def splitter(itr): for train_idx, test_idx in itr: @@ -263,7 +264,7 @@ def splitter_dict(itr_dict): if cv_iter is None: cv_iter = ShuffleSplit( - n_splits=3, test_size=0.1, random_state=self.random_state + n_splits=3, test_size=0.1, random_state=self.random_state_, ) if isinstance(X, dict): splits = dict() @@ -278,11 +279,11 @@ def splitter_dict(itr_dict): # Here we fix a random seed for all simulations to correlate the random # streams: - seed = self.random_state.randint(2 ** 32, dtype="uint32") + seed = self.random_state_.randint(2 ** 32, dtype="uint32") self.logger.debug("Random seed for the ranking algorithm: {}".format(seed)) - opt_seed = self.random_state.randint(2 ** 32, dtype="uint32") + opt_seed = self.random_state_.randint(2 ** 32, dtype="uint32") self.logger.debug("Random seed for the optimizer: {}".format(opt_seed)) - gp_seed = self.random_state.randint(2 ** 32, dtype="uint32") + gp_seed = self.random_state_.randint(2 ** 32, dtype="uint32") self.logger.debug("Random seed for the GP surrogate: {}".format(gp_seed)) n_iter = self.set_optimizer(n_iter, opt_seed, acq_func, gp_seed, **kwargs)