From 727d61cf86925908615a5af4520d4b2a960daac1 Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Mon, 27 Jan 2020 00:29:19 +0100
Subject: [PATCH 01/11] Condensed commits for all changes in new branch

---
 csrank/metrics.py                  |  39 ++-
 csrank/objectranking/lambdamart.py | 376 +++++++++++++++++++++++++++++
 csrank/tests/test_ranking.py       |   2 +
 3 files changed, 416 insertions(+), 1 deletion(-)
 create mode 100644 csrank/objectranking/lambdamart.py

diff --git a/csrank/metrics.py b/csrank/metrics.py
index eb554449..bc2e338d 100644
--- a/csrank/metrics.py
+++ b/csrank/metrics.py
@@ -47,6 +47,7 @@
 import numpy as np
 import tensorflow as tf
 from keras import backend as K
+import math
 
 from csrank.tensorflow_util import scores_to_rankings, get_instances_objects, tensorify
 
@@ -54,7 +55,8 @@
            'zero_one_rank_loss_for_scores_ties',
            'make_ndcg_at_k_loss', 'kendalls_tau_for_scores',
            'spearman_correlation_for_scores', "zero_one_accuracy",
-           "zero_one_accuracy_for_scores", "topk_categorical_accuracy"]
+           "zero_one_accuracy_for_scores", "topk_categorical_accuracy",
+           "point_dcg", "dcg", "ndcg"]
 
 
 def zero_one_rank_loss(y_true, y_pred):
@@ -331,3 +333,38 @@ def err(y_true, y_pred, utility_function=None, probability_mapping=None):
     results = tf.reduce_sum(discounted_document_values, axis=1)
 
     return K.mean(results)
+
+def point_dcg(args):
+    """
+        Point DCG calculation function. Calculates the discounted gain of a single document from a (position, label) pair: (2 ** label - 1) / log2(position + 2), with positions starting at 0.
+    """
+    pos, label = args
+    return (2 ** label - 1) / math.log(pos + 2, 2)
+
+def dcg(preds):
+    """
+        List DCG calculation function. Sums the point DCG values of the labels in preds, using each label's index in the list as its (0-based) position.
+    """
+    return sum(map(point_dcg, enumerate(preds)))
+
+def ndcg(preds, k=10):
+    """
+        Return NDCG@k for preds, the true relevance labels of one query listed in the order the model ranked them; returns 1 when the ideal DCG is 0.
+    """
+    model_top = preds[:k]
+
+    # Ideal ordering: the k largest labels, highest first (the cut-off follows k instead of a hard-coded 10)
+    if len(preds) > k:
+        ideal_top = np.partition(preds, -k)[-k:]
+        ideal_top.sort()
+    else:
+        ideal_top = np.sort(preds)
+    ideal_top = ideal_top[::-1]
+
+    ideal_dcg = dcg(ideal_top)
+    model_dcg = dcg(model_top)
+
+    if ideal_dcg == 0:
+        return 1
+
+    return model_dcg / ideal_dcg
\ No newline at end of file
diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
new file mode 100644
index 00000000..94b7cb4f
--- /dev/null
+++ b/csrank/objectranking/lambdamart.py
@@ -0,0 +1,376 @@
+import logging, math
+from collections import deque
+from multiprocessing import Pool
+from itertools import chain
+
+import numpy as np
+from sklearn.tree import DecisionTreeRegressor
+
+from csrank.learner import Learner
+from csrank.metrics import point_dcg, dcg, ndcg
+from csrank.objectranking.object_ranker import ObjectRanker
+
+class LambdaMART(ObjectRanker,Learner):
+    def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, learning_rate=1e-3,
+                 min_samples_split=2, max_depth=50, min_samples_leaf=1, max_leaf_nodes=None, num_process = None,
+                 criterion="mse", splitter="best", min_weight_fraction_leaf=None, max_features=None, random_state=None, 
+                 min_impurity_decrease=None, min_impurity_split=None, **kwargs):
+        """
+        Create a LambdaMART based rank regression model. This model uses an ensemble of trees that learn to predict
+        the relevance scores of the documents based on the features, which then can be turned into rankings.
+        The base learner used is the implementation of Decision Tree from the sklearn tree package. The learner
+        tries to indirectly optimize the nDCG metric by learning the lambdas.
+
+        Parameters
+        ----------
+        n_object_features : int
+            Number of features of the object space
+        n_objects : int
+            Number of objects
+        number_of_trees : int
+            The maximum number of trees that are to be trained for the ensemble.
+        learning_rate : float
+            learning rate for the LambdaMART algorithm
+        min_samples_split : int
+            Number of samples required to split the internal node
+        max_depth : int
+            Maximum depth of the tree
+        min_samples_leaf : int
+            Minimum number of samples required to be at the leaf node
+
+        References
+        ----------
+            [1] Burges, Chris J.C. (2010, June). "From RankNet to LambdaRank to LambdaMART: An Overview"
+        """
+        self.n_object_features = n_object_features
+        self.n_objects = n_objects
+        self.number_of_trees = number_of_trees
+        self.learning_rate = learning_rate
+        self.min_samples_split = min_samples_split
+        self.max_depth = max_depth
+        self.min_samples_leaf = min_samples_leaf
+        self.max_leaf_nodes = max_leaf_nodes
+        self.num_process = num_process
+        self.ensemble = []
+        self.random_state = random_state
+        self.criterion = criterion
+        self.splitter = splitter
+        self.min_weight_fraction_leaf = min_weight_fraction_leaf
+        self.max_features = max_features
+        self.random_state = random_state
+        self.min_impurity_decrease = min_impurity_decrease
+        self.min_impurity_split = min_impurity_split
+        self.logger = logging.getLogger(LambdaMART.__name__)
+
+    def _prepare_train_data(self, X, Y, **kwargs):
+        """
+            Transform the data provided in the form of X_train of shape (n_instances,n_objects,n_features) and y_train of shape (n_instances,n_objects) into a single 2-D array with one row per object. The output format is similar to the one provided by the custom dataset reader.
+
+            Parameters
+            ---------
+            X : numpy array
+                (n_instances, n_objects, n_features)
+                Feature vectors of the objects
+            Y : numpy array
+                (n_instances, n_objects)
+                Rankings of the given objects
+            Returns
+            ------
+            Returns an array of shape (n_instances*n_objects, n_features+2): column 0 holds the relevance score derived from the ranking in Y, column 1 the instance index used as query id, and the remaining columns the features
+
+        """
+        #prepare array like features and imaginary qids
+        xdim = X.shape[0]  # n_instances - qid
+        ydim = X.shape[1]  # n_objects - documents
+        zdim = X.shape[2]  # n_features
+
+        features_as_list = deque()
+        for i in range(0,xdim):
+            for j in range(0,ydim):
+                row_as_list=deque([i])
+                features = deque()
+                for k in range(0, zdim):
+                    row_as_list.append(X[i, j, k])
+                features_as_list.append(row_as_list)
+
+        #Convert rankings to relevance scores     
+        scores_docsize = Y.shape[1]
+        relscore_train = np.subtract(scores_docsize, Y)
+
+        #prepare array like relevance score values
+        xdim_scores = relscore_train.shape[0]
+        ydim_scores = relscore_train.shape[1]
+
+        scores_as_list = deque()
+        for x in range(0,xdim_scores):
+            for y in range(0,ydim_scores):
+                scores_as_list.append(relscore_train[x,y])
+        
+        #Check if both the dimensions are the same
+        assert(len(features_as_list)==len(scores_as_list))
+        
+        #convert to numpy and resize the arrays 
+        features = np.asarray(features_as_list)
+        scores_unflat = np.array(scores_as_list)
+        scores = np.reshape(scores_unflat,(len(scores_unflat),1))
+
+        #Concatenate the reshaped arrays and return as training data
+        train_data = np.concatenate((scores,features),axis=1)
+
+        return train_data
+    
+    def _group_by_queries(self, data, queries):
+
+        """
+            Internal function which splits the data into one numpy array per query, starting a new group whenever the query id changes between consecutive entries.
+        """
+        result = []
+        curr_query = None
+        for s, q in zip(data, queries):
+            if q != curr_query:
+                result.append([])
+                curr_query = q
+            result[-1].append(s)
+        result = list(map(np.array, result))
+        return result
+    
+    def fit(self, X, y, **kwargs):
+        """
+            Fit the LambdaMART ensemble: each boosting round fits one regression tree to the per-object lambdas computed from the current predictions.
+
+            Parameters
+            ----------
+            X : numpy array
+                (n_instances, n_objects, n_features)
+                Feature vectors of the objects
+            y : numpy array
+                (n_instances, n_objects)
+                Rankings of the given objects (converted to relevance scores by _prepare_train_data)
+            **kwargs
+                Keyword arguments for the fit function (currently unused)
+
+            Returns
+            -------
+            Returns the model, i.e. self.ensemble, the list of all the fitted trees that make up the MART model
+
+        """
+        #Start from an empty ensemble so that trees from a previous call to fit are not reused.
+        if len(self.ensemble) > 0:
+            self.ensemble.clear()
+        #Flattened layout: column 0 = relevance score, column 1 = query id, columns 2.. = features
+        train_file = self._prepare_train_data(X, y)
+        scores = train_file[:, 0]
+        queries = train_file[:, 1]
+        features = train_file[:, 2:]  # was 3:, which silently dropped the first feature column
+
+        model_preds = np.zeros(len(features))
+
+        for i in range(self.number_of_trees):
+            #Group labels and current predictions per query; the lambdas are computed query by query
+            true_data = self._group_by_queries(scores, queries)
+            model_data = self._group_by_queries(model_preds, queries)
+
+            with Pool(self.num_process) as pool:
+                lambdas_draft = pool.map(query_lambdas, list(zip(true_data, model_data)))
+                lambdas = list(chain(*lambdas_draft))
+
+            tree = DecisionTreeRegressor(criterion=self.criterion,
+                                         splitter=self.splitter,
+                                         max_depth=self.max_depth,
+                                         min_samples_split=self.min_samples_split,
+                                         min_samples_leaf=self.min_samples_leaf,
+                                         min_weight_fraction_leaf=self.min_weight_fraction_leaf or 0.0,  # None is not a valid value; use sklearn's default
+                                         max_features=self.max_features,
+                                         random_state=self.random_state,
+                                         max_leaf_nodes=self.max_leaf_nodes,
+                                         min_impurity_decrease=self.min_impurity_decrease or 0.0,  # None is not a valid value; use sklearn's default
+                                         min_impurity_split=self.min_impurity_split)
+            tree.fit(features, lambdas)
+
+            self.ensemble.append(tree)
+
+            prediction = tree.predict(features)
+            model_preds += self.learning_rate * prediction
+            #TODO: Remove the next two statements after debugging
+            train_score = self._score(model_preds, scores, queries, 10)
+            print("  --iteration train score " + str(train_score))
+        return self.ensemble
+
+    def _predict_scores_fixed(self, X, **kwargs):
+        """
+            Predict the scores for a given collection of sets of objects of same size.
+
+            Parameters
+            ----------
+            X : array-like, shape (n_samples, n_objects, n_features)
+                Flattened to (n_samples * n_objects, n_features) so every tree scores all objects in one call.
+
+            Returns
+            -------
+            Y : array-like, shape (n_samples, n_objects)
+                Returns the scores of each of the objects for each of the samples (learning-rate weighted sum over the ensemble).
+        """
+        n_instances, n_objects, n_features = X.shape
+        self.logger.info("For Test instances {} objects {} features {}".format(*X.shape))
+        X1 = X.reshape(n_instances * n_objects, n_features)
+        scores = np.zeros(n_instances * n_objects)
+        for tree in self.ensemble:
+            scores += self.learning_rate * tree.predict(X1)  # same per-tree shrinkage as in fit and _predict
+        scores = scores.reshape(n_instances, n_objects)
+        return scores
+
+    def predict_scores(self, X, **kwargs):
+        """
+            Predict the utility scores for each object in the collection of set of objects called a query set.
+
+            Parameters
+            ----------
+            X : numpy array of size (n_instances, n_objects, n_features)
+
+            Returns
+            -------
+            Numpy array of size (n_instances, n_objects)
+        """
+        return super().predict_scores(X, **kwargs)
+
+    def predict_for_scores(self, scores, **kwargs):
+        """
+         Predict rankings for the scores for a given collection of sets of objects (query sets). Wrapper that calls the function of the same name 
+         belonging to the ObjectRanker super class.
+        """
+        return ObjectRanker.predict_for_scores(self, scores, **kwargs)
+
+    def predict(self, X, **kwargs):
+        return super().predict(X, **kwargs)
+
+    def _predict(self, pred_vector):
+        """
+            Predict the scores for the data supplied by iterating over the ensemble and returning the output.
+
+            Parameters
+            ----------
+            pred_vector: this is a numpy array with one row per object, holding the query id in column 1 and the features from column 2 onwards
+
+            Returns
+            -------
+            results: Predicted scores for each of the objects
+            queries: queries corresponding to the predictions that are made
+        """        
+        queries = pred_vector[:, 1]
+        features = pred_vector[:, 2:]        
+         
+        results = np.zeros(len(features))
+        for tree in self.ensemble:
+            results += tree.predict(features) * self.learning_rate
+        return results, queries
+
+    def _score(self, prediction, true_score, query, k=10):
+        """
+            Function that is used to score the performance of the model. 
+
+            Parameters
+            ----------
+            prediction: Predictions of the model
+            true_score: ground truth data of the predictions
+            query: queries accompanying the prediction data used to calculate the ndcg value
+
+            Returns
+            -------
+            Returns the average NDCG value calculated on the basis of the queries supplied, for the predictions
+        """
+        true_data = self._group_by_queries(true_score, query)
+        model_data = self._group_by_queries(prediction, query)
+
+        total_ndcg = []
+
+        for true_d, model_d in zip(true_data, model_data):
+            data = true_d[np.argsort(model_d)[::-1]]
+            total_ndcg.append(ndcg(data, k))
+
+        return sum(total_ndcg) / len(total_ndcg)
+
+    def set_tunable_parameters(self, min_samples_split, max_depth, min_samples_leaf, max_leaf_nodes,
+                               learning_rate, number_of_trees, criterion, splitter, min_weight_fraction_leaf, 
+                               max_features, random_state, min_impurity_decrease, min_impurity_split, **kwargs):
+        """
+            Set the tunable hyperparameters of the DecisionTree model used in LambdaMART
+
+            Parameters
+            ----------
+            min_samples_split : int
+                Number of samples required to split the internal node
+            max_depth : int
+                Maximum depth of the tree
+            min_samples_leaf : int
+                Minimum number of samples required to be at the leaf node
+            max_leaf_nodes : int
+                These are the maximum number of leaf nodes used to grow the tree
+            number_of_trees : int
+                The maximum number of trees that are to be trained for the ensemble.
+            learning_rate : float
+                learning rate for the LambdaMART algorithm
+        """
+        self.min_samples_split = min_samples_split
+        self.max_depth = max_depth
+        self.min_samples_leaf = min_samples_leaf
+        self.max_leaf_nodes = max_leaf_nodes
+        self.number_of_trees = number_of_trees
+        self.learning_rate = learning_rate
+        self.criterion = criterion
+        self.splitter = splitter
+        self.min_weight_fraction_leaf = min_weight_fraction_leaf
+        self.max_features = max_features
+        self.random_state = random_state
+        self.min_impurity_decrease = min_impurity_decrease
+        self.min_impurity_split = min_impurity_split
+
+
+def query_lambdas(data, k=10):
+    """
+        Compute the lambda values (the regression targets for the next tree) for the objects of a single query.
+
+        Parameters
+        ----------
+        data : tuple (true_data, model_data) of equally long 1-D numpy arrays holding the relevance labels and the current model scores of one query
+        k : int, cut-off of the ideal DCG that normalizes the swap deltas (default 10)
+        Returns
+        -------
+        Returns a numpy array with one lambda per object, in the same order as the input arrays
+    """
+    true_data, model_data = data
+    worst_order = np.argsort(true_data)
+
+    true_data = true_data[worst_order]
+    model_data = model_data[worst_order]
+
+    # NOTE(review): positions used below are indices into the label-sorted arrays, not model ranks -- confirm this is intended
+    model_order = np.argsort(model_data)
+
+    idcg = dcg(np.sort(true_data)[-k:][::-1])
+
+    size = len(true_data)
+    position_score = np.zeros((size, size))
+
+    for i in range(size):
+        for j in range(size):
+            position_score[model_order[i], model_order[j]] = \
+                point_dcg((model_order[j], true_data[model_order[i]]))
+
+    lambdas = np.zeros(size)
+
+    for i in range(size):
+        for j in range(size):
+                if true_data[i] > true_data[j]:
+                    # DCG change if objects i and j traded positions
+                    delta_dcg  = position_score[i][j] - position_score[i][i]
+                    delta_dcg += position_score[j][i] - position_score[j][j]
+
+                    delta_ndcg = abs(delta_dcg / idcg)
+
+                    rho = 1 / (1 + math.exp(model_data[i] - model_data[j]))
+
+                    lam = rho * delta_ndcg
+                    # worst_order maps sorted slots back to input rows, so lambdas stay aligned with the caller's feature rows
+                    lambdas[worst_order[j]] -= lam
+                    lambdas[worst_order[i]] += lam
+    return lambdas
\ No newline at end of file
diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py
index 22fe72df..e1d8f7fd 100644
--- a/csrank/tests/test_ranking.py
+++ b/csrank/tests/test_ranking.py
@@ -16,6 +16,8 @@
 object_rankers = {
     FATELINEAR_RANKER: (FATELinearObjectRanker, {"n_hidden_set_units": 12, "batch_size": 1}, (0.0, 1.0)),
     FETALINEAR_RANKER: (FETALinearObjectRanker, {}, (0.0, 1.0)),
+    LAMBDAMART: (LambdaMART, {"min_samples_split": 2, "max_depth": 50, "min_samples_leaf": 1,
+                              "max_leaf_nodes": 10}, (0.66, 0.0)),
     FETA_RANKER: (FETAObjectRanker, {"add_zeroth_order_model": True, "optimizer": optimizer}, (0.0, 1.0)),
     RANKNET: (RankNet, {"optimizer": optimizer}, (0.0, 1.0)),
     CMPNET: (CmpNet, {"optimizer": optimizer}, (0.0, 1.0)),

From 94da9f76975051000748e1fb4c8185bce81855f3 Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Mon, 27 Jan 2020 00:29:19 +0100
Subject: [PATCH 02/11] Condensed commits for all changes in new branch

---
 csrank/metrics.py                  |  39 ++-
 csrank/objectranking/lambdamart.py | 376 +++++++++++++++++++++++++++++
 csrank/tests/test_ranking.py       |   2 +
 3 files changed, 416 insertions(+), 1 deletion(-)
 create mode 100644 csrank/objectranking/lambdamart.py

diff --git a/csrank/metrics.py b/csrank/metrics.py
index eb554449..bc2e338d 100644
--- a/csrank/metrics.py
+++ b/csrank/metrics.py
@@ -47,6 +47,7 @@
 import numpy as np
 import tensorflow as tf
 from keras import backend as K
+import math
 
 from csrank.tensorflow_util import scores_to_rankings, get_instances_objects, tensorify
 
@@ -54,7 +55,8 @@
            'zero_one_rank_loss_for_scores_ties',
            'make_ndcg_at_k_loss', 'kendalls_tau_for_scores',
            'spearman_correlation_for_scores', "zero_one_accuracy",
-           "zero_one_accuracy_for_scores", "topk_categorical_accuracy"]
+           "zero_one_accuracy_for_scores", "topk_categorical_accuracy",
+           "point_dcg", "dcg", "ndcg"]
 
 
 def zero_one_rank_loss(y_true, y_pred):
@@ -331,3 +333,38 @@ def err(y_true, y_pred, utility_function=None, probability_mapping=None):
     results = tf.reduce_sum(discounted_document_values, axis=1)
 
     return K.mean(results)
+
+def point_dcg(args):
+    """
+        Point DCG calculation function. Calculates the discounted gain of a single document from a (position, label) pair: (2 ** label - 1) / log2(position + 2), with positions starting at 0.
+    """
+    pos, label = args
+    return (2 ** label - 1) / math.log(pos + 2, 2)
+
+def dcg(preds):
+    """
+        List DCG calculation function. Sums the point DCG values of the labels in preds, using each label's index in the list as its (0-based) position.
+    """
+    return sum(map(point_dcg, enumerate(preds)))
+
+def ndcg(preds, k=10):
+    """
+        Return NDCG@k for preds, the true relevance labels of one query listed in the order the model ranked them; returns 1 when the ideal DCG is 0.
+    """
+    model_top = preds[:k]
+
+    # Ideal ordering: the k largest labels, highest first (the cut-off follows k instead of a hard-coded 10)
+    if len(preds) > k:
+        ideal_top = np.partition(preds, -k)[-k:]
+        ideal_top.sort()
+    else:
+        ideal_top = np.sort(preds)
+    ideal_top = ideal_top[::-1]
+
+    ideal_dcg = dcg(ideal_top)
+    model_dcg = dcg(model_top)
+
+    if ideal_dcg == 0:
+        return 1
+
+    return model_dcg / ideal_dcg
\ No newline at end of file
diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
new file mode 100644
index 00000000..94b7cb4f
--- /dev/null
+++ b/csrank/objectranking/lambdamart.py
@@ -0,0 +1,376 @@
+import logging, math
+from collections import deque
+from multiprocessing import Pool
+from itertools import chain
+
+import numpy as np
+from sklearn.tree import DecisionTreeRegressor
+
+from csrank.learner import Learner
+from csrank.metrics import point_dcg, dcg, ndcg
+from csrank.objectranking.object_ranker import ObjectRanker
+
+class LambdaMART(ObjectRanker,Learner):
+    def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, learning_rate=1e-3,
+                 min_samples_split=2, max_depth=50, min_samples_leaf=1, max_leaf_nodes=None, num_process = None,
+                 criterion="mse", splitter="best", min_weight_fraction_leaf=None, max_features=None, random_state=None, 
+                 min_impurity_decrease=None, min_impurity_split=None, **kwargs):
+        """
+        Create a LambdaMART based rank regression model. This model uses an ensemble of trees that learn to predict
+        the relevance scores of the documents based on the features, which then can be turned into rankings.
+        The base learner used is the implementation of Decision Tree from the sklearn tree package. The learner
+        tries to indirectly optimize the nDCG metric by learning the lambdas.
+
+        Parameters
+        ----------
+        n_object_features : int
+            Number of features of the object space
+        n_objects : int
+            Number of objects
+        number_of_trees : int
+            The maximum number of trees that are to be trained for the ensemble.
+        learning_rate : float
+            learning rate for the LambdaMART algorithm
+        min_samples_split : int
+            Number of samples required to split the internal node
+        max_depth : int
+            Maximum depth of the tree
+        min_samples_leaf : int
+            Minimum number of samples required to be at the leaf node
+
+        References
+        ----------
+            [1] Burges, Chris J.C. (2010, June). "From RankNet to LambdaRank to LambdaMART: An Overview"
+        """
+        self.n_object_features = n_object_features
+        self.n_objects = n_objects
+        self.number_of_trees = number_of_trees
+        self.learning_rate = learning_rate
+        self.min_samples_split = min_samples_split
+        self.max_depth = max_depth
+        self.min_samples_leaf = min_samples_leaf
+        self.max_leaf_nodes = max_leaf_nodes
+        self.num_process = num_process
+        self.ensemble = []
+        self.random_state = random_state
+        self.criterion = criterion
+        self.splitter = splitter
+        self.min_weight_fraction_leaf = min_weight_fraction_leaf
+        self.max_features = max_features
+        self.random_state = random_state
+        self.min_impurity_decrease = min_impurity_decrease
+        self.min_impurity_split = min_impurity_split
+        self.logger = logging.getLogger(LambdaMART.__name__)
+
+    def _prepare_train_data(self, X, Y, **kwargs):
+        """
+            Transform the data provided in the form of X_train of shape (n_instances,n_objects,n_features) and y_train of shape (n_instances,n_objects) into a single 2-D array with one row per object. The output format is similar to the one provided by the custom dataset reader.
+
+            Parameters
+            ---------
+            X : numpy array
+                (n_instances, n_objects, n_features)
+                Feature vectors of the objects
+            Y : numpy array
+                (n_instances, n_objects)
+                Rankings of the given objects
+            Returns
+            ------
+            Returns an array of shape (n_instances*n_objects, n_features+2): column 0 holds the relevance score derived from the ranking in Y, column 1 the instance index used as query id, and the remaining columns the features
+
+        """
+        #prepare array like features and imaginary qids
+        xdim = X.shape[0]  # n_instances - qid
+        ydim = X.shape[1]  # n_objects - documents
+        zdim = X.shape[2]  # n_features
+
+        features_as_list = deque()
+        for i in range(0,xdim):
+            for j in range(0,ydim):
+                row_as_list=deque([i])
+                features = deque()
+                for k in range(0, zdim):
+                    row_as_list.append(X[i, j, k])
+                features_as_list.append(row_as_list)
+
+        #Convert rankings to relevance scores     
+        scores_docsize = Y.shape[1]
+        relscore_train = np.subtract(scores_docsize, Y)
+
+        #prepare array like relevance score values
+        xdim_scores = relscore_train.shape[0]
+        ydim_scores = relscore_train.shape[1]
+
+        scores_as_list = deque()
+        for x in range(0,xdim_scores):
+            for y in range(0,ydim_scores):
+                scores_as_list.append(relscore_train[x,y])
+        
+        #Check if both the dimensions are the same
+        assert(len(features_as_list)==len(scores_as_list))
+        
+        #convert to numpy and resize the arrays 
+        features = np.asarray(features_as_list)
+        scores_unflat = np.array(scores_as_list)
+        scores = np.reshape(scores_unflat,(len(scores_unflat),1))
+
+        #Concatenate the reshaped arrays and return as training data
+        train_data = np.concatenate((scores,features),axis=1)
+
+        return train_data
+    
+    def _group_by_queries(self, data, queries):
+
+        """
+            Internal function which splits the data into one numpy array per query, starting a new group whenever the query id changes between consecutive entries.
+        """
+        result = []
+        curr_query = None
+        for s, q in zip(data, queries):
+            if q != curr_query:
+                result.append([])
+                curr_query = q
+            result[-1].append(s)
+        result = list(map(np.array, result))
+        return result
+    
+    def fit(self, X, y, **kwargs):
+        """
+            Fit the LambdaMART ensemble: each boosting round fits one regression tree to the per-object lambdas computed from the current predictions.
+
+            Parameters
+            ----------
+            X : numpy array
+                (n_instances, n_objects, n_features)
+                Feature vectors of the objects
+            y : numpy array
+                (n_instances, n_objects)
+                Rankings of the given objects (converted to relevance scores by _prepare_train_data)
+            **kwargs
+                Keyword arguments for the fit function (currently unused)
+
+            Returns
+            -------
+            Returns the model, i.e. self.ensemble, the list of all the fitted trees that make up the MART model
+
+        """
+        #Start from an empty ensemble so that trees from a previous call to fit are not reused.
+        if len(self.ensemble) > 0:
+            self.ensemble.clear()
+        #Flattened layout: column 0 = relevance score, column 1 = query id, columns 2.. = features
+        train_file = self._prepare_train_data(X, y)
+        scores = train_file[:, 0]
+        queries = train_file[:, 1]
+        features = train_file[:, 2:]  # was 3:, which silently dropped the first feature column
+
+        model_preds = np.zeros(len(features))
+
+        for i in range(self.number_of_trees):
+            #Group labels and current predictions per query; the lambdas are computed query by query
+            true_data = self._group_by_queries(scores, queries)
+            model_data = self._group_by_queries(model_preds, queries)
+
+            with Pool(self.num_process) as pool:
+                lambdas_draft = pool.map(query_lambdas, list(zip(true_data, model_data)))
+                lambdas = list(chain(*lambdas_draft))
+
+            tree = DecisionTreeRegressor(criterion=self.criterion,
+                                         splitter=self.splitter,
+                                         max_depth=self.max_depth,
+                                         min_samples_split=self.min_samples_split,
+                                         min_samples_leaf=self.min_samples_leaf,
+                                         min_weight_fraction_leaf=self.min_weight_fraction_leaf or 0.0,  # None is not a valid value; use sklearn's default
+                                         max_features=self.max_features,
+                                         random_state=self.random_state,
+                                         max_leaf_nodes=self.max_leaf_nodes,
+                                         min_impurity_decrease=self.min_impurity_decrease or 0.0,  # None is not a valid value; use sklearn's default
+                                         min_impurity_split=self.min_impurity_split)
+            tree.fit(features, lambdas)
+
+            self.ensemble.append(tree)
+
+            prediction = tree.predict(features)
+            model_preds += self.learning_rate * prediction
+            #TODO: Remove the next two statements after debugging
+            train_score = self._score(model_preds, scores, queries, 10)
+            print("  --iteration train score " + str(train_score))
+        return self.ensemble
+
+    def _predict_scores_fixed(self, X, **kwargs):
+        """
+            Predict the scores for a given collection of sets of objects of same size.
+
+            Parameters
+            ----------
+            X : array-like, shape (n_samples, n_objects, n_features)
+                Flattened to (n_samples * n_objects, n_features) so every tree scores all objects in one call.
+
+            Returns
+            -------
+            Y : array-like, shape (n_samples, n_objects)
+                Returns the scores of each of the objects for each of the samples (learning-rate weighted sum over the ensemble).
+        """
+        n_instances, n_objects, n_features = X.shape
+        self.logger.info("For Test instances {} objects {} features {}".format(*X.shape))
+        X1 = X.reshape(n_instances * n_objects, n_features)
+        scores = np.zeros(n_instances * n_objects)
+        for tree in self.ensemble:
+            scores += self.learning_rate * tree.predict(X1)  # same per-tree shrinkage as in fit and _predict
+        scores = scores.reshape(n_instances, n_objects)
+        return scores
+
+    def predict_scores(self, X, **kwargs):
+        """
+            Predict the utility scores for each object in the collection of set of objects called a query set.
+
+            Parameters
+            ----------
+            X : numpy array of size (n_instances, n_objects, n_features)
+
+            Returns
+            -------
+            Numpy array of size (n_instances, n_objects)
+        """
+        return super().predict_scores(X, **kwargs)
+
+    def predict_for_scores(self, scores, **kwargs):
+        """
+         Predict rankings for the scores for a given collection of sets of objects (query sets). Wrapper that calls the function of the same name 
+         belonging to the ObjectRanker super class.
+        """
+        return ObjectRanker.predict_for_scores(self, scores, **kwargs)
+
+    def predict(self, X, **kwargs):
+        return super().predict(X, **kwargs)
+
+    def _predict(self, pred_vector):
+        """
+            Predict the scores for the data supplied by iterating over the ensemble and returning the output.
+
+            Parameters
+            ----------
+            pred_vector: this is a numpy array of shape (n_objects,n_features)
+
+            Returns
+            -------
+            results: Predicted scores for each of the objects
+            queries: queries corresponding to the predictions that are made
+        """        
+        queries = pred_vector[:, 1]
+        features = pred_vector[:, 2:]        
+         
+        results = np.zeros(len(features))
+        for tree in self.ensemble:
+            results += tree.predict(features) * self.learning_rate
+        return results, queries
+
+    def _score(self, prediction, true_score, query, k=10):
+        """
+            Function that is used to score the performance of the model. 
+
+            Parameters
+            ----------
+            prediction: Predictions of the model
+            true_score: ground truth data of the predictions
+            query: queries accompanying the prediction data used to calculate the ndcg value
+
+            Returns
+            -------
+            Returns the average NDCG value calculated on the basis of the queries supplied, for the predictions
+        """
+        true_data = self._group_by_queries(true_score, query)
+        model_data = self._group_by_queries(prediction, query)
+
+        total_ndcg = []
+
+        for true_d, model_d in zip(true_data, model_data):
+            data = true_d[np.argsort(model_d)[::-1]]
+            total_ndcg.append(ndcg(data, k))
+
+        return sum(total_ndcg) / len(total_ndcg)
+
+    def set_tunable_parameters(self, min_samples_split, max_depth, min_samples_leaf, max_leaf_nodes,
+                               learning_rate, number_of_trees, criterion, splitter, min_weight_fraction_leaf, 
+                               max_features, random_state, min_impurity_decrease, min_impurity_split, **kwargs):
+        """
+            Set the tunable hyperparameters of the DecisionTree model used in LambdaMART
+
+            Parameters
+            ----------
+            min_samples_split : int
+                Number of samples required to split the internal node
+            max_depth : int
+                Maximum depth of the tree
+            min_samples_leaf : int
+                Minimum number of samples required to be at the leaf node
+            max_leaf_nodes : int
+                These are the maximum number of leaf nodes used to grow the tree
+            number_of_trees : int
+                The maximum number of trees that are to be trained for the ensemble.
+            learning_rate : float
+                learning rate for the LambdaMART algorithm
+        """
+        self.min_samples_split = min_samples_split
+        self.max_depth = max_depth
+        self.min_samples_leaf = min_samples_leaf
+        self.max_leaf_nodes = max_leaf_nodes
+        self.number_of_trees = number_of_trees
+        self.learning_rate = learning_rate
+        self.criterion = criterion
+        self.splitter = splitter
+        self.min_weight_fraction_leaf = min_weight_fraction_leaf
+        self.max_features = max_features
+        self.random_state = random_state
+        self.min_impurity_decrease = min_impurity_decrease
+        self.min_impurity_split = min_impurity_split
+
+
+def query_lambdas(data, k=10):
+    """
+        This is used by the LambdaMART learner to compute the lambda values that are to be used as the target variable for the learner.
+        
+        Parameters
+        ----------
+        data : This contains the training data and the predictions from the previous iteration of the learning loop to calculate the lambda values
+
+        Returns
+        -------
+        Returns the lambda values calculated for the current iteration
+    """
+    true_data, model_data = data
+    worst_order = np.argsort(true_data)
+
+    true_data = true_data[worst_order]
+    model_data = model_data[worst_order]
+
+
+    model_order = np.argsort(model_data)
+
+    idcg = dcg(np.sort(true_data)[-10:][::-1])
+
+    size = len(true_data)
+    position_score = np.zeros((size, size))
+
+    for i in range(size):
+        for j in range(size):
+            position_score[model_order[i], model_order[j]] = \
+                point_dcg((model_order[j], true_data[model_order[i]]))
+
+    lambdas = np.zeros(size)
+
+    for i in range(size):
+        for j in range(size):
+                if true_data[i] > true_data[j]:
+
+                    delta_dcg  = position_score[i][j] - position_score[i][i]
+                    delta_dcg += position_score[j][i] - position_score[j][j]
+
+                    delta_ndcg = abs(delta_dcg / idcg)
+
+                    rho = 1 / (1 + math.exp(model_data[i] - model_data[j]))
+
+                    lam = rho * delta_ndcg
+
+                    lambdas[j] -= lam
+                    lambdas[i] += lam
+    return lambdas
\ No newline at end of file
diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py
index 22fe72df..e1d8f7fd 100644
--- a/csrank/tests/test_ranking.py
+++ b/csrank/tests/test_ranking.py
@@ -16,6 +16,8 @@
 object_rankers = {
     FATELINEAR_RANKER: (FATELinearObjectRanker, {"n_hidden_set_units": 12, "batch_size": 1}, (0.0, 1.0)),
     FETALINEAR_RANKER: (FETALinearObjectRanker, {}, (0.0, 1.0)),
+    LAMBDAMART: (LambdaMART, {"min_samples_split": 2, "max_depth": 50, "min_samples_leaf": 1,
+                              "max_leaf_nodes": 10}, (0.66, 0.0)),
     FETA_RANKER: (FETAObjectRanker, {"add_zeroth_order_model": True, "optimizer": optimizer}, (0.0, 1.0)),
     RANKNET: (RankNet, {"optimizer": optimizer}, (0.0, 1.0)),
     CMPNET: (CmpNet, {"optimizer": optimizer}, (0.0, 1.0)),

From b7d7c01e8b50f9f13a48327d1f1f857f83594a29 Mon Sep 17 00:00:00 2001
From: srinikrish22 <srinivas.dab@gmail.com>
Date: Fri, 14 Feb 2020 14:08:49 +0100
Subject: [PATCH 03/11] Fixed some linebreak suggestions and moved the
 class-specific metric functions to the lambdamart file itself

---
 csrank/metrics.py                  | 40 +------------
 csrank/objectranking/lambdamart.py | 90 +++++++++++++++++++++---------
 2 files changed, 67 insertions(+), 63 deletions(-)

diff --git a/csrank/metrics.py b/csrank/metrics.py
index bc2e338d..9aadb9c8 100644
--- a/csrank/metrics.py
+++ b/csrank/metrics.py
@@ -55,8 +55,7 @@
            'zero_one_rank_loss_for_scores_ties',
            'make_ndcg_at_k_loss', 'kendalls_tau_for_scores',
            'spearman_correlation_for_scores', "zero_one_accuracy",
-           "zero_one_accuracy_for_scores", "topk_categorical_accuracy",
-           "point_dcg", "dcg", "ndcg"]
+           "zero_one_accuracy_for_scores", "topk_categorical_accuracy"]
 
 
 def zero_one_rank_loss(y_true, y_pred):
@@ -332,39 +331,4 @@ def err(y_true, y_pred, utility_function=None, probability_mapping=None):
     discounted_document_values = tf.cast(satisfied_at_rank, tf.float64) * discount_at_rank
     results = tf.reduce_sum(discounted_document_values, axis=1)
 
-    return K.mean(results)
-
-def point_dcg(args):
-    """
-        Point DCG calculation function. Calculates the DCG for a given list. This list is assumed to be consisting of the rankings of documents belonging to the same query 
-    """
-    pos, label = args
-    return (2 ** label - 1) / math.log(pos + 2, 2)
-
-def dcg(preds):
-    """
-        List DCG calculation function. This function turns the list of rankings into a form which is easier to be passed to the point DCG function
-    """
-    return sum(map(point_dcg, enumerate(preds)))
-
-def ndcg(preds, k=10):
-    """
-        NDCG calculation function that calculates the NDCG values with the help of the DCG calculation helper functions.
-    """
-    ideal_top = preds[:k]
-
-    true_top = np.array([])
-    if len(preds) > 10:
-        true_top = np.partition(preds, -10)[-k:]
-        true_top.sort()
-    else:
-        true_top = np.sort(preds)
-    true_top = true_top[::-1]
-    
-    max_dcg = dcg(true_top)
-    ideal_dcg = dcg(ideal_top)
-
-    if max_dcg == 0:
-        return 1
-
-    return ideal_dcg / max_dcg
\ No newline at end of file
+    return K.mean(results)
\ No newline at end of file
diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
index 94b7cb4f..d352d234 100644
--- a/csrank/objectranking/lambdamart.py
+++ b/csrank/objectranking/lambdamart.py
@@ -7,7 +7,6 @@
 from sklearn.tree import DecisionTreeRegressor
 
 from csrank.learner import Learner
-from csrank.metrics import point_dcg, dcg, ndcg
 from csrank.objectranking.object_ranker import ObjectRanker
 
 class LambdaMART(ObjectRanker,Learner):
@@ -64,7 +63,9 @@ def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, le
 
     def _prepare_train_data(self, X, Y, **kwargs):
         """
-            Transform the data provided in the form of X_train of shape (n_instances,n_objects,n_features) and y_train of shape (n_instances,n_documents) into (n_instances*n_objects,n_features). The output format is similar to the oneprovided by the cusrom dataset reader.
+            Transform the data provided in the form of X_train of shape (n_instances,n_objects,n_features) 
+            and y_train of shape (n_instances,n_documents) into (n_instances*n_objects,n_features). 
+            The output format is similar to the oneprovided by the cusrom dataset reader.
 
             Parameters
             ---------
@@ -76,7 +77,8 @@ def _prepare_train_data(self, X, Y, **kwargs):
                 Rankings of the given objects
             Returns
             ------
-            Returns an array of shape (n_instances*n_objects,n_features) with the features and relevance scores derived from the ranking provided in y_train
+            Returns an array of shape (n_instances*n_objects,n_features) with the features and relevance 
+            scores derived from the ranking provided in y_train
 
         """
         #prepare array like features and imaginary qids
@@ -120,7 +122,6 @@ def _prepare_train_data(self, X, Y, **kwargs):
         return train_data
     
     def _group_by_queries(self, data, queries):
-
         """
             Internal function which orders the data given as input based on the queries supplied.
         """
@@ -136,25 +137,27 @@ def _group_by_queries(self, data, queries):
     
     def fit(self, X, y, **kwargs):
         """
-            Fit a LambdaMART algorithm to the provided X and y arrays where X contains the features and y being the relevance scores.
-
-            Parameters
-            ----------
-            X : numpy array
-                (n_instances, n_objects, n_features)
-                Feature vectors of the objects
-            Y : numpy array
-                (n_instances, n_objects)
-                Rankings of the given objects
-            **kwargs
-                Keyword arguments for the fit function
+           Fit a LambdaMART algorithm to the provided X and y arrays where X contains the features and y 
+           being the relevance scores.
+
+           Parameters
+           ----------
+           X : numpy array
+               (n_instances, n_objects, n_features)
+               Feature vectors of the objects
+           Y : numpy array
+               (n_instances, n_objects)
+               Rankings of the given objects
+           **kwargs
+               Keyword arguments for the fit function
             
-            Returns
-            -------
-            Returns the model which is in turn just a list of all the trees that make up the MART model
+           Returns
+           -------
+           Returns the model which is in turn just a list of all the trees that make up the MART model
 
         """
-        #check the case if the ensemble already has some trees then clear the trees so that the trees from the previous iteration are not used.
+        #check the case if the ensemble already has some trees then clear the trees so that the trees 
+        #from the previous iteration are not used.
         if len(self.ensemble) > 0:
             self.ensemble.clear()
         
@@ -235,8 +238,8 @@ def predict_scores(self, X, **kwargs):
 
     def predict_for_scores(self, scores, **kwargs):
         """
-         Predict rankings for the scores for a given collection of sets of objects (query sets). Wrapper that calls the function of the same name 
-         belonging to the ObjectRanker super class.
+         Predict rankings for the scores for a given collection of sets of objects (query sets). 
+         Wrapper that calls the function of the same name belonging to the ObjectRanker super class.
         """
         return ObjectRanker.predict_for_scores(self, scores, **kwargs)
 
@@ -327,11 +330,13 @@ def set_tunable_parameters(self, min_samples_split, max_depth, min_samples_leaf,
 
 def query_lambdas(data, k=10):
     """
-        This is used by the LambdaMART learner to compute the lambda values that are to be used as the target variable for the learner.
+        This is used by the LambdaMART learner to compute the lambda values that are to be used as the 
+        target variable for the learner.
         
         Parameters
         ----------
-        data : This contains the training data and the predictions from the previous iteration of the learning loop to calculate the lambda values
+        data : This contains the training data and the predictions from the previous iteration of 
+        the learning loop to calculate the lambda values
 
         Returns
         -------
@@ -373,4 +378,39 @@ def query_lambdas(data, k=10):
 
                     lambdas[j] -= lam
                     lambdas[i] += lam
-    return lambdas
\ No newline at end of file
+    return lambdas
+
+def point_dcg(args):
+    """
+        Point DCG calculation function. Calculates the DCG for a given list. This list is assumed to be consisting of the rankings of documents belonging to the same query 
+    """
+    pos, label = args
+    return (2 ** label - 1) / np.log2(pos + 2)
+
+def dcg(preds):
+    """
+        List DCG calculation function. This function turns the list of rankings into a form which is easier to be passed to the point DCG function
+    """
+    return sum(map(point_dcg, enumerate(preds)))
+
+def ndcg(preds, k=10):
+    """
+        NDCG calculation function that calculates the NDCG values with the help of the DCG calculation helper functions.
+    """
+    ideal_top = preds[:k]
+
+    true_top = np.array([])
+    if len(preds) > 10:
+        true_top = np.partition(preds, -10)[-k:]
+        true_top.sort()
+    else:
+        true_top = np.sort(preds)
+    true_top = true_top[::-1]
+    
+    max_dcg = dcg(true_top)
+    ideal_dcg = dcg(ideal_top)
+
+    if max_dcg == 0:
+        return 1
+
+    return ideal_dcg / max_dcg
\ No newline at end of file

From 80352c8e29fc9e1feb7be83eceefeee9883a9770 Mon Sep 17 00:00:00 2001
From: srinikrish22 <srinivas.dab@gmail.com>
Date: Fri, 14 Feb 2020 15:32:34 +0100
Subject: [PATCH 04/11] More changes related to formatting and added the
 LambdaMART class to the __init__ file

---
 csrank/objectranking/__init__.py   |  1 +
 csrank/objectranking/lambdamart.py | 75 ++++++++++++++++++------------
 2 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/csrank/objectranking/__init__.py b/csrank/objectranking/__init__.py
index 8eeaab74..ddb18f99 100644
--- a/csrank/objectranking/__init__.py
+++ b/csrank/objectranking/__init__.py
@@ -8,3 +8,4 @@
 from .rank_net import RankNet
 from .rank_svm import RankSVM
 from .baseline import RandomBaselineRanker
+from .lambdamart import LambdaMART
diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
index d352d234..3077da45 100644
--- a/csrank/objectranking/lambdamart.py
+++ b/csrank/objectranking/lambdamart.py
@@ -15,10 +15,12 @@ def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, le
                  criterion="mse", splitter="best", min_weight_fraction_leaf=None, max_features=None, random_state=None, 
                  min_impurity_decrease=None, min_impurity_split=None, **kwargs):
         """
-        Create a LambdaMART based rank regression model. This model uses an ensemble of trees that learn to predict
-        the relevance scores of the documents based on the features, which then can be turned into rankings.
-        The base learner used is the implementation of Decision Tree from the sklearn tree package. The learner
-        tries to indirectly optimize the nDCG metric by learning the lambdas.
+        Create a LambdaMART based rank regression model. This model uses an 
+        ensemble of trees that learn to predict the relevance scores of 
+        the documents based on the features, which then can be turned into 
+        rankings. The base learner used is the implementation of Decision 
+        Tree from the sklearn tree package. The learner tries to indirectly 
+        optimize the nDCG metric by learning the lambdas.
 
         Parameters
         ----------
@@ -63,8 +65,9 @@ def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, le
 
     def _prepare_train_data(self, X, Y, **kwargs):
         """
-            Transform the data provided in the form of X_train of shape (n_instances,n_objects,n_features) 
-            and y_train of shape (n_instances,n_documents) into (n_instances*n_objects,n_features). 
+            Transform the data provided in the form of X_train of shape 
+            (n_instances,n_objects,n_features) and y_train of shape 
+            (n_instances,n_documents) into (n_instances*n_objects,n_features).
             The output format is similar to the oneprovided by the cusrom dataset reader.
 
             Parameters
@@ -77,8 +80,8 @@ def _prepare_train_data(self, X, Y, **kwargs):
                 Rankings of the given objects
             Returns
             ------
-            Returns an array of shape (n_instances*n_objects,n_features) with the features and relevance 
-            scores derived from the ranking provided in y_train
+            Returns an array of shape (n_instances*n_objects,n_features) with the 
+            features and relevance scores derived from the ranking provided in y_train
 
         """
         #prepare array like features and imaginary qids
@@ -123,7 +126,8 @@ def _prepare_train_data(self, X, Y, **kwargs):
     
     def _group_by_queries(self, data, queries):
         """
-            Internal function which orders the data given as input based on the queries supplied.
+            Internal function which orders the data given as input based
+            on the queries supplied.
         """
         result = []
         curr_query = None
@@ -137,8 +141,8 @@ def _group_by_queries(self, data, queries):
     
     def fit(self, X, y, **kwargs):
         """
-           Fit a LambdaMART algorithm to the provided X and y arrays where X contains the features and y 
-           being the relevance scores.
+           Fit a LambdaMART algorithm to the provided X and y arrays where X 
+           contains the features and y being the relevance scores.
 
            Parameters
            ----------
@@ -153,11 +157,12 @@ def fit(self, X, y, **kwargs):
             
            Returns
            -------
-           Returns the model which is in turn just a list of all the trees that make up the MART model
+           Returns the model which is in turn just a list of all the trees that
+           make up the MART model
 
         """
-        #check the case if the ensemble already has some trees then clear the trees so that the trees 
-        #from the previous iteration are not used.
+        #check the case if the ensemble already has some trees then clear the trees
+        # so that the trees from the previous iteration are not used.
         if len(self.ensemble) > 0:
             self.ensemble.clear()
         
@@ -196,8 +201,8 @@ def fit(self, X, y, **kwargs):
             model_preds += self.learning_rate * prediction
             #TODO: Remove the next two statements after debugging
             train_score = self._score(model_preds, scores, queries, 10)
-            print("  --iteration train score " + str(train_score))
-        return self.ensemble
+            #print("  --iteration train score " + str(train_score))
+        #return self.ensemble
 
     def _predict_scores_fixed(self, X, **kwargs):
         """
@@ -224,7 +229,8 @@ def _predict_scores_fixed(self, X, **kwargs):
 
     def predict_scores(self, X, **kwargs):
         """
-            Predict the utility scores for each object in the collection of set of objects called a query set.
+            Predict the utility scores for each object in the collection of set
+            of objects called a query set.
 
             Parameters
             ----------
@@ -238,8 +244,9 @@ def predict_scores(self, X, **kwargs):
 
     def predict_for_scores(self, scores, **kwargs):
         """
-         Predict rankings for the scores for a given collection of sets of objects (query sets). 
-         Wrapper that calls the function of the same name belonging to the ObjectRanker super class.
+         Predict rankings for the scores for a given collection of sets of objects
+         (query sets). Wrapper that calls the function of the same name belonging
+         to the ObjectRanker super class.
         """
         return ObjectRanker.predict_for_scores(self, scores, **kwargs)
 
@@ -248,7 +255,8 @@ def predict(self, X, **kwargs):
 
     def _predict(self, pred_vector):
         """
-            Predict the scores for the data supplied by iterating over the ensemble and returning the output.
+            Predict the scores for the data supplied by iterating over the
+            ensemble and returning the output.
 
             Parameters
             ----------
@@ -275,11 +283,13 @@ def _score(self, prediction, true_score, query, k=10):
             ----------
             prediction: Predictions of the model
             true_score: ground truth data of the predictions
-            query: queries accompanying the prediction data used to calculate the ndcg value
+            query: queries accompanying the prediction data used to calculate
+            the ndcg value
 
             Returns
             -------
-            Returns the average NDCG value calculated on the basis of the queries supplied, for the predictions
+            Returns the average NDCG value calculated on the basis of the 
+            queries supplied, for the predictions
         """
         true_data = self._group_by_queries(true_score, query)
         model_data = self._group_by_queries(prediction, query)
@@ -296,7 +306,8 @@ def set_tunable_parameters(self, min_samples_split, max_depth, min_samples_leaf,
                                learning_rate, number_of_trees, criterion, splitter, min_weight_fraction_leaf, 
                                max_features, random_state, min_impurity_decrease, min_impurity_split, **kwargs):
         """
-            Set the tunable hyperparameters of the DecisionTree model used in LambdaMART
+            Set the tunable hyperparameters of the DecisionTree model 
+            used in LambdaMART
 
             Parameters
             ----------
@@ -330,13 +341,13 @@ def set_tunable_parameters(self, min_samples_split, max_depth, min_samples_leaf,
 
 def query_lambdas(data, k=10):
     """
-        This is used by the LambdaMART learner to compute the lambda values that are to be used as the 
-        target variable for the learner.
+        This is used by the LambdaMART learner to compute the lambda values that 
+        are to be used as the target variable for the learner.
         
         Parameters
         ----------
-        data : This contains the training data and the predictions from the previous iteration of 
-        the learning loop to calculate the lambda values
+        data : This contains the training data and the predictions from the 
+        previous iteration of the learning loop to calculate the lambda values
 
         Returns
         -------
@@ -382,20 +393,24 @@ def query_lambdas(data, k=10):
 
 def point_dcg(args):
     """
-        Point DCG calculation function. Calculates the DCG for a given list. This list is assumed to be consisting of the rankings of documents belonging to the same query 
+        Point DCG calculation function. Calculates the DCG for a given list. This
+        list is assumed to be consisting of the rankings of documents belonging to
+        the same query 
     """
     pos, label = args
     return (2 ** label - 1) / np.log2(pos + 2)
 
 def dcg(preds):
     """
-        List DCG calculation function. This function turns the list of rankings into a form which is easier to be passed to the point DCG function
+        List DCG calculation function. This function turns the list of rankings 
+        into a form which is easier to be passed to the point DCG function
     """
     return sum(map(point_dcg, enumerate(preds)))
 
 def ndcg(preds, k=10):
     """
-        NDCG calculation function that calculates the NDCG values with the help of the DCG calculation helper functions.
+        NDCG calculation function that calculates the NDCG values with the help 
+        of the DCG calculation helper functions.
     """
     ideal_top = preds[:k]
 

From 0bd330be9682fe317039a5fcbed074caeb0c9fbb Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Sun, 16 Feb 2020 17:55:50 +0100
Subject: [PATCH 05/11] Changed default values for some DecisionTree params

---
 csrank/objectranking/lambdamart.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
index 3077da45..2830880a 100644
--- a/csrank/objectranking/lambdamart.py
+++ b/csrank/objectranking/lambdamart.py
@@ -12,8 +12,8 @@
 class LambdaMART(ObjectRanker,Learner):
     def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, learning_rate=1e-3,
                  min_samples_split=2, max_depth=50, min_samples_leaf=1, max_leaf_nodes=None, num_process = None,
-                 criterion="mse", splitter="best", min_weight_fraction_leaf=None, max_features=None, random_state=None, 
-                 min_impurity_decrease=None, min_impurity_split=None, **kwargs):
+                 criterion="mse", splitter="best", min_weight_fraction_leaf=0.0, max_features=None, random_state=None, 
+                 min_impurity_decrease=0.0, min_impurity_split=1e-7, **kwargs):
         """
         Create a LambdaMART based rank regression model. This model uses an 
         ensemble of trees that learn to predict the relevance scores of 
@@ -62,6 +62,8 @@ def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, le
         self.min_impurity_decrease = min_impurity_decrease
         self.min_impurity_split = min_impurity_split
         self.logger = logging.getLogger(LambdaMART.__name__)
+        #TODO: Used for Debugging, remove for production
+        #print("LambdaMART init 2")
 
     def _prepare_train_data(self, X, Y, **kwargs):
         """
@@ -200,7 +202,7 @@ def fit(self, X, y, **kwargs):
             prediction = tree.predict(features)
             model_preds += self.learning_rate * prediction
             #TODO: Remove the next two statements after debugging
-            train_score = self._score(model_preds, scores, queries, 10)
+            #train_score = self._score(model_preds, scores, queries, 10)
             #print("  --iteration train score " + str(train_score))
         #return self.ensemble
 

From a078b0b1fd12e9748fc338c55c12fd6292ce0068 Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Sun, 16 Feb 2020 18:06:41 +0100
Subject: [PATCH 06/11] Added ranker to the constants for testing

---
 csrank/experiments/constants.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/csrank/experiments/constants.py b/csrank/experiments/constants.py
index 97c5314d..78dfcf37 100644
--- a/csrank/experiments/constants.py
+++ b/csrank/experiments/constants.py
@@ -27,6 +27,7 @@
 FATELINEAR_RANKER = "fatelinear_ranker"
 FETALINEAR_RANKER = "fetalinear_ranker"
 RANDOM_RANKER = "random_ranker"
+LAMBDAMART = "lambdamart"
 
 FETA_CHOICE = 'feta_choice'
 FETALINEAR_CHOICE = "fetalinear_choice"
@@ -59,4 +60,4 @@
 DCMS = [FETA_DC, FATE_DC, RANKNET_DC, MNL, NLM, GEV, PCL, MLM, RANKSVM_DC, FATELINEAR_DC, FETALINEAR_DC, RANDOM_DC]
 CHOICE_FUNCTIONS = [FETA_CHOICE, FATE_CHOICE, RANKNET_CHOICE, RANKSVM_CHOICE, GLM_CHOICE, RANDOM_CHOICE,
                     FATELINEAR_CHOICE, FETALINEAR_CHOICE]
-OBJECT_RANKERS = [FATE_RANKER, FETA_RANKER, FATELINEAR_RANKER, FETALINEAR_RANKER, RANKSVM, ERR, RANKNET, LISTNET, RANDOM_RANKER]
+OBJECT_RANKERS = [FATE_RANKER, FETA_RANKER, FATELINEAR_RANKER, FETALINEAR_RANKER, RANKSVM, ERR, RANKNET, LISTNET, RANDOM_RANKER, LAMBDAMART]

From cc5913c0a0677f41da68b75fa9463890ec1d0212 Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Sun, 16 Feb 2020 18:51:42 +0100
Subject: [PATCH 07/11] Set default values for the tunable parameters

---
 csrank/objectranking/lambdamart.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
index 2830880a..b142163d 100644
--- a/csrank/objectranking/lambdamart.py
+++ b/csrank/objectranking/lambdamart.py
@@ -304,9 +304,9 @@ def _score(self, prediction, true_score, query, k=10):
 
         return sum(total_ndcg) / len(total_ndcg)
 
-    def set_tunable_parameters(self, min_samples_split, max_depth, min_samples_leaf, max_leaf_nodes,
-                               learning_rate, number_of_trees, criterion, splitter, min_weight_fraction_leaf, 
-                               max_features, random_state, min_impurity_decrease, min_impurity_split, **kwargs):
+    def set_tunable_parameters(self, min_samples_split=2, max_depth=50, min_samples_leaf=1, max_leaf_nodes=None,
+                               learning_rate=1e-3, number_of_trees=5, criterion="mse", splitter="best", min_weight_fraction_leaf=0.0, 
+                               max_features=None, random_state=None, min_impurity_decrease=0.0, min_impurity_split=1e-7, **kwargs):
         """
             Set the tunable hyperparameters of the DecisionTree model 
             used in LambdaMART

From eb242cbd189c3fa77218e54e1f72135381be4cfc Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Sun, 16 Feb 2020 23:58:46 +0100
Subject: [PATCH 08/11] Made bugfixes to the fit function

---
 csrank/objectranking/lambdamart.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
index b142163d..bdb9214a 100644
--- a/csrank/objectranking/lambdamart.py
+++ b/csrank/objectranking/lambdamart.py
@@ -171,7 +171,7 @@ def fit(self, X, y, **kwargs):
         train_file = self._prepare_train_data(X, y)
         scores = train_file[:, 0]
         queries = train_file[:, 1]
-        features = train_file[:, 3:]       
+        features = train_file[:, 2:]
 
         model_preds = np.zeros(len(features))
         
@@ -203,7 +203,7 @@ def fit(self, X, y, **kwargs):
             model_preds += self.learning_rate * prediction
             #TODO: Remove the next two statements after debugging
             #train_score = self._score(model_preds, scores, queries, 10)
-            #print("  --iteration train score " + str(train_score))
+            #print("  --iteration train score " + str(train_score), X.shape, " and ", y.shape)
         #return self.ensemble
 
     def _predict_scores_fixed(self, X, **kwargs):

From bb4c064432ad3276b4a44ba9a3247d92c81471bf Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Mon, 17 Feb 2020 00:13:46 +0100
Subject: [PATCH 09/11] Run the black formatter on lambdamart file

---
 csrank/objectranking/lambdamart.py | 172 ++++++++++++++++++-----------
 1 file changed, 106 insertions(+), 66 deletions(-)

diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
index bdb9214a..b4c817a5 100644
--- a/csrank/objectranking/lambdamart.py
+++ b/csrank/objectranking/lambdamart.py
@@ -9,11 +9,28 @@
 from csrank.learner import Learner
 from csrank.objectranking.object_ranker import ObjectRanker
 
-class LambdaMART(ObjectRanker,Learner):
-    def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, learning_rate=1e-3,
-                 min_samples_split=2, max_depth=50, min_samples_leaf=1, max_leaf_nodes=None, num_process = None,
-                 criterion="mse", splitter="best", min_weight_fraction_leaf=0.0, max_features=None, random_state=None, 
-                 min_impurity_decrease=0.0, min_impurity_split=1e-7, **kwargs):
+
+class LambdaMART(ObjectRanker, Learner):
+    def __init__(
+        self,
+        n_objects=None,
+        n_object_features=None,
+        number_of_trees=5,
+        learning_rate=1e-3,
+        min_samples_split=2,
+        max_depth=50,
+        min_samples_leaf=1,
+        max_leaf_nodes=None,
+        num_process=None,
+        criterion="mse",
+        splitter="best",
+        min_weight_fraction_leaf=0.0,
+        max_features=None,
+        random_state=None,
+        min_impurity_decrease=0.0,
+        min_impurity_split=1e-7,
+        **kwargs
+    ):
         """
         Create a LambdaMART based rank regression model. This model uses an 
         ensemble of trees that learn to predict the relevance scores of 
@@ -62,8 +79,8 @@ def __init__(self, n_objects=None, n_object_features=None, number_of_trees=5, le
         self.min_impurity_decrease = min_impurity_decrease
         self.min_impurity_split = min_impurity_split
         self.logger = logging.getLogger(LambdaMART.__name__)
-        #TODO: Used for Debugging, remove for production
-        #print("LambdaMART init 2")
+        # TODO: Used for Debugging, remove for production
+        # print("LambdaMART init 2")
 
     def _prepare_train_data(self, X, Y, **kwargs):
         """
@@ -86,46 +103,46 @@ def _prepare_train_data(self, X, Y, **kwargs):
             features and relevance scores derived from the ranking provided in y_train
 
         """
-        #prepare array like features and imaginary qids
+        # prepare array like features and imaginary qids
         xdim = X.shape[0]  # n_instances - qid
         ydim = X.shape[1]  # n_objects - documents
         zdim = X.shape[2]  # n_features
 
         features_as_list = deque()
-        for i in range(0,xdim):
-            for j in range(0,ydim):
-                row_as_list=deque([i])
+        for i in range(0, xdim):
+            for j in range(0, ydim):
+                row_as_list = deque([i])
                 features = deque()
                 for k in range(0, zdim):
                     row_as_list.append(X[i, j, k])
                 features_as_list.append(row_as_list)
 
-        #Convert rankings to relevance scores     
+        # Convert rankings to relevance scores
         scores_docsize = Y.shape[1]
         relscore_train = np.subtract(scores_docsize, Y)
 
-        #prepare array like relevance score values
+        # prepare array like relevance score values
         xdim_scores = relscore_train.shape[0]
         ydim_scores = relscore_train.shape[1]
 
         scores_as_list = deque()
-        for x in range(0,xdim_scores):
-            for y in range(0,ydim_scores):
-                scores_as_list.append(relscore_train[x,y])
-        
-        #Check if both the dimensions are the same
-        assert(len(features_as_list)==len(scores_as_list))
-        
-        #convert to numpy and resize the arrays 
+        for x in range(0, xdim_scores):
+            for y in range(0, ydim_scores):
+                scores_as_list.append(relscore_train[x, y])
+
+        # Check if both the dimensions are the same
+        assert len(features_as_list) == len(scores_as_list)
+
+        # convert to numpy and resize the arrays
         features = np.asarray(features_as_list)
         scores_unflat = np.array(scores_as_list)
-        scores = np.reshape(scores_unflat,(len(scores_unflat),1))
+        scores = np.reshape(scores_unflat, (len(scores_unflat), 1))
 
-        #Concatenate the reshaped arrays and return as trainin data
-        train_data = np.concatenate((scores,features),axis=1)
+        # Concatenate the reshaped arrays and return as training data
+        train_data = np.concatenate((scores, features), axis=1)
 
         return train_data
-    
+
     def _group_by_queries(self, data, queries):
         """
             Internal function which orders the data given as input based
@@ -140,7 +157,7 @@ def _group_by_queries(self, data, queries):
             result[-1].append(s)
         result = list(map(np.array, result))
         return result
-    
+
     def fit(self, X, y, **kwargs):
         """
            Fit a LambdaMART algorithm to the provided X and y arrays where X 
@@ -163,48 +180,52 @@ def fit(self, X, y, **kwargs):
            make up the MART model
 
         """
-        #check the case if the ensemble already has some trees then clear the trees
+        # If the ensemble already contains trees, clear them
         # so that the trees from the previous iteration are not used.
         if len(self.ensemble) > 0:
             self.ensemble.clear()
-        
+
         train_file = self._prepare_train_data(X, y)
         scores = train_file[:, 0]
         queries = train_file[:, 1]
         features = train_file[:, 2:]
 
         model_preds = np.zeros(len(features))
-        
+
         for i in range(self.number_of_trees):
-            #print(" Iteration: " + str(i + 1))
+            # print(" Iteration: " + str(i + 1))
             true_data = self._group_by_queries(scores, queries)
             model_data = self._group_by_queries(model_preds, queries)
 
             with Pool(self.num_process) as pool:
-                lambdas_draft = pool.map(query_lambdas, list(zip(true_data, model_data)))
+                lambdas_draft = pool.map(
+                    query_lambdas, list(zip(true_data, model_data))
+                )
                 lambdas = list(chain(*lambdas_draft))
 
-            tree = DecisionTreeRegressor(criterion=self.criterion,
-                                         splitter=self.splitter,
-                                         max_depth=self.max_depth,
-                                         min_samples_split=self.min_samples_split,
-                                         min_samples_leaf=self.min_samples_leaf,
-                                         min_weight_fraction_leaf=self.min_weight_fraction_leaf,
-                                         max_features=None,
-                                         random_state=self.random_state,
-                                         max_leaf_nodes=self.max_leaf_nodes,
-                                         min_impurity_decrease=self.min_impurity_decrease,
-                                         min_impurity_split=self.min_impurity_split)
+            tree = DecisionTreeRegressor(
+                criterion=self.criterion,
+                splitter=self.splitter,
+                max_depth=self.max_depth,
+                min_samples_split=self.min_samples_split,
+                min_samples_leaf=self.min_samples_leaf,
+                min_weight_fraction_leaf=self.min_weight_fraction_leaf,
+                max_features=None,
+                random_state=self.random_state,
+                max_leaf_nodes=self.max_leaf_nodes,
+                min_impurity_decrease=self.min_impurity_decrease,
+                min_impurity_split=self.min_impurity_split,
+            )
             tree.fit(features, lambdas)
 
             self.ensemble.append(tree)
 
             prediction = tree.predict(features)
             model_preds += self.learning_rate * prediction
-            #TODO: Remove the next two statements after debugging
-            #train_score = self._score(model_preds, scores, queries, 10)
-            #print("  --iteration train score " + str(train_score), X.shape, " and ", y.shape)
-        #return self.ensemble
+            # TODO: Remove the next two statements after debugging
+            # train_score = self._score(model_preds, scores, queries, 10)
+            # print("  --iteration train score " + str(train_score), X.shape, " and ", y.shape)
+        # return self.ensemble
 
     def _predict_scores_fixed(self, X, **kwargs):
         """
@@ -221,7 +242,9 @@ def _predict_scores_fixed(self, X, **kwargs):
                 Returns the scores of each of the objects for each of the samples.
         """
         n_instances, n_objects, n_features = X.shape
-        self.logger.info("For Test instances {} objects {} features {}".format(*X.shape))
+        self.logger.info(
+            "For Test instances {} objects {} features {}".format(*X.shape)
+        )
         X1 = X.reshape(n_instances * n_objects, n_features)
         scores = np.zeros(n_instances * n_objects)
         for tree in self.ensemble:
@@ -268,10 +291,10 @@ def _predict(self, pred_vector):
             -------
             results: Predicted scores for each of the objects
             queries: queries corresponding to the predictions that are made
-        """        
+        """
         queries = pred_vector[:, 1]
-        features = pred_vector[:, 2:]        
-         
+        features = pred_vector[:, 2:]
+
         results = np.zeros(len(features))
         for tree in self.ensemble:
             results += tree.predict(features) * self.learning_rate
@@ -304,9 +327,23 @@ def _score(self, prediction, true_score, query, k=10):
 
         return sum(total_ndcg) / len(total_ndcg)
 
-    def set_tunable_parameters(self, min_samples_split=2, max_depth=50, min_samples_leaf=1, max_leaf_nodes=None,
-                               learning_rate=1e-3, number_of_trees=5, criterion="mse", splitter="best", min_weight_fraction_leaf=0.0, 
-                               max_features=None, random_state=None, min_impurity_decrease=0.0, min_impurity_split=1e-7, **kwargs):
+    def set_tunable_parameters(
+        self,
+        min_samples_split=2,
+        max_depth=50,
+        min_samples_leaf=1,
+        max_leaf_nodes=None,
+        learning_rate=1e-3,
+        number_of_trees=5,
+        criterion="mse",
+        splitter="best",
+        min_weight_fraction_leaf=0.0,
+        max_features=None,
+        random_state=None,
+        min_impurity_decrease=0.0,
+        min_impurity_split=1e-7,
+        **kwargs
+    ):
         """
             Set the tunable hyperparameters of the DecisionTree model 
             used in LambdaMART
@@ -361,7 +398,6 @@ def query_lambdas(data, k=10):
     true_data = true_data[worst_order]
     model_data = model_data[worst_order]
 
-
     model_order = np.argsort(model_data)
 
     idcg = dcg(np.sort(true_data)[-10:][::-1])
@@ -371,28 +407,30 @@ def query_lambdas(data, k=10):
 
     for i in range(size):
         for j in range(size):
-            position_score[model_order[i], model_order[j]] = \
-                point_dcg((model_order[j], true_data[model_order[i]]))
+            position_score[model_order[i], model_order[j]] = point_dcg(
+                (model_order[j], true_data[model_order[i]])
+            )
 
     lambdas = np.zeros(size)
 
     for i in range(size):
         for j in range(size):
-                if true_data[i] > true_data[j]:
+            if true_data[i] > true_data[j]:
 
-                    delta_dcg  = position_score[i][j] - position_score[i][i]
-                    delta_dcg += position_score[j][i] - position_score[j][j]
+                delta_dcg = position_score[i][j] - position_score[i][i]
+                delta_dcg += position_score[j][i] - position_score[j][j]
 
-                    delta_ndcg = abs(delta_dcg / idcg)
+                delta_ndcg = abs(delta_dcg / idcg)
 
-                    rho = 1 / (1 + math.exp(model_data[i] - model_data[j]))
+                rho = 1 / (1 + math.exp(model_data[i] - model_data[j]))
 
-                    lam = rho * delta_ndcg
+                lam = rho * delta_ndcg
 
-                    lambdas[j] -= lam
-                    lambdas[i] += lam
+                lambdas[j] -= lam
+                lambdas[i] += lam
     return lambdas
 
+
 def point_dcg(args):
     """
         Point DCG calculation function. Calculates the DCG for a given list. This
@@ -402,6 +440,7 @@ def point_dcg(args):
     pos, label = args
     return (2 ** label - 1) / np.log2(pos + 2)
 
+
 def dcg(preds):
     """
         List DCG calculation function. This function turns the list of rankings 
@@ -409,6 +448,7 @@ def dcg(preds):
     """
     return sum(map(point_dcg, enumerate(preds)))
 
+
 def ndcg(preds, k=10):
     """
         NDCG calculation function that calculates the NDCG values with the help 
@@ -423,11 +463,11 @@ def ndcg(preds, k=10):
     else:
         true_top = np.sort(preds)
     true_top = true_top[::-1]
-    
+
     max_dcg = dcg(true_top)
     ideal_dcg = dcg(ideal_top)
 
     if max_dcg == 0:
         return 1
 
-    return ideal_dcg / max_dcg
\ No newline at end of file
+    return ideal_dcg / max_dcg

From df665011ea3e6a0372c2b58630aa407b2c7d4887 Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Mon, 17 Feb 2020 00:19:17 +0100
Subject: [PATCH 10/11] Some final touches

---
 csrank/objectranking/lambdamart.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/csrank/objectranking/lambdamart.py b/csrank/objectranking/lambdamart.py
index b4c817a5..c58f5026 100644
--- a/csrank/objectranking/lambdamart.py
+++ b/csrank/objectranking/lambdamart.py
@@ -79,8 +79,6 @@ def __init__(
         self.min_impurity_decrease = min_impurity_decrease
         self.min_impurity_split = min_impurity_split
         self.logger = logging.getLogger(LambdaMART.__name__)
-        # TODO: Used for Debugging, remove for production
-        # print("LambdaMART init 2")
 
     def _prepare_train_data(self, X, Y, **kwargs):
         """
@@ -193,7 +191,6 @@ def fit(self, X, y, **kwargs):
         model_preds = np.zeros(len(features))
 
         for i in range(self.number_of_trees):
-            # print(" Iteration: " + str(i + 1))
             true_data = self._group_by_queries(scores, queries)
             model_data = self._group_by_queries(model_preds, queries)
 
@@ -222,10 +219,8 @@ def fit(self, X, y, **kwargs):
 
             prediction = tree.predict(features)
             model_preds += self.learning_rate * prediction
-            # TODO: Remove the next two statements after debugging
             # train_score = self._score(model_preds, scores, queries, 10)
-            # print("  --iteration train score " + str(train_score), X.shape, " and ", y.shape)
-        # return self.ensemble
+            # print("iteration"+ i +" train score " + str(train_score)+" "+str(X.shape) + " and "+ str(y.shape))
 
     def _predict_scores_fixed(self, X, **kwargs):
         """

From d8a4e6e27254b666adbe45bc16c5069608136b5f Mon Sep 17 00:00:00 2001
From: srinikrish22 <ksrinivas812@gmail.com>
Date: Mon, 17 Feb 2020 00:30:50 +0100
Subject: [PATCH 11/11] Made changes to testcase

---
 csrank/tests/test_ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py
index e1d8f7fd..78860e65 100644
--- a/csrank/tests/test_ranking.py
+++ b/csrank/tests/test_ranking.py
@@ -17,7 +17,7 @@
     FATELINEAR_RANKER: (FATELinearObjectRanker, {"n_hidden_set_units": 12, "batch_size": 1}, (0.0, 1.0)),
     FETALINEAR_RANKER: (FETALinearObjectRanker, {}, (0.0, 1.0)),
     LAMBDAMART: (LambdaMART, {"min_samples_split": 2, "max_depth": 50, "min_samples_leaf": 1,
-                              "max_leaf_nodes": 10}, (0.66, 0.0)),
+                              "max_leaf_nodes": 10}, (0.4, 0.0)),
     FETA_RANKER: (FETAObjectRanker, {"add_zeroth_order_model": True, "optimizer": optimizer}, (0.0, 1.0)),
     RANKNET: (RankNet, {"optimizer": optimizer}, (0.0, 1.0)),
     CMPNET: (CmpNet, {"optimizer": optimizer}, (0.0, 1.0)),