diff --git a/.travis.yml b/.travis.yml
index c93d6249..0821bfad 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,6 @@
 language: python
 python:
-  - 3.7.9  # Pinned since tensorflow 1.x is not available for python > 3.7
+  - 3.8
 
 cache:
   directories:
@@ -23,18 +23,16 @@ stages:
 jobs:
   fast_finish: true
   include:
-    - python: 3.7.9
+    - python: 3.8
       env: TOXENV=test1
-    - python: 3.7.9
+    - python: 3.8
      env: TOXENV=test2
-    - python: 3.7.9
+    - python: 3.8
      env: TOXENV=test3
-    - python: 3.7.9
-      env: TOXENV=test4
-    - python: 3.7.9
+    - python: 3.8
      env: TOXENV=lint
    - stage: docs
-      python: 3.7.9
+      python: 3.8
      env: TOXENV=docs
 
 before_deploy:
@@ -47,6 +45,6 @@ deploy:
   script: poetry publish -v --build
   on:
     tags: true
-    python: 3.7.9
+    python: 3.8
    repo: kiudee/cs-ranking
    branch: master
diff --git a/HISTORY.rst b/HISTORY.rst
index 23e8ff9d..c1034f52 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,7 +2,14 @@
 History
 =======
 
-Unreleased
+2.0.0 (Unreleased)
+------------------
+
+* The library has been migrated to PyTorch. This is a breaking change. You will
+  likely need to adapt to this new version if you have been using estimators
+  from version 1.x.
+
+1.3.0 (Unreleased)
 ------------------
 
 * We no longer override any of the defaults of our default optimizer (SGD). In
diff --git a/README.rst b/README.rst
index b8c096f2..217d3f62 100644
--- a/README.rst
+++ b/README.rst
@@ -1,19 +1,29 @@
 |Build Status| |Coverage| |Binder|
 
+****
+NOTE
+****
+
+This repository is currently in the process of a migration from TensorFlow to
+PyTorch. You should use the latest released version if you are not interested
+in the partial PyTorch implementation.
+
 *******
 CS-Rank
 *******
+
 CS-Rank is a Python package for context-sensitive ranking and choice
 algorithms.
 
 We implement the following new object ranking/choice architectures:
 
 * FATE (First aggregate then evaluate)
-* FETA (First evaluate then aggregate)
+* FETA (First evaluate then aggregate) (currently not available due to the
+  PyTorch migration)
 
 In addition, we also implement these algorithms for choice functions:
 
-* RankNetChoiceFunction
+* RankNetChoiceFunction (currently not available due to the PyTorch migration)
 * GeneralizedLinearModel
 * PairwiseSVMChoiceFunction
 
@@ -24,12 +34,10 @@ setting:
 * MixedLogitModel
 * NestedLogitModel
 * PairedCombinatorialLogit
-* RankNetDiscreteChoiceFunction
+* RankNetDiscreteChoiceFunction (currently not available due to the PyTorch
+  migration)
 * PairwiseSVMDiscreteChoiceFunction
 
-Check out our `interactive notebooks`_ to quickly find out what our package can
-do.
-
 Getting started
 ===============
 
@@ -73,7 +81,7 @@ Another option is to clone the repository and install CS-Rank using::
 Dependencies
 ------------
 
-CS-Rank depends on Tensorflow, Keras, NumPy, SciPy, matplotlib, scikit-learn,
+CS-Rank depends on PyTorch, skorch, NumPy, SciPy, matplotlib, scikit-learn,
 joblib and tqdm. For data processing and generation you will also need PyGMO,
 H5Py and pandas.
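For orientation during the migration, here is a minimal usage sketch of the new skorch-based estimators. The toy data, shapes and hyperparameters are illustrative assumptions; only the estimator name and the scikit-learn style ``fit``/``predict`` interface are taken from the code changed below::

    import numpy as np

    from csrank.choicefunction import FATEChoiceFunction

    # 100 illustrative queries with 5 objects and 3 features per object.
    X = np.random.rand(100, 5, 3).astype(np.float32)
    Y = (np.random.rand(100, 5) > 0.5).astype(np.float32)

    estimator = FATEChoiceFunction()
    estimator.fit(X, Y)
    predicted_choices = estimator.predict(X)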
diff --git a/csrank/callbacks.py b/csrank/callbacks.py
deleted file mode 100644
index eb274368..00000000
--- a/csrank/callbacks.py
+++ /dev/null
@@ -1,203 +0,0 @@
-import logging
-import math
-
-from keras import backend as K
-from keras.callbacks import Callback
-import numpy as np
-
-logger = logging.getLogger(__name__)
-
-
-class EarlyStoppingWithWeights(Callback):
-    def __init__(
-        self,
-        monitor="val_loss",
-        min_delta=0,
-        patience=0,
-        verbose=0,
-        mode="auto",
-        baseline=None,
-        restore_best_weights=False,
-        **kwargs,
-    ):
-        """Stop training when a monitored quantity has stopped improving.
-
-        Parameters
-        ----------
-        monitor: string
-            Quantity to be monitored, could be the loss or an accuracy value, which is monitored by the model.
-            In case of accuracy the we check the change in increase, while in case of loss we check the change in
-            decrease of the loss
-        min_delta: float
-            Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less
-            than min_delta, will count as no improvement
-        patience: unsigned int
-            number of epochs with no improvement after which training will be stopped
-        verbose : bool
-            verbosity mode, 1: print, 0 to not
-        mode: one of {auto, min, max}.
-            In `min` mode, training will stop when the quantity monitored has stopped decreasing; in `max` mode
-            it will stop when the quantity monitored has stopped increasing; in `auto` mode, the direction is
-            automatically inferred from the name of the monitored quantity
-        baseline: float
-            Baseline value for the monitored quantity to reach. Training will stop if the model doesn't show
-            improvement over the baseline
-        restore_best_weights: bool
-            whether to restore model weights from the epoch with the best value of the monitored quantity.
-            If False, the model weights obtained at the last step of training are used
-        **kwargs
-            Keyword arguments for the callback
-        """
-        super(EarlyStoppingWithWeights, self).__init__()
-        self.monitor = monitor
-        self.baseline = baseline
-        self.patience = patience
-        self.verbose = verbose
-        self.min_delta = min_delta
-        self.wait = 0
-        self.stopped_epoch = 0
-        self.restore_best_weights = restore_best_weights
-        self.best_weights = None
-
-        known_modes = {"auto", "min", "max"}
-        if mode not in known_modes:
-            raise ValueError(
-                f"EarlyStopping mode {mode} is unknown, must be one of {known_modes}"
-            )
-
-        if mode == "min":
-            self.monitor_op = np.less
-        elif mode == "max":
-            self.monitor_op = np.greater
-        else:
-            if "acc" in self.monitor:
-                self.monitor_op = np.greater
-            else:
-                self.monitor_op = np.less
-
-        if self.monitor_op == np.greater:
-            self.min_delta *= 1
-        else:
-            self.min_delta *= -1
-
-    def on_train_begin(self, logs=None):
-        # Allow instances to be re-used
-        self.wait = 0
-        self.stopped_epoch = 0
-        if self.baseline is not None:
-            self.best = self.baseline
-        else:
-            self.best = np.Inf if self.monitor_op == np.less else -np.Inf
-
-    def on_epoch_end(self, epoch, logs=None):
-        self.stopped_epoch += 1
-        current = logs.get(self.monitor)
-        self.best_weights = self.model.get_weights()
-        if current is None:
-            logger.warning(
-                "Early stopping conditioned on metric `%s` which is not "
-                "available. Available metrics are: %s"
-                % (self.monitor, ",".join(list(logs.keys()))),
-                RuntimeWarning,
-            )
-            return
-        if self.monitor_op(current - self.min_delta, self.best):
-            self.best = current
-            self.best_weights = self.model.get_weights()
-            self.wait = 0
-        else:
-            self.wait += 1
-            if self.wait >= self.patience:
-                self.model.stop_training = True
-
-    def on_train_end(self, logs=None):
-        if self.stopped_epoch > 0:
-            logger.info(
-                "Setting best weights for final epoch {}".format(self.stopped_epoch)
-            )
-            self.model.set_weights(self.best_weights)
-
-
-class LRScheduler(Callback):
-    def __init__(self, epochs_drop=300, drop=0.1, verbose=0, **kwargs):
-        """Learning rate scheduler with step-decay function
-
-        Parameters
-        ----------
-        epochs_drop: unsigned int
-            The number of epochs after which the learning rate is reduced
-        drop: float [0,1):
-            The percentage of the learning rate which needs to be dropped
-        verbose: bool or int in {0,1}
-            int. 0: quiet, 1: update messages
-        **kwargs
-            Keyword arguments for the callback
-        """
-        super(LRScheduler, self).__init__(**kwargs)
-        self.verbose = verbose
-        self.epochs_drop = epochs_drop
-        self.drop = drop
-        self.initial_lr = None
-
-    def step_decay(self, epoch):
-        """The step-decay function, which takes the current epoch and update the learning rate according to the
-        formulae.
-
-        .. math::
-            lr = lr_0 * d_r^{\\lfloor \\frac{e}{e_{\\text{drop}}}\\rfloor}
-
-        where :math:`lr_0` is the learning rate at the zeroth epoch and :math:`0 < d_r < 1` is the rate with which
-        the learning rate should be reduced, :math:`e` is the current epoch and :math:`e_{\\text{drop}}`
-        is the number of epochs after which the learning rate is decreased.
-
-        Parameters
-        ----------
-        epoch: unsigned int
-            Current epoch
-        """
-        step = math.floor((1 + epoch) / self.epochs_drop)
-        new_lr = self.initial_lr * math.pow(self.drop, step)
-        return new_lr
-
-    def on_epoch_begin(self, epoch, logs=None):
-        if not hasattr(self.model.optimizer, "lr"):
-            raise ValueError('Optimizer must have a "lr" attribute.')
-        if epoch == 0:
-            self.initial_lr = float(K.get_value(self.model.optimizer.lr))
-        lr = self.step_decay(epoch)
-        K.set_value(self.model.optimizer.lr, lr)
-        if self.verbose > 0:
-            print(
-                "\nEpoch %05d: LearningRateScheduler setting learning "
-                "rate to %s." % (epoch + 1, lr)
-            )
-
-    def on_epoch_end(self, epoch, logs=None):
-        logs = logs or {}
-        logs["lr"] = K.get_value(self.model.optimizer.lr)
-
-
-class DebugOutput(Callback):
-    def __init__(self, delta=100, **kwargs):
-        """Logging the epochs when done.
-
-        Parameters
-        ----------
-        delta: unsigned int
-            The number of epochs after which the message is logged
-        kwargs:
-            Keyword arguments
-        """
-        super(DebugOutput, self).__init__(**kwargs)
-        self.delta = delta
-        self.epoch = 0
-
-    def on_train_end(self, logs=None):
-        logger.debug("Total number of epochs: {}".format(self.epoch))
-
-    def on_train_begin(self, logs=None):
-        self.epoch = 0
-
-    def on_epoch_end(self, epoch, logs=None):
-        self.epoch += 1
-        if self.epoch % self.delta == 0:
-            logger.debug("Epoch {} of the training finished.".format(self.epoch))
diff --git a/csrank/choicefunction/__init__.py b/csrank/choicefunction/__init__.py
index 41b4aff4..98e3bac3 100644
--- a/csrank/choicefunction/__init__.py
+++ b/csrank/choicefunction/__init__.py
@@ -1,21 +1,11 @@
 from .baseline import AllPositive
-from .cmpnet_choice import CmpNetChoiceFunction
 from .fate_choice import FATEChoiceFunction
-from .fatelinear_choice import FATELinearChoiceFunction
-from .feta_choice import FETAChoiceFunction
-from .fetalinear_choice import FETALinearChoiceFunction
 from .generalized_linear_model import GeneralizedLinearModel
 from .pairwise_choice import PairwiseSVMChoiceFunction
-from .ranknet_choice import RankNetChoiceFunction
 
 __all__ = [
     "AllPositive",
-    "CmpNetChoiceFunction",
     "FATEChoiceFunction",
-    "FATELinearChoiceFunction",
-    "FETAChoiceFunction",
-    "FETALinearChoiceFunction",
     "GeneralizedLinearModel",
     "PairwiseSVMChoiceFunction",
-    "RankNetChoiceFunction",
 ]
diff --git a/csrank/choicefunction/choice_functions.py b/csrank/choicefunction/choice_functions.py
index e6a4b8b6..44c3ef9c 100644
--- a/csrank/choicefunction/choice_functions.py
+++ b/csrank/choicefunction/choice_functions.py
@@ -2,8 +2,11 @@ import logging
 
 import numpy as np
+import skorch
+import torch.nn as nn
 
 from csrank.constants import CHOICE_FUNCTION
+from csrank.learner import SkorchInstanceEstimator
 from csrank.metrics_np import f1_measure
 from csrank.util import progress_bar
@@ -69,3 +72,49 @@ def _tune_threshold(self, X_val, Y_val, thin_thresholds=1, verbose=0):
             " a micro F1-measure of {:.2f}".format(threshold, best)
         )
         return threshold
+
+
+class SkorchChoiceFunction(ChoiceFunctions, SkorchInstanceEstimator):
+    """A variable choice estimator based on some scoring module.
+
+    This estimator takes a scoring module and combines it with a sigmoid
+    activation to predict scores between 0 and 1. The choice is then made based
+    on a fixed threshold value. This makes it very simple to derive new
+    estimators with any given scoring function. Refer to skorch's documentation
+    for supported parameters. For example the optimizer or the optimizer's
+    learning rate could be overridden.
+
+    Parameters
+    ----------
+    module : torch module (class)
+        This is the scoring module. It should be an uninstantiated
+        ``torch.nn.Module`` class that expects the number of features per
+        object as its only parameter on initialization.
+
+    criterion : torch criterion (class)
+        The criterion that is used to evaluate and optimize the module.
+
+    threshold : float
+        The threshold value that is used to convert scores to a choice. Must be
+        between 0 and 1. Defaults to 0.5.
+
+    **kwargs : skorch NeuralNet arguments
+        All keyword arguments are passed to the constructor of
+        ``skorch.NeuralNet``. See the documentation of that class for more
+        details.
+    """
+
+    def __init__(self, module, criterion=nn.BCELoss, threshold=0.5, **kwargs):
+        super().__init__(module=module, criterion=criterion, **kwargs)
+        # The scoring is trained to predict something close to "0" for
+        # non-chosen values, something close to "1" for chosen values. So 0.5
+        # is a natural threshold. It would be possible to additionally tune
+        # that threshold.
+        self.threshold_ = threshold
+
+    def initialize_module(self, *args, **kwargs):
+        params = self.get_params_for("module")
+        # Add a Sigmoid activation since the resulting "scores" should be
+        # between 0 and 1.
+        self.module_ = nn.Sequential(self.module(**params), nn.Sigmoid())
+        self.module_ = skorch.utils.to_device(self.module_, self.device)
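To illustrate the extension point that ``SkorchChoiceFunction`` provides, here is a minimal sketch of a derived estimator. The ``LinearScorer`` module is a hypothetical example; only the contract that ``module`` must be an uninstantiated ``torch.nn.Module`` taking the number of features is taken from the docstring above::

    import torch.nn as nn

    class LinearScorer(nn.Module):
        """Hypothetical scoring module: one linear utility score per object."""

        def __init__(self, n_features):
            super().__init__()
            self.linear = nn.Linear(n_features, 1)

        def forward(self, x):
            # x: (n_instances, n_objects, n_features) -> one raw score per
            # object; the surrounding Sequential applies the Sigmoid.
            return self.linear(x).squeeze(-1)

    class LinearChoiceFunction(SkorchChoiceFunction):
        def __init__(self, **kwargs):
            super().__init__(module=LinearScorer, **kwargs)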
+ """ + + def __init__(self, module, criterion=nn.BCELoss, threshold=0.5, **kwargs): + super().__init__(module=module, criterion=criterion, **kwargs) + # The scoring is trained to predict something close to "0" for + # non-chosen values, something close to "1" for chosen values. So 0.5 + # is a natural threshold. It would be possible to additionally tune + # that threshold. + self.threshold_ = threshold + + def initialize_module(self, *args, **kwargs): + params = self.get_params_for("module") + # Add a Sigmoid activation since the resulting "scores" should be + # between 0 and 1. + self.module_ = nn.Sequential(self.module(**params), nn.Sigmoid()) + self.module_ = skorch.utils.to_device(self.module_, self.device) diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py deleted file mode 100644 index 8c1d1562..00000000 --- a/csrank/choicefunction/cmpnet_choice.py +++ /dev/null @@ -1,182 +0,0 @@ -import logging - -from keras.optimizers import SGD -from keras.regularizers import l2 -from sklearn.model_selection import train_test_split - -from csrank.choicefunction.choice_functions import ChoiceFunctions -from csrank.choicefunction.util import generate_complete_pairwise_dataset -from csrank.core.cmpnet_core import CmpNetCore - -logger = logging.getLogger(__name__) - - -class CmpNetChoiceFunction(ChoiceFunctions, CmpNetCore): - def __init__( - self, - n_hidden=2, - n_units=8, - loss_function="binary_crossentropy", - batch_normalization=True, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="relu", - optimizer=SGD, - metrics=("binary_accuracy",), - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create an instance of the :class:`CmpNetCore` architecture for learning a choice function. - CmpNet breaks the preferences in form of rankings into pairwise comparisons and learns a pairwise model for - the each pair of object in the underlying set. For prediction list of objects is converted in pair of - objects and the pairwise predicate is evaluated using them. The outputs of the network for each pair of - objects :math:`U(x_1,x_2), U(x_2,x_1)` are evaluated. - :math:`U(x_1,x_2)` is a measure of how favorable it is to choose :math:`x_1` over :math:`x_2`. - The utility score of object :math:`x_i` in query set - :math:`Q = \\{ x_1 , \\ldots , x_n \\}` is evaluated as: - - .. math:: - - U(x_i) = \\left\\{ \\frac{1}{n-1} \\sum_{j \\in [n] - \\setminus \\{i\\}} U_1(x_i , x_j)\\right\\} - - The choice set is defined as: - - .. math:: - - c(Q) = \\{ x_i \\in Q \\lvert \\, U(x_i) > t \\} - - Parameters - ---------- - n_hidden : int - Number of hidden layers used in the scoring network - n_units : int - Number of hidden units in each layer of the scoring network - loss_function : function or string - Loss function to be used for the binary decision task of the pairwise comparisons - batch_normalization : bool - Whether to use batch normalization in each hidden layer - kernel_regularizer : uninitialized keras regularizer - Regularizer function applied to all the hidden weight matrices. - activation : function or string - Type of activation function to use in each hidden layer - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. - optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
-        metrics : list
-            List of metrics to evaluate during training (can be non-differentiable)
-        batch_size : int
-            Batch size to use during training
-        random_state : int, RandomState instance or None
-            Seed of the pseudorandom generator or a RandomState instance
-        hidden_dense_layer__{kwarg}
-            Arguments to be passed to the Dense layers (or NormalizedDense
-            if batch_normalization is enabled). See the keras documentation
-            for those classes for available options.
-
-        References
-        ----------
-        [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco Scarselli. 2011. SortNet: Learning to Rank by a Neural Preference Function. IEEE Trans. Neural Networks 22, 9 (2011), 1368–1380. https://doi.org/10.1109/TNN.2011.2160875
-
-        """
-        self._store_kwargs(
-            kwargs, {"optimizer__", "kernel_regularizer__", "hidden_dense_layer__"}
-        )
-        super().__init__(
-            n_hidden=n_hidden,
-            n_units=n_units,
-            loss_function=loss_function,
-            batch_normalization=batch_normalization,
-            kernel_regularizer=kernel_regularizer,
-            kernel_initializer=kernel_initializer,
-            activation=activation,
-            optimizer=optimizer,
-            metrics=metrics,
-            batch_size=batch_size,
-            random_state=random_state,
-        )
-
-    def _convert_instances_(self, X, Y):
-        logger.debug("Creating the Dataset")
-        x1, x2, garbage, y_double, garbage = generate_complete_pairwise_dataset(X, Y)
-        del garbage
-        logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
-        return x1, x2, y_double
-
-    def fit(
-        self,
-        X,
-        Y,
-        epochs=10,
-        callbacks=None,
-        validation_split=0.1,
-        tune_size=0.1,
-        thin_thresholds=1,
-        verbose=0,
-        **kwd,
-    ):
-        """
-        Fit a CmptNet model for learning a choice fucntion on the provided set of queries X and preferences Y of
-        those objects. The provided queries and corresponding preferences are of a fixed size (numpy arrays). For
-        learning this network the binary cross entropy loss function for a pair of objects :math:`x_i, x_j \\in Q`
-        is defined as:
-
-        .. math::
-
-            C_{ij} = -\\tilde{P_{ij}}(0)\\cdot \\log(U(x_i,x_j)) - \\tilde{P_{ij}}(1) \\cdot \\log(U(x_j,x_i)) \\ ,
-
-        where :math:`\\tilde{P_{ij}}` is ground truth probability of the preference of :math:`x_i` over :math:`x_j`.
-        :math:`\\tilde{P_{ij}} = (1,0)` if :math:`x_i \\succ x_j` else :math:`\\tilde{P_{ij}} = (0,1)`.
-
-        Parameters
-        ----------
-        X : numpy array
-            (n_instances, n_objects, n_features)
-            Feature vectors of the objects
-        Y : numpy array
-            (n_instances, n_objects)
-            Preferences in form of Orderings or Choices for given n_objects
-        epochs : int
-            Number of epochs to run if training for a fixed query size
-        callbacks : list
-            List of callbacks to be called during optimization
-        validation_split : float (range : [0,1])
-            Percentage of instances to split off to validate on
-        tune_size: float (range : [0,1])
-            Percentage of instances to split off to tune the threshold for the choice function
-        thin_thresholds: int
-            The number of instances of scores to skip while tuning the threshold
-        verbose : bool
-            Print verbose information
-        **kwd :
-            Keyword arguments for the fit function
-        """
-        self._pre_fit()
-        if tune_size > 0:
-            X_train, X_val, Y_train, Y_val = train_test_split(
-                X, Y, test_size=tune_size, random_state=self.random_state
-            )
-            try:
-                super().fit(
-                    X_train,
-                    Y_train,
-                    epochs,
-                    callbacks,
-                    validation_split,
-                    verbose,
-                    **kwd,
-                )
-            finally:
-                logger.info(
-                    "Fitting utility function finished. Start tuning threshold."
-                )
-                self.threshold_ = self._tune_threshold(
-                    X_val, Y_val, thin_thresholds=thin_thresholds, verbose=verbose
-                )
-        else:
-            super().fit(X, Y, epochs, callbacks, validation_split, verbose, **kwd)
-            self.threshold_ = 0.5
-        return self
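The utility and choice-set definitions from the deleted docstring above reduce to a simple aggregation; a NumPy sketch, where ``predict_pair`` stands in for the trained pairwise network::

    import numpy as np

    def cmpnet_utilities(X, predict_pair):
        """U(x_i) = mean over j != i of U_1(x_i, x_j) for one query set X."""
        n_objects = X.shape[0]
        return np.array([
            np.mean([predict_pair(X[i], X[j]) for j in range(n_objects) if j != i])
            for i in range(n_objects)
        ])

    # The choice set c(Q) then contains every object whose utility exceeds
    # the tuned threshold t:  c(Q) = {x_i in Q | U(x_i) > t}.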
diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py
index a546d3a1..648e215e 100644
--- a/csrank/choicefunction/fate_choice.py
+++ b/csrank/choicefunction/fate_choice.py
@@ -1,183 +1,77 @@
-import logging
+import functools
 
-from keras.layers import Dense
-from keras.losses import binary_crossentropy
-from keras.optimizers import SGD
-from keras.regularizers import l2
-from sklearn.model_selection import train_test_split
+import torch.nn as nn
 
-from csrank.core.fate_network import FATENetwork
-from .choice_functions import ChoiceFunctions
+from csrank.choicefunction.choice_functions import SkorchChoiceFunction
+from csrank.modules.object_mapping import DenseNeuralNetwork
+from csrank.modules.scoring import FATEScoring
 
-logger = logging.getLogger(__name__)
 
+class FATEChoiceFunction(SkorchChoiceFunction):
+    """A variable choice estimator based on the FATE-Approach.
 
-class FATEChoiceFunction(ChoiceFunctions, FATENetwork):
-    def __init__(
-        self,
-        n_hidden_set_layers=2,
-        n_hidden_set_units=32,
-        n_hidden_joint_layers=2,
-        n_hidden_joint_units=32,
-        loss_function=binary_crossentropy,
-        activation="selu",
-        kernel_initializer="lecun_normal",
-        kernel_regularizer=l2,
-        optimizer=SGD,
-        batch_size=256,
-        metrics=(),
-        random_state=None,
-        **kwargs,
-    ):
-        """
-        Create a FATE-network architecture for leaning discrete choice function. The first-aggregate-then-evaluate
-        approach learns an embedding of each object and then aggregates that into a context representation
-        :math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function
-        :math:`U (x, \\mu_{C(x)})`.
-        To make it computationally efficient we take the the context :math:`C(x)` as query set :math:`Q`.
-        The context-representation is evaluated as:
+    Parameters
+    ----------
+    n_hidden_set_layers : int
+        The number of hidden layers that should be used for the ``DeepSet``
+        context embedding.
 
-        .. math::
-            \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y)
+    n_hidden_set_units : int
+        The number of units per hidden layer that should be used for the
+        ``DeepSet`` context embedding.
 
-        where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each object :math:`y` to an
-        :math:`m`-dimensional embedding space :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`.
-        The choice set is defined as:
+    n_hidden_joint_layers : int
+        The number of hidden layers that should be used for the utility
+        function that evaluates each object in the aggregated context.
 
-        .. math::
+    n_hidden_joint_units : int
+        The number of units per hidden layer that should be used for the
+        utility function that evaluates each object in the aggregated context.
 
-            c(Q) = \\{ x \\in Q \\lvert \\, U (x, \\mu_{C(x)}) > t \\}
+    activation : torch activation function (class)
+        The activation function that should be used for each layer of the two
+        ("set" and "joint") neural networks.
 
+    criterion : torch criterion (class)
+        The criterion that is used to evaluate and optimize the module.
 
-        Parameters
-        ----------
-        n_hidden_set_layers : int
-            Number of set layers.
-        n_hidden_set_units : int
-            Number of hidden set units.
-        n_hidden_joint_layers : int
-            Number of joint layers.
-        n_hidden_joint_units : int
-            Number of joint units.
-        activation : string or function
-            Activation function to use in the hidden units
-        kernel_initializer : function or string
-            Initialization function for the weights of each hidden layer
-        kernel_regularizer : uninitialized keras regularizer
-            Regularizer to use in the hidden units
-        optimizer: Class
-            Uninitialized optimizer class following the keras optimizer interface.
-        optimizer__{kwarg}
-            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
-        batch_size : int
-            Batch size to use for training
-        loss_function : function
-            Differentiable loss function for the score vector
-        metrics : list
-            List of evaluation metrics (can be non-differentiable)
-        random_state : int or object
-            Numpy random state
-        hidden_dense_layer__{kwarg}
-            Arguments to be passed to the Dense layers. See the keras
-            documentation of ``Dense`` for available options.
-        """
-        self.loss_function = loss_function
-        self.metrics = metrics
-        super().__init__(
-            n_hidden_set_layers=n_hidden_set_layers,
-            n_hidden_set_units=n_hidden_set_units,
-            n_hidden_joint_layers=n_hidden_joint_layers,
-            n_hidden_joint_units=n_hidden_joint_units,
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            optimizer=optimizer,
-            batch_size=batch_size,
-            random_state=random_state,
-            **kwargs,
-        )
+    **kwargs : skorch NeuralNet arguments
+        All keyword arguments are passed to the constructor of
+        ``SkorchChoiceFunction``. See the documentation of that class for more
+        details.
+    """
-
-    def _construct_layers(self):
-        """
-        Construct basic layers shared by all the objects:
-         * Joint dense hidden layers
-         * Output scoring layer is sigmoid output for choice model
-
-        Connecting the layers is done in join_input_layers and will be done in implementing classes.
- """ - logger.info( - "Construct joint layers hidden units {} and layers {} ".format( - self.n_hidden_joint_units, self.n_hidden_joint_layers - ) + def __init__( + self, + n_hidden_set_layers=2, + n_hidden_set_units=32, + n_hidden_joint_layers=2, + n_hidden_joint_units=32, + activation=nn.SELU, + criterion=nn.BCELoss, + **kwargs + ): + self.n_hidden_set_layers = n_hidden_set_layers + self.n_hidden_set_units = n_hidden_set_units + self.n_hidden_joint_layers = n_hidden_joint_layers + self.n_hidden_joint_units = n_hidden_joint_units + self.activation = activation + super().__init__(module=FATEScoring, criterion=criterion, **kwargs) + + def _get_extra_module_parameters(self): + """Return extra parameters that should be passed to the module.""" + params = super()._get_extra_module_parameters() + params["pairwise_utility_module"] = functools.partial( + DenseNeuralNetwork, + hidden_layers=self.n_hidden_joint_layers, + units_per_hidden=self.n_hidden_joint_units, + activation=self.activation(), + output_size=1, ) - # Create joint hidden layers: - self.joint_layers = [] - hidden_dense_kwargs = { - "kernel_regularizer": self.kernel_regularizer_, - "kernel_initializer": self.kernel_initializer, - "activation": self.activation, - } - hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__")) - for i in range(self.n_hidden_joint_layers): - self.joint_layers.append( - Dense( - self.n_hidden_joint_units, - name="joint_layer_{}".format(i), - **hidden_dense_kwargs, - ) - ) - logger.info("Construct output score node") - self.scorer = Dense( - 1, - name="output_node", - activation="sigmoid", - kernel_regularizer=self.kernel_regularizer_, + params["embedding_module"] = functools.partial( + DenseNeuralNetwork, + hidden_layers=self.n_hidden_set_layers, + units_per_hidden=self.n_hidden_set_units, + activation=self.activation(), ) - - def fit( - self, X, Y, verbose=0, tune_size=0.1, thin_thresholds=1, **kwargs, - ): - """ - Fit a generic FATE-network model for learning a choice function on a provided set of queries. - - The provided queries can be of a fixed size (numpy arrays) or of varying sizes in which case dictionaries - are expected as input. For varying sizes a meta gradient descent is performed across the - different query sizes. - - Parameters - ---------- - X : numpy array or dict - Feature vectors of the objects - (n_instances, n_objects, n_features) if numpy array or map from n_objects to numpy arrays - Y : numpy array or dict - Choices for given objects in the query - (n_instances, n_objects) if numpy array or map from n_objects to numpy arrays - verbose : bool - Print verbose information - tune_size: float (range : [0,1]) - Percentage of instances to split off to tune the threshold - thin_thresholds: int - The number of instances of scores to skip while tuning the threshold - **kwargs : - Further keyword arguments for the @FATENetwork. See the - documentation of :func:`~csrank.core.FATENetwork.fit` for more - information. - """ - self._pre_fit() - if tune_size > 0: - X_train, X_val, Y_train, Y_val = train_test_split( - X, Y, test_size=tune_size, random_state=self.random_state - ) - try: - super().fit(X_train, Y_train, **kwargs) - finally: - logger.info( - "Fitting utility function finished. Start tuning threshold." 
-                )
-                self.threshold_ = self._tune_threshold(
-                    X_val, Y_val, thin_thresholds=thin_thresholds, verbose=verbose
-                )
-        else:
-            super().fit(X, Y, **kwargs)
-            self.threshold_ = 0.5
-        return self
+        return params
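For readers unfamiliar with ``FATEScoring``, the first-aggregate-then-evaluate computation can be sketched in a few lines of PyTorch. This is a simplified stand-in assuming mean aggregation of per-object embeddings, not the actual ``csrank.modules`` implementation::

    import torch
    import torch.nn as nn

    class MiniFATEScoring(nn.Module):
        """Sketch: score each object against its query set's mean embedding."""

        def __init__(self, n_features, embedding_size=32):
            super().__init__()
            self.embed = nn.Linear(n_features, embedding_size)        # phi
            self.utility = nn.Linear(n_features + embedding_size, 1)  # U(x, mu)

        def forward(self, x):
            # x: (n_instances, n_objects, n_features)
            context = self.embed(x).mean(dim=1, keepdim=True)  # mu_C(x)
            context = context.expand(-1, x.shape[1], -1)
            return self.utility(torch.cat([x, context], dim=-1)).squeeze(-1)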
diff --git a/csrank/choicefunction/fatelinear_choice.py b/csrank/choicefunction/fatelinear_choice.py
deleted file mode 100644
index c4a00579..00000000
--- a/csrank/choicefunction/fatelinear_choice.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import logging
-
-from keras.losses import binary_crossentropy
-from sklearn.model_selection import train_test_split
-
-from csrank.core.fate_linear import FATELinearCore
-from .choice_functions import ChoiceFunctions
-
-logger = logging.getLogger(__name__)
-
-
-class FATELinearChoiceFunction(ChoiceFunctions, FATELinearCore):
-    def __init__(
-        self,
-        n_hidden_set_units=32,
-        loss_function=binary_crossentropy,
-        learning_rate=1e-3,
-        batch_size=256,
-        random_state=None,
-        **kwargs,
-    ):
-        """
-        Create a FATELinear-network architecture for leaning discrete choice function. The first-aggregate-then-evaluate
-        approach learns an embedding of each object and then aggregates that into a context representation
-        :math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function
-        :math:`U (x, \\mu_{C(x)})`.
-        To make it computationally efficient we take the the context :math:`C(x)` as query set :math:`Q`.
-        The context-representation is evaluated as:
-
-        .. math::
-            \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y)
-
-        where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each object :math:`y` to an
-        :math:`m`-dimensional embedding space :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`.
-        Training complexity is quadratic in the number of objects and prediction complexity is only linear.
-        The discrete choice for the given query set :math:`Q` is defined as:
-
-        .. math::
-
-            dc(Q) := \\operatorname{argmax}_{x \\in Q} \\; U (x, \\mu_{C(x)})
-
-        Parameters
-        ----------
-        n_hidden_set_units : int
-            Number of hidden set units.
-        batch_size : int
-            Batch size to use for training
-        loss_function : function
-            Differentiable loss function for the score vector
-        random_state : int or object
-            Numpy random state
-        **kwargs
-            Keyword arguments for the @FATENetwork
-        """
-        super().__init__(
-            n_hidden_set_units=n_hidden_set_units,
-            learning_rate=learning_rate,
-            batch_size=batch_size,
-            loss_function=loss_function,
-            random_state=random_state,
-            **kwargs,
-        )
-
-    def fit(
-        self,
-        X,
-        Y,
-        epochs=10,
-        callbacks=None,
-        validation_split=0.1,
-        tune_size=0.1,
-        thin_thresholds=1,
-        verbose=0,
-        **kwd,
-    ):
-        self._pre_fit()
-        if tune_size > 0:
-            X_train, X_val, Y_train, Y_val = train_test_split(
-                X, Y, test_size=tune_size, random_state=self.random_state
-            )
-            try:
-                super().fit(
-                    X_train,
-                    Y_train,
-                    epochs,
-                    callbacks,
-                    validation_split,
-                    verbose,
-                    **kwd,
-                )
-            finally:
-                logger.info(
-                    "Fitting utility function finished. Start tuning threshold."
-                )
-                self.threshold_ = self._tune_threshold(
-                    X_val, Y_val, thin_thresholds=thin_thresholds, verbose=verbose
-                )
-        else:
-            super().fit(X, Y, epochs, callbacks, validation_split, verbose, **kwd)
-            self.threshold_ = 0.5
-        return self
diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py
deleted file mode 100644
index dc5f689c..00000000
--- a/csrank/choicefunction/feta_choice.py
+++ /dev/null
@@ -1,374 +0,0 @@
-from itertools import combinations
-import logging
-
-from keras import backend as K
-from keras import Input
-from keras import Model
-from keras.layers import Activation
-from keras.layers import add
-from keras.layers import concatenate
-from keras.layers import Dense
-from keras.layers import Lambda
-from keras.losses import binary_crossentropy
-from keras.optimizers import SGD
-from keras.regularizers import l2
-import numpy as np
-from sklearn.model_selection import train_test_split
-
-from csrank.core.feta_network import FETANetwork
-from csrank.layers import NormalizedDense
-from csrank.numpy_util import sigmoid
-from .choice_functions import ChoiceFunctions
-
-logger = logging.getLogger(__name__)
-
-
-class FETAChoiceFunction(ChoiceFunctions, FETANetwork):
-    def __init__(
-        self,
-        n_hidden=2,
-        n_units=8,
-        add_zeroth_order_model=False,
-        max_number_of_objects=10,
-        num_subsample=5,
-        loss_function=binary_crossentropy,
-        batch_normalization=False,
-        kernel_regularizer=l2,
-        kernel_initializer="lecun_normal",
-        activation="selu",
-        optimizer=SGD,
-        metrics=("binary_accuracy",),
-        batch_size=256,
-        random_state=None,
-        **kwargs,
-    ):
-        """
-        Create a FETA-network architecture for learning choice functions.
-        The first-evaluate-then-aggregate approach approximates the context-dependent utility function using the
-        first-order utility function :math:`U_1 \\colon \\mathcal{X} \\times \\mathcal{X} \\rightarrow [0,1]`
-        and zeroth-order utility function :math:`U_0 \\colon \\mathcal{X} \\rightarrow [0,1]`.
-        The scores each object :math:`x` using a context-dependent utility function :math:`U (x, C_i)`:
-
-        .. math::
-            U(x_i, C_i) = U_0(x_i) + \\frac{1}{n-1} \\sum_{x_j \\in Q \\setminus \\{x_i\\}} U_1(x_i , x_j) \\, .
-
-        Training and prediction complexity is quadratic in the number of objects.
-        The choice set is defined as:
-
-        .. math::
-
-            c(Q) = \\{ x_i \\in Q \\lvert \\, U (x_i, C_i) > t \\}
-
-        Parameters
-        ----------
-        n_hidden : int
-            Number of hidden layers
-        n_units : int
-            Number of hidden units in each layer
-        add_zeroth_order_model : bool
-            True if the model should include a latent utility function
-        max_number_of_objects : int
-            The maximum number of objects to train from
-        num_subsample : int
-            Number of objects to subsample to
-        loss_function : function
-            Differentiable loss function for the score vector
-        batch_normalization : bool
-            Whether to use batch normalization in the hidden layers
-        kernel_regularizer : uninitialized keras regularizer
-            Regularizer to use in the hidden units
-        kernel_initializer : function or string
-            Initialization function for the weights of each hidden layer
-        activation : string or function
-            Activation function to use in the hidden units
-        optimizer: Class
-            Uninitialized optimizer class following the keras optimizer interface.
-        optimizer__{kwarg}
-            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
-        metrics : list
-            List of evaluation metrics (can be non-differentiable)
-        batch_size : int
-            Batch size to use for training
-        random_state : int or object
-            Numpy random state
-        hidden_dense_layer__{kwarg}
-            Arguments to be passed to the Dense layers (or NormalizedDense
-            if batch_normalization is enabled). See the keras documentation
-            for those classes for available options.
-        """
-        self._store_kwargs(
-            kwargs, {"optimizer__", "kernel_regularizer__", "hidden_dense_layer__"}
-        )
-        super().__init__(
-            n_hidden=n_hidden,
-            n_units=n_units,
-            add_zeroth_order_model=add_zeroth_order_model,
-            max_number_of_objects=max_number_of_objects,
-            num_subsample=num_subsample,
-            loss_function=loss_function,
-            batch_normalization=batch_normalization,
-            kernel_regularizer=kernel_regularizer,
-            kernel_initializer=kernel_initializer,
-            activation=activation,
-            optimizer=optimizer,
-            metrics=metrics,
-            batch_size=batch_size,
-            random_state=random_state,
-        )
-
-    def _construct_layers(self):
-        self.input_layer = Input(
-            shape=(self.n_objects_fit_, self.n_object_features_fit_)
-        )
-        # Todo: Variable sized input
-        # X = Input(shape=(None, n_features))
-        hidden_dense_kwargs = {
-            "kernel_regularizer": self.kernel_regularizer_,
-            "kernel_initializer": self.kernel_initializer,
-            "activation": self.activation,
-        }
-        hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__"))
-        if self.batch_normalization:
-            if self.add_zeroth_order_model:
-                self.hidden_layers_zeroth = [
-                    NormalizedDense(
-                        self.n_units,
-                        name="hidden_zeroth_{}".format(x),
-                        *hidden_dense_kwargs,
-                    )
-                    for x in range(self.n_hidden)
-                ]
-            self.hidden_layers = [
-                NormalizedDense(
-                    self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs
-                )
-                for x in range(self.n_hidden)
-            ]
-        else:
-            if self.add_zeroth_order_model:
-                self.hidden_layers_zeroth = [
-                    Dense(
-                        self.n_units,
-                        name="hidden_zeroth_{}".format(x),
-                        **hidden_dense_kwargs,
-                    )
-                    for x in range(self.n_hidden)
-                ]
-            self.hidden_layers = [
-                Dense(self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs)
-                for x in range(self.n_hidden)
-            ]
-        assert len(self.hidden_layers) == self.n_hidden
-        self.output_node = Dense(
-            1, activation="linear", kernel_regularizer=self.kernel_regularizer_
-        )
-        if self.add_zeroth_order_model:
-            self.output_node_zeroth = Dense(
-                1, activation="linear", kernel_regularizer=self.kernel_regularizer_
-            )
-
-    def construct_model(self):
-        """
-        Construct the :math:`1`-st order and :math:`0`-th order models, which are used to approximate the
-        :math:`U_1(x, C(x))` and the :math:`U_0(x)` utilities respectively. For each pair of objects in
-        :math:`x_i, x_j \\in Q` :math:`U_1(x, C(x))` we construct :class:`CmpNetCore` with weight sharing to
-        approximate a pairwise-matrix. A pairwise matrix with index (i,j) corresponds to the :math:`U_1(x_i,x_j)`
-        is a measure of how favorable it is to choose :math:`x_i` over :math:`x_j`. Using this matrix we calculate
-        the borda score for each object to calculate :math:`U_1(x, C(x))`. For `0`-th order model we construct
-        :math:`\\lvert Q \\lvert` sequential networks whose weights are shared to evaluate the :math:`U_0(x)` for
-        each object in the query set :math:`Q`. The output mode is using sigmoid activation.
-
-        Returns
-        -------
-        model: keras :class:`Model`
-            Neural network to learn the FETA utility score
-        """
-
-        def create_input_lambda(i):
-            return Lambda(lambda x: x[:, i])
-
-        if self.add_zeroth_order_model:
-            logger.debug("Create 0th order model")
-            zeroth_order_outputs = []
-            inputs = []
-            for i in range(self.n_objects_fit_):
-                x = create_input_lambda(i)(self.input_layer)
-                inputs.append(x)
-                for hidden in self.hidden_layers_zeroth:
-                    x = hidden(x)
-                zeroth_order_outputs.append(self.output_node_zeroth(x))
-            zeroth_order_scores = concatenate(zeroth_order_outputs)
-            logger.debug("0th order model finished")
-        logger.debug("Create 1st order model")
-        outputs = [list() for _ in range(self.n_objects_fit_)]
-        for i, j in combinations(range(self.n_objects_fit_), 2):
-            if self.add_zeroth_order_model:
-                x1 = inputs[i]
-                x2 = inputs[j]
-            else:
-                x1 = create_input_lambda(i)(self.input_layer)
-                x2 = create_input_lambda(j)(self.input_layer)
-            x1x2 = concatenate([x1, x2])
-            x2x1 = concatenate([x2, x1])
-
-            for hidden in self.hidden_layers:
-                x1x2 = hidden(x1x2)
-                x2x1 = hidden(x2x1)
-
-            merged_left = concatenate([x1x2, x2x1])
-            merged_right = concatenate([x2x1, x1x2])
-
-            N_g = self.output_node(merged_left)
-            N_l = self.output_node(merged_right)
-
-            outputs[i].append(N_g)
-            outputs[j].append(N_l)
-        # convert rows of pairwise matrix to keras layers:
-        outputs = [concatenate(x) for x in outputs]
-
-        # compute utility scores:
-        scores = [
-            Lambda(lambda s: K.mean(s, axis=1, keepdims=True))(x) for x in outputs
-        ]
-        scores = concatenate(scores)
-        logger.debug("1st order model finished")
-        if self.add_zeroth_order_model:
-            scores = add([scores, zeroth_order_scores])
-        scores = Activation("sigmoid")(scores)
-        model = Model(inputs=self.input_layer, outputs=scores)
-        logger.debug("Compiling complete model...")
-        model.compile(
-            loss=self.loss_function,
-            optimizer=self.optimizer_,
-            metrics=list(self.metrics),
-        )
-        return model
-
-    def _predict_scores_using_pairs(self, X, **kwd):
-        scores = super()._predict_scores_using_pairs(X=X, **kwd)
-        scores = sigmoid(scores)
-        return scores
-
-    def fit(
-        self,
-        X,
-        Y,
-        epochs=10,
-        callbacks=None,
-        validation_split=0.1,
-        tune_size=0.1,
-        thin_thresholds=1,
-        verbose=0,
-        **kwd,
-    ):
-        """
-        Fit a FETA-Network for learning a choice function on the provided set of queries X and preferences Y of
-        those objects. The provided queries and corresponding preferences are of a fixed size (numpy arrays).
-
-        Parameters
-        ----------
-        X : numpy array (n_instances, n_objects, n_features)
-            Feature vectors of the objects
-        Y : numpy array (n_instances, n_objects)
-            Choices for given objects in the query
-        epochs : int
-            Number of epochs to run if training for a fixed query size
-        callbacks : list
-            List of callbacks to be called during optimization
-        validation_split : float (range : [0,1])
-            Percentage of instances to split off to validate on
-        verbose : bool
-            Print verbose information
-        tune_size: float (range : [0,1])
-            Percentage of instances to split off to tune the threshold for the choice function
-        thin_thresholds: int
-            The number of instances of scores to skip while tuning the threshold
-        **kwd :
-            Keyword arguments for the fit function
-        """
-        self._pre_fit()
-        if tune_size > 0:
-            X_train, X_val, Y_train, Y_val = train_test_split(
-                X, Y, test_size=tune_size, random_state=self.random_state
-            )
-            try:
-                super().fit(
-                    X_train,
-                    Y_train,
-                    epochs,
-                    callbacks,
-                    validation_split,
-                    verbose,
-                    **kwd,
-                )
-            finally:
-                logger.info(
-                    "Fitting utility function finished. Start tuning threshold."
-                )
-                self.threshold_ = self._tune_threshold(
-                    X_val, Y_val, thin_thresholds=thin_thresholds, verbose=verbose
-                )
-        else:
-            super().fit(X, Y, epochs, callbacks, validation_split, verbose, **kwd)
-            self.threshold_ = 0.5
-        return self
-
-    def sub_sampling(self, X, Y):
-        if self.n_objects_fit_ <= self.max_number_of_objects:
-            return X, Y
-        n_objects = self.max_number_of_objects
-        bucket_size = int(X.shape[1] / n_objects)
-        X_train = []
-        Y_train = []
-        for x, y in zip(X, Y):
-            ind_1 = np.where(y == 1)[0]
-            p_1 = np.zeros(len(ind_1)) + 1 / len(ind_1)
-            if (y == 1).sum() < n_objects:
-                ind_0 = np.where(y == 0)[0]
-                p_0 = np.zeros(len(ind_0)) + 1 / len(ind_0)
-                positives = (y == 1).sum() if n_objects > (y == 1).sum() else n_objects
-                if positives > bucket_size:
-                    cp = (
-                        self.random_state.choice(
-                            positives, size=bucket_size, replace=False
-                        )
-                        + 1
-                    )
-                else:
-                    cp = self.random_state.choice(positives, size=bucket_size) + 1
-                idx = []
-                for c in cp:
-                    pos = self.random_state.choice(
-                        len(ind_1), size=c, replace=False, p=p_1
-                    )
-                    if n_objects - c > len(ind_0):
-                        neg = self.random_state.choice(
-                            len(ind_0), size=n_objects - c, p=p_0
-                        )
-                    else:
-                        neg = self.random_state.choice(
-                            len(ind_0), size=n_objects - c, replace=False, p=p_0
-                        )
-                        p_0[neg] = 0.2 * p_0[neg]
-                        p_0 = p_0 / p_0.sum()
-                    i = np.concatenate((ind_1[pos], ind_0[neg]))
-                    self.random_state.shuffle(i)
-                    p_1[pos] = 0.2 * p_1[pos]
-                    p_1 = p_1 / p_1.sum()
-                    p_0[neg] = 0.2 * p_0[neg]
-                    p_0 = p_0 / p_0.sum()
-                    idx.append(i)
-                idx = np.array(idx)
-            else:
-                idx = self.random_state.choice(ind_1, size=(bucket_size, n_objects))
-                idx = np.array(idx)
-            if len(X_train) == 0:
-                X_train = x[idx]
-                Y_train = y[idx]
-            else:
-                Y_train = np.concatenate([Y_train, y[idx]], axis=0)
-                X_train = np.concatenate([X_train, x[idx]], axis=0)
-        logger.info(
-            "Sampled instances {} objects {}".format(X_train.shape[0], X_train.shape[1])
-        )
-        return X_train, Y_train
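The FETA utility documented above combines a zeroth-order term with averaged first-order comparisons; stated as a NumPy sketch with ``u0`` and ``u1`` standing in for the two learned utility networks::

    import numpy as np

    def feta_utilities(X, u0, u1):
        """U(x_i, C_i) = U_0(x_i) + mean over j != i of U_1(x_i, x_j)."""
        n_objects = X.shape[0]
        scores = np.array([u0(X[i]) for i in range(n_objects)], dtype=float)
        for i in range(n_objects):
            scores[i] += np.mean([u1(X[i], X[j])
                                  for j in range(n_objects) if j != i])
        return scores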
diff --git a/csrank/choicefunction/fetalinear_choice.py b/csrank/choicefunction/fetalinear_choice.py
deleted file mode 100644
index d07b5191..00000000
--- a/csrank/choicefunction/fetalinear_choice.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import logging
-
-from keras.losses import binary_crossentropy
-from sklearn.model_selection import train_test_split
-
-from csrank.core.feta_linear import FETALinearCore
-from .choice_functions import ChoiceFunctions
-
-logger = logging.getLogger(__name__)
-
-
-class FETALinearChoiceFunction(ChoiceFunctions, FETALinearCore):
-    def __init__(
-        self,
-        loss_function=binary_crossentropy,
-        learning_rate=5e-3,
-        batch_size=256,
-        random_state=None,
-        **kwargs,
-    ):
-        """
-        Create a FATELinear-network architecture for leaning discrete choice function. The first-aggregate-then-evaluate
-        approach learns an embedding of each object and then aggregates that into a context representation
-        :math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function
-        :math:`U (x, \\mu_{C(x)})`.
-        To make it computationally efficient we take the the context :math:`C(x)` as query set :math:`Q`.
-        The context-representation is evaluated as:
-
-        .. math::
-            \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y)
-
-        where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each object :math:`y` to an
-        :math:`m`-dimensional embedding space :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`.
-        Training complexity is quadratic in the number of objects and prediction complexity is only linear.
-        The discrete choice for the given query set :math:`Q` is defined as:
-
-        .. math::
-
-            dc(Q) := \\operatorname{argmax}_{x \\in Q} \\; U (x, \\mu_{C(x)})
-
-        Parameters
-        ----------
-        n_hidden_set_units : int
-            Number of hidden set units.
-        batch_size : int
-            Batch size to use for training
-        loss_function : function
-            Differentiable loss function for the score vector
-        random_state : int or object
-            Numpy random state
-        **kwargs
-            Keyword arguments for the @FATENetwork
-        """
-        super().__init__(
-            learning_rate=learning_rate,
-            batch_size=batch_size,
-            loss_function=loss_function,
-            random_state=random_state,
-            **kwargs,
-        )
-
-    def fit(
-        self,
-        X,
-        Y,
-        epochs=10,
-        callbacks=None,
-        validation_split=0.1,
-        tune_size=0.1,
-        thin_thresholds=1,
-        verbose=0,
-        **kwd,
-    ):
-        self._pre_fit()
-        if tune_size > 0:
-            X_train, X_val, Y_train, Y_val = train_test_split(
-                X, Y, test_size=tune_size, random_state=self.random_state
-            )
-            try:
-                super().fit(
-                    X_train,
-                    Y_train,
-                    epochs,
-                    callbacks,
-                    validation_split,
-                    verbose,
-                    **kwd,
-                )
-            finally:
-                logger.info(
-                    "Fitting utility function finished. Start tuning threshold."
-                )
-                self.threshold_ = self._tune_threshold(
-                    X_val, Y_val, thin_thresholds=thin_thresholds, verbose=verbose
-                )
-        else:
-            super().fit(X, Y, epochs, callbacks, validation_split, verbose, **kwd)
-            self.threshold_ = 0.5
-        return self
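The docstrings of the two linear variants above mention both decision rules; in score space they differ only in the final step (values below are made up for illustration)::

    import numpy as np

    scores = np.array([0.2, 0.7, 0.4])   # U(x, mu_C(x)) per object
    choice_set = scores > 0.5            # c(Q): every object above threshold t
    discrete_choice = np.argmax(scores)  # dc(Q): the single best object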
diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py
deleted file mode 100644
index 1aa8bb88..00000000
--- a/csrank/choicefunction/ranknet_choice.py
+++ /dev/null
@@ -1,169 +0,0 @@
-import logging
-
-from keras.optimizers import SGD
-from keras.regularizers import l2
-from sklearn.model_selection import train_test_split
-
-from csrank.core.ranknet_core import RankNetCore
-from .choice_functions import ChoiceFunctions
-from .util import generate_complete_pairwise_dataset
-
-logger = logging.getLogger(__name__)
-
-
-class RankNetChoiceFunction(ChoiceFunctions, RankNetCore):
-    def __init__(
-        self,
-        n_hidden=2,
-        n_units=8,
-        loss_function="binary_crossentropy",
-        batch_normalization=True,
-        kernel_regularizer=l2,
-        kernel_initializer="lecun_normal",
-        activation="relu",
-        optimizer=SGD,
-        metrics=("binary_accuracy",),
-        batch_size=256,
-        random_state=None,
-        **kwargs,
-    ):
-        """
-        Create an instance of the :class:`RankNetCore` architecture for learning a object ranking function.
-        It breaks the preferences into pairwise comparisons and learns a latent utility model for the objects.
-        This network learns a latent utility score for each object in the given query set
-        :math:`Q = \\{x_1, \\ldots ,x_n\\}` using the equation :math:`U(x) = F(x, w)` where :math:`w` is the weight
-        vector. It is estimated using *pairwise preferences* generated from the choices.
-        The choice set is defined as:
-
-        .. math::
-
-            c(Q) = \\{ x_i \\in Q \\lvert \\, U(x_i) > t \\}
-
-        Parameters
-        ----------
-        n_hidden : int
-            Number of hidden layers used in the scoring network
-        n_units : int
-            Number of hidden units in each layer of the scoring network
-        loss_function : function or string
-            Loss function to be used for the binary decision task of the pairwise comparisons
-        batch_normalization : bool
-            Whether to use batch normalization in each hidden layer
-        kernel_regularizer : uninitialized keras regularizer
-            Regularizer function applied to all the hidden weight matrices.
-        kernel_initializer : function or string
-            Initialization function for the weights of each hidden layer
-        activation : function or string
-            Type of activation function to use in each hidden layer
-        optimizer: Class
-            Uninitialized optimizer class following the keras optimizer interface.
-        optimizer__{kwarg}
-            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
-        metrics : list
-            List of metrics to evaluate during training (can be non-differentiable)
-        batch_size : int
-            Batch size to use during training
-        random_state : int, RandomState instance or None
-            Seed of the pseudo-random generator or a RandomState instance
-        **kwargs
-            Keyword arguments for the algorithms
-
-        References
-        ----------
-        [1] Burges, C. et al. (2005, August). "Learning to rank using gradient descent.", In Proceedings of the 22nd international conference on Machine learning (pp. 89-96). ACM.
-
-        [2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581).
-        """
-        super().__init__(
-            n_hidden=n_hidden,
-            n_units=n_units,
-            loss_function=loss_function,
-            batch_normalization=batch_normalization,
-            kernel_regularizer=kernel_regularizer,
-            kernel_initializer=kernel_initializer,
-            activation=activation,
-            optimizer=optimizer,
-            metrics=metrics,
-            batch_size=batch_size,
-            random_state=random_state,
-            **kwargs,
-        )
-
-    def _convert_instances_(self, X, Y):
-        logger.debug("Creating the Dataset")
-        x1, x2, garbage, garbage, y_single = generate_complete_pairwise_dataset(X, Y)
-        del garbage
-        logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
-        return x1, x2, y_single
-
-    def fit(
-        self,
-        X,
-        Y,
-        epochs=10,
-        callbacks=None,
-        validation_split=0.1,
-        tune_size=0.1,
-        thin_thresholds=1,
-        verbose=0,
-        **kwd,
-    ):
-        """
-        Fit RankNet model for learning choice function on a provided set of queries. The provided queries can be of
-        a fixed size (numpy arrays). For learning this network the binary cross entropy loss function for a pair of
-        objects :math:`x_i, x_j \\in Q` is defined as:
-
-        .. math::
-
-            C_{ij} = -\\tilde{P_{ij}}\\log(P_{ij}) - (1 - \\tilde{P_{ij}})\\log(1 - P{ij}) \\enspace,
-
-        where :math:`\\tilde{P_{ij}}` is ground truth probability of the preference of :math:`x_i` over :math:`x_j`.
-        :math:`\\tilde{P_{ij}} = 1` if :math:`x_i \\succ x_j` else :math:`\\tilde{P_{ij}} = 0`.
-
-        Parameters
-        ----------
-        X : numpy array (n_instances, n_objects, n_features)
-            Feature vectors of the objects
-        Y : numpy array (n_instances, n_objects)
-            Preferences in form of Orderings or Choices for given n_objects
-        epochs : int
-            Number of epochs to run if training for a fixed query size
-        callbacks : list
-            List of callbacks to be called during optimization
-        validation_split : float (range : [0,1])
-            Percentage of instances to split off to validate on
-        tune_size: float (range : [0,1])
-            Percentage of instances to split off to tune the threshold for the choice function
-        thin_thresholds: int
-            The number of instances of scores to skip while tuning the threshold
-        verbose : bool
-            Print verbose information
-        **kwd :
-            Keyword arguments for the fit function
-        """
-        self._pre_fit()
-        if tune_size > 0:
-            X_train, X_val, Y_train, Y_val = train_test_split(
-                X, Y, test_size=tune_size, random_state=self.random_state
-            )
-            try:
-                super().fit(
-                    X_train,
-                    Y_train,
-                    epochs,
-                    callbacks,
-                    validation_split,
-                    verbose,
-                    **kwd,
-                )
-            finally:
-                logger.info(
-                    "Fitting utility function finished. Start tuning threshold."
-                )
-                self.threshold_ = self._tune_threshold(
-                    X_val, Y_val, thin_thresholds=thin_thresholds, verbose=verbose
-                )
-        else:
-            super().fit(X, Y, epochs, callbacks, validation_split, verbose, **kwd)
-            self.threshold_ = 0.5
-        return self
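The binary cross-entropy in the RankNet fit docstring above is easy to state in PyTorch. A minimal sketch assuming the usual RankNet probability model :math:`P_{ij} = \sigma(s_i - s_j)` over latent scores, where ``pref`` is 1 if :math:`x_i \succ x_j` and 0 otherwise::

    import torch.nn.functional as F

    def ranknet_loss(s_i, s_j, pref):
        """C_ij = -P~_ij * log(P_ij) - (1 - P~_ij) * log(1 - P_ij)."""
        # With P_ij = sigmoid(s_i - s_j), this is exactly binary cross-entropy
        # with logits, which is the numerically stable formulation.
        return F.binary_cross_entropy_with_logits(s_i - s_j, pref)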
diff --git a/csrank/core/__init__.py b/csrank/core/__init__.py
index 5b2071fb..e70cac5c 100644
--- a/csrank/core/__init__.py
+++ b/csrank/core/__init__.py
@@ -1,19 +1,5 @@
-from .cmpnet_core import CmpNetCore
-from .fate_linear import FATELinearCore
-from .fate_network import FATENetwork
-from .fate_network import FATENetworkCore
-from .feta_linear import FETALinearCore
-from .feta_network import FETANetwork
 from .pairwise_svm import PairwiseSVM
-from .ranknet_core import RankNetCore
 
 __all__ = [
-    "CmpNetCore",
-    "FATELinearCore",
-    "FATENetwork",
-    "FATENetworkCore",
-    "FETALinearCore",
-    "FETANetwork",
     "PairwiseSVM",
-    "RankNetCore",
 ]
diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py
deleted file mode 100644
index 932a2ee3..00000000
--- a/csrank/core/cmpnet_core.py
+++ /dev/null
@@ -1,201 +0,0 @@
-from itertools import permutations
-import logging
-
-from keras import Input
-from keras import Model
-from keras.layers import concatenate
-from keras.layers import Dense
-from keras.optimizers import SGD
-from keras.regularizers import l2
-import numpy as np
-from sklearn.utils import check_random_state
-
-from csrank.layers import NormalizedDense
-from csrank.learner import Learner
-
-logger = logging.getLogger(__name__)
-
-
-class CmpNetCore(Learner):
-    def __init__(
-        self,
-        n_hidden=2,
-        n_units=8,
-        loss_function="binary_crossentropy",
-        batch_normalization=True,
-        kernel_regularizer=l2,
-        kernel_initializer="lecun_normal",
-        activation="relu",
-        optimizer=SGD,
-        metrics=("binary_accuracy",),
-        batch_size=256,
-        random_state=None,
-        **kwargs,
-    ):
-        self.batch_normalization = batch_normalization
-        self.activation = activation
-
-        self.batch_size = batch_size
-
-        self.metrics = metrics
-        self.kernel_regularizer = kernel_regularizer
-        self.kernel_initializer = kernel_initializer
-        self.loss_function = loss_function
-
-        self.optimizer = optimizer
-
-        self.n_hidden = n_hidden
-        self.n_units = n_units
-        self.random_state = random_state
-        self._store_kwargs(
-            kwargs, {"kernel_regularizer__", "optimizer__", "hidden_dense_layer__"}
-        )
-
-    def _construct_layers(self):
-
-        self.output_node = Dense(
-            1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer_
-        )
-
-        self.x1 = Input(shape=(self.n_object_features_fit_,))
-        self.x2 = Input(shape=(self.n_object_features_fit_,))
-        hidden_dense_kwargs = {
-            "kernel_regularizer": self.kernel_regularizer_,
-            "kernel_initializer": self.kernel_initializer,
-            "activation": self.activation,
-        }
-        hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__"))
-        if self.batch_normalization:
-            self.hidden_layers = [
-                NormalizedDense(
-                    self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs
-                )
-                for x in range(self.n_hidden)
-            ]
-        else:
-            self.hidden_layers = [
-                Dense(self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs)
-                for x in range(self.n_hidden)
-            ]
-        assert len(self.hidden_layers) == self.n_hidden
-
-    def _convert_instances_(self, X, Y):
-        raise NotImplementedError
-
-    def construct_model(self):
-        """
-        Construct the CmpNet which is used to approximate the :math:`U_1(x_i,x_j)`. For each pair of objects in
-        :math:`x_i, x_j \\in Q` we construct two sub-networks with weight sharing in all hidden layers.
-        The output of these networks are connected to two sigmoid units that produces the outputs of the network,
-        i.e., :math:`U(x_1,x_2), U(x_2,x_1)` for each pair of objects are evaluated. :math:`U(x_1,x_2)` is a measure
-        of how favorable it is to choose :math:`x_1` over :math:`x_2`.
-
-        Returns
-        -------
-        model: keras :class:`Model`
-            Neural network to learn the CmpNet utility score
-        """
-        x1x2 = concatenate([self.x1, self.x2])
-        x2x1 = concatenate([self.x2, self.x1])
-        logger.debug("Creating the model")
-        for hidden in self.hidden_layers:
-            x1x2 = hidden(x1x2)
-            x2x1 = hidden(x2x1)
-        merged_left = concatenate([x1x2, x2x1])
-        merged_right = concatenate([x2x1, x1x2])
-        N_g = self.output_node(merged_left)
-        N_l = self.output_node(merged_right)
-        merged_output = concatenate([N_g, N_l])
-        model = Model(inputs=[self.x1, self.x2], outputs=merged_output)
-        model.compile(
-            loss=self.loss_function,
-            optimizer=self.optimizer_,
-            metrics=list(self.metrics),
-        )
-        return model
-
-    def _pre_fit(self):
-        super()._pre_fit()
-        self.random_state_ = check_random_state(self.random_state)
-        self._initialize_optimizer()
-        self._initialize_regularizer()
-
-    def fit(
-        self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd
-    ):
-        """
-        Fit a generic preference learning CmptNet on the provided set of queries X and preferences Y of those
-        objects. The provided queries and corresponding preferences are of a fixed size (numpy arrays).
-        For learning this network the binary cross entropy loss function for a pair of objects
-        :math:`x_i, x_j \\in Q` is defined as:
-
-        .. math::
-
-            C_{ij} = -\\tilde{P_{ij}}(0)\\cdot \\log(U(x_i,x_j)) - \\tilde{P_{ij}}(1) \\cdot \\log(U(x_j,x_i)) \\ ,
-
-        where :math:`\\tilde{P_{ij}}` is ground truth probability of the preference of :math:`x_i` over :math:`x_j`.
-        :math:`\\tilde{P_{ij}} = (1,0)` if :math:`x_i \\succ x_j` else :math:`\\tilde{P_{ij}} = (0,1)`.
-
-        Parameters
-        ----------
-        X : numpy array
-            (n_instances, n_objects, n_features)
-            Feature vectors of the objects
-        Y : numpy array
-            (n_instances, n_objects)
-            Preferences in form of Orderings or Choices for given n_objects
-        epochs : int
-            Number of epochs to run if training for a fixed query size
-        callbacks : list
-            List of callbacks to be called during optimization
-        validation_split : float (range : [0,1])
-            Percentage of instances to split off to validate on
-        verbose : bool
-            Print verbose information
-        **kwd :
-            Keyword arguments for the fit function
-        """
-        self._pre_fit()
-        _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
-
-        self._construct_layers()
-        self.model_ = self.construct_model()
-
-        if self.n_objects_fit_ < 2:
-            # Nothing to learn here, no pairwise comparisons can be generated.
-            return self
-        x1, x2, y_double = self._convert_instances_(X, Y)
-        logger.debug("Instances created {}".format(x1.shape[0]))
-        logger.debug("Finished Creating the model, now fitting started")
-        self.model_.fit(
-            [x1, x2],
-            y_double,
-            batch_size=self.batch_size,
-            epochs=epochs,
-            callbacks=callbacks,
-            validation_split=validation_split,
-            verbose=verbose,
-            **kwd,
-        )
-        logger.debug("Fitting Complete")
-        return self
-
-    def predict_pair(self, a, b, **kwargs):
-        return self.model_.predict([a, b], **kwargs)
-
-    def _predict_scores_fixed(self, X, **kwargs):
-        n_instances, n_objects, n_features = X.shape
-        logger.info("Test Set instances {} objects {} features {}".format(*X.shape))
-        n2 = n_objects * (n_objects - 1)
-        pairs = np.empty((n2, 2, n_features))
-        scores = np.empty((n_instances, n_objects))
-        for n in range(n_instances):
-            for k, (i, j) in enumerate(permutations(range(n_objects), 2)):
-                pairs[k] = (X[n, i], X[n, j])
-            result = self.predict_pair(pairs[:, 0], pairs[:, 1], **kwargs)[:, 0]
-            scores[n] = result.reshape(n_objects, n_objects - 1).mean(axis=1)
-            del result
-        del pairs
-        logger.info("Done predicting scores")
-
-        return scores
self.random_state_.normal( - loc=0, - scale=std, - size=(self.n_object_features_fit_ + self.n_hidden_set_units), - ), - dtype=tf.float32, - ) - self.b2 = tf.Variable( - self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32 - ) - - set_rep = ( - tf.reduce_mean(tf.tensordot(self.X, self.W1, axes=1), axis=1) + self.b1 - ) - - self.set_rep = tf.reshape( - tf.tile(set_rep, tf.constant([1, n_objects])), - (-1, n_objects, self.n_hidden_set_units), - ) - self.X_con = tf.concat([self.X, self.set_rep], axis=-1) - scores = tf.sigmoid(tf.tensordot(self.X_con, self.W2, axes=1) + self.b2) - scores = tf.cast(scores, tf.float32) - self.loss_ = self.loss_function(self.Y, scores) - self.optimizer_ = tf.train.GradientDescentOptimizer( - self.learning_rate - ).minimize(self.loss_) - - def step_decay(self, epoch): - step = math.floor((1 + epoch) / self.epochs_drop) - self.current_lr_ = self.learning_rate * math.pow(self.drop, step) - self.optimizer_ = tf.train.GradientDescentOptimizer(self.current_lr_).minimize( - self.loss_ - ) - - def _pre_fit(self): - super()._pre_fit() - self.random_state_ = check_random_state(self.random_state) - - def fit( - self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd - ): - self._pre_fit() - # Global Variables Initializer - n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape - self._construct_model_(self.n_objects_fit_) - init = tf.global_variables_initializer() - - with tf.Session() as tf_session: - tf_session.run(init) - self._fit_(X, Y, epochs, n_instances, tf_session, verbose) - training_cost = tf_session.run(self.loss_, feed_dict={self.X: X, self.Y: Y}) - logger.info( - "Fitting completed {} epochs done with loss {}".format( - epochs, training_cost.mean() - ) - ) - self.weight1_ = tf_session.run(self.W1) - self.bias1_ = tf_session.run(self.b1) - self.weight2_ = tf_session.run(self.W2) - self.bias2_ = tf_session.run(self.b2) - return self - - def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): - try: - for epoch in range(epochs): - for start in range(0, n_instances, self.batch_size): - end = np.min([start + self.batch_size, n_instances]) - tf_session.run( - self.optimizer_, - feed_dict={self.X: X[start:end], self.Y: Y[start:end]}, - ) - if verbose == 1: - progress_bar(end, n_instances, status="Fitting") - if verbose == 1: - c = tf_session.run(self.loss_, feed_dict={self.X: X, self.Y: Y}) - print("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) - if (epoch + 1) % 100 == 0: - c = tf_session.run(self.loss_, feed_dict={self.X: X, self.Y: Y}) - logger.info("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) - self.step_decay(epoch) - except KeyboardInterrupt: - logger.info("Interrupted") - c = tf_session.run(self.loss_, feed_dict={self.X: X, self.Y: Y}) - logger.info("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) - - def _predict_scores_fixed(self, X, **kwargs): - n_instances, n_objects, n_features = X.shape - assert n_features == self.n_object_features_fit_ - rep = np.mean(np.dot(X, self.weight1_), axis=1) + self.bias1_ - rep = np.tile(rep[:, np.newaxis, :], (1, n_objects, 1)) - X_n = np.concatenate((X, rep), axis=2) - scores = np.dot(X_n, self.weight2_) + self.bias2_ - scores = sigmoid(scores) - return scores diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py deleted file mode 100644 index 72c55bd5..00000000 --- a/csrank/core/fate_network.py +++ /dev/null @@ -1,657 +0,0 @@ -import logging - -from keras.layers import Dense -from keras.layers import Input -from 
keras.layers.merge import concatenate -from keras.models import Model -from keras.optimizers import SGD -from keras.regularizers import l2 -import numpy as np -from sklearn.utils import check_random_state - -from csrank.layers import create_input_lambda -from csrank.layers import DeepSet -from csrank.learner import Learner - -__all__ = ["FATENetwork", "FATENetworkCore"] -logger = logging.getLogger(__name__) - - -class FATENetworkCore(Learner): - def __init__( - self, - n_hidden_joint_layers=2, - n_hidden_joint_units=32, - activation="selu", - kernel_initializer="lecun_normal", - kernel_regularizer=l2, - optimizer=SGD, - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create a FATE-network architecture. - Training and prediction complexity is linear in the number of objects. - - Parameters - ---------- - n_hidden_joint_layers : int - Number of joint layers. - n_hidden_joint_units : int - Number of hidden units in each joint layer - activation : string or function - Activation function to use in the hidden units - kernel_initializer : function or string - Initialization function for the weights of each hidden layer - kernel_regularizer : uninitialized keras regularizer - Regularizer to use in the hidden units - kernel_regularizer__{kwarg}: - Arguments to be passed to the kernel regularizer on initialization. - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. - optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. - batch_size : int - Batch size to use for training - random_state : int or object - Numpy random state - hidden_dense_layer__{kwarg} - Arguments to be passed to the hidden Dense layers. See the - keras documentation for ``Dense`` for available options. - """ - self.random_state = random_state - - self.n_hidden_joint_layers = n_hidden_joint_layers - self.n_hidden_joint_units = n_hidden_joint_units - - self.activation = activation - self.kernel_initializer = kernel_initializer - self.kernel_regularizer = kernel_regularizer - self.batch_size = batch_size - self.optimizer = optimizer - self._store_kwargs( - kwargs, {"optimizer__", "kernel_regularizer__", "hidden_dense_layer__"} - ) - - def _construct_layers(self): - """ - Construct basic layers shared by all ranking algorithms: - * Joint dense hidden layers - * Output scoring layer - - Connecting the layers is done in join_input_layers and will be done in implementing classes. - """ - logger.info( - "Construct joint layers hidden units {} and layers {} ".format( - self.n_hidden_joint_units, self.n_hidden_joint_layers - ) - ) - # Create joint hidden layers: - self.joint_layers = [] - hidden_dense_kwargs = { - "kernel_regularizer": self.kernel_regularizer_, - "kernel_initializer": self.kernel_initializer, - "activation": self.activation, - } - hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__")) - for i in range(self.n_hidden_joint_layers): - self.joint_layers.append( - Dense( - self.n_hidden_joint_units, - name="joint_layer_{}".format(i), - **hidden_dense_kwargs, - ) - ) - - logger.info("Construct output score node") - self.scorer = Dense( - 1, - name="output_node", - activation="linear", - kernel_regularizer=self.kernel_regularizer_, - ) - - def join_input_layers(self, input_layer, *layers, n_layers, n_objects): - """ - Accepts input tensors and an arbitrary number of feature tensors and concatenates them into a joint layer. 
- The input layers need to be given separately, because they need to be iterated over. - - Parameters - ---------- - input_layer : input tensor (n_objects, n_features) - layers : tensors - A number of tensors representing feature representations - n_layers : int - Number of hidden set layers - n_objects : int - Number of objects - """ - logger.debug("Joining set representation and joint layers") - scores = [] - - inputs = [create_input_lambda(i)(input_layer) for i in range(n_objects)] - - for i in range(n_objects): - if n_layers >= 1: - joint = concatenate([inputs[i], *layers]) - else: - joint = inputs[i] - for j in range(self.n_hidden_joint_layers): - joint = self.joint_layers[j](joint) - scores.append(self.scorer(joint)) - scores = ( - concatenate(scores, name="final_scores") if len(scores) > 1 else scores[0] - ) - logger.debug("Done") - - return scores - - def _pre_fit(self): - super()._pre_fit() - self._initialize_optimizer() - self._initialize_regularizer() - self._construct_layers() - - -class FATENetwork(FATENetworkCore): - def __init__(self, n_hidden_set_layers=2, n_hidden_set_units=32, **kwargs): - """ - Create a FATE-network architecture. - Training and prediction complexity is linear in the number of objects. - - Parameters - ---------- - n_hidden_set_layers : int - Number of hidden set layers. - n_hidden_set_units : int - Number of hidden units in each set layer - **kwargs - Keyword arguments for the hidden set units - """ - FATENetworkCore.__init__(self, **kwargs) - - self.n_hidden_set_layers = n_hidden_set_layers - self.n_hidden_set_units = n_hidden_set_units - - def _create_set_layers(self, **kwargs): - """ - Create layers for learning the representation of the query set. The actual connection of the layers is done - during fitting, since we do not know the size(s) of the set(s) in advance. - """ - logger.info( - "Creating set layers with set units {} set layer {} ".format( - self.n_hidden_set_units, self.n_hidden_set_layers - ) - ) - if self.n_hidden_set_layers >= 1: - self.set_layer_ = DeepSet( - units=self.n_hidden_set_units, layers=self.n_hidden_set_layers, **kwargs - ) - else: - self.set_layer_ = None - - @staticmethod - def _bucket_frequencies(X, min_bucket_size=32): - """ - Calculates the relative frequency of each ranking bucket. 
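# Hedged sketch (not from the original file) of the frequency computation
# documented above, on a toy variadic dataset. Buckets below the minimum
# size contribute a frequency of zero but stay in the mapping.
import numpy as np

X = {3: np.zeros((100, 3, 2)), 4: np.zeros((60, 4, 2)), 5: np.zeros((10, 5, 2))}
counts = {n: arr.shape[0] if arr.shape[0] >= 32 else 0 for n, arr in X.items()}
total = sum(counts.values())  # 160: the 10 queries of size 5 are ignored
assert {n: c / total for n, c in counts.items()} == {3: 0.625, 4: 0.375, 5: 0.0}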
-
-        Parameters
-        ----------
-        X : dict
-            map from n_objects to object queries
-        min_bucket_size : int
-            Minimum number of instances for a query size to be considered for
-            the frequency calculation
-
-        Returns
-        -------
-        freq : dict
-            map from n_objects to frequency in float
-
-        """
-        freq = dict()
-        total = 0.0
-        for n_objects, arr in X.items():
-            n_instances = arr.shape[0]
-            if n_instances >= min_bucket_size:
-                freq[n_objects] = n_instances
-                total += freq[n_objects]
-            else:
-                freq[n_objects] = 0
-        for n_objects in freq.keys():
-            freq[n_objects] /= total
-        return freq
-
-    def _construct_models(self, buckets):
-        models = dict()
-        n_features = self.n_object_features_fit_
-
-        for n_objects in buckets.keys():
-            model = self.construct_model(n_features, n_objects)
-            models[n_objects] = model
-        return models
-
-    def get_weights(self, n_objects=None):
-        if self.is_variadic_:
-            if n_objects is not None:
-                weights = self.models_[n_objects].get_weights()
-            else:
-                # All bucket models share their weights, so any of them can
-                # provide them when no query size is specified.
-                weights = next(iter(self.models_.values())).get_weights()
-        else:
-            weights = self.model_.get_weights()
-        return weights
-
-    def set_weights(self, weights, n_objects=None):
-        if self.is_variadic_:
-            if n_objects is not None:
-                self.models_[n_objects].set_weights(weights)
-            else:
-                # The weights are shared, so setting them on any bucket
-                # model updates all of them.
-                next(iter(self.models_.values())).set_weights(weights)
-        else:
-            self.model_.set_weights(weights)
-
-    def _fit(
-        self,
-        X=None,
-        Y=None,
-        generator=None,
-        epochs=35,
-        inner_epochs=1,
-        callbacks=None,
-        validation_split=0.1,
-        verbose=0,
-        global_lr=1.0,
-        global_momentum=0.9,
-        min_bucket_size=500,
-        refit=False,
-        optimizer=None,
-        **kwargs,
-    ):
-        """
-        Fit a generic FATE-network model.
-
-        This is not intended for direct use. Instead, you should use one of
-        the domain-specific subclasses such as `FATEChoiceFunction` or
-        `FATEObjectRanker`.
-
-        Parameters
-        ----------
-        X : numpy array or dict
-            Feature vectors of the objects
-            (n_instances, n_objects, n_features) if numpy array or map from n_objects to numpy arrays
-        Y : numpy array or dict
-            The exact semantics are domain dependent and should be
-            described in the relevant subclasses.
-        epochs : int
-            Number of epochs to run if training for a fixed query size or
-            number of epochs of the meta gradient descent for the variadic model
-        inner_epochs : int
-            Number of epochs to train for each query size inside the variadic
-            model
-        callbacks : list
-            List of callbacks to be called during optimization
-        validation_split : float (range : [0,1])
-            Percentage of instances to split off to validate on
-        verbose : bool
-            Print verbose information
-        global_lr : float
-            Learning rate of the meta gradient descent (variadic model only)
-        global_momentum : float
-            Momentum for the meta gradient descent (variadic model only)
-        min_bucket_size : int
-            Restrict the training to queries of a minimum size
-        refit : bool
-            If True, create a new model object, otherwise continue fitting the
-            existing one if one exists.
- **kwargs : - Keyword arguments for the fit function - """ - self._pre_fit() - if optimizer is not None: - self.optimizer = optimizer - if isinstance(X, dict): - if generator is not None: - logger.error("Variadic training does not support generators yet.") - raise NotImplementedError - self.is_variadic_ = True - decay_rate = global_lr / epochs - learning_rate = global_lr - freq = self._bucket_frequencies(X, min_bucket_size=min_bucket_size) - bucket_ids = np.array(tuple(X.keys())) - - # Create models which need to be trained - # Note, that the models share all their weights, the only - # difference is the compute graph constructed for back propagation. - if not hasattr(self, "models_") or refit: - self.models_ = self._construct_models(X) - - # Iterate training - for epoch in range(epochs): - - logger.info("Epoch: {}, Learning rate: {}".format(epoch, learning_rate)) - - # In the spirit of mini-batch SGD we also shuffle the buckets - # each epoch: - np.random.shuffle(bucket_ids) - self.curr_bucket_id = bucket_ids[0] - - w_before = np.array(self.get_weights()) - - for bucket_id in bucket_ids: - self.curr_bucket_id = bucket_id - # Skip query sizes with too few instances: - if X[bucket_id].shape[0] < min_bucket_size: - continue - - # self.set_weights(start) - x = X[bucket_id] - y = Y[bucket_id] - - # Save weight vector for momentum: - w_old = w_before - w_before = np.array(self.get_weights()) - self.models_[bucket_id].fit( - x=x, - y=y, - epochs=inner_epochs, - batch_size=self.batch_size, - validation_split=validation_split, - verbose=verbose, - **kwargs, - ) - w_after = np.array(self.get_weights()) - self.set_weights( - w_before - + learning_rate * freq[bucket_id] * (w_after - w_before) - + global_momentum * (w_before - w_old) - ) - learning_rate /= 1 + decay_rate * epoch - else: - self.is_variadic_ = False - - if not hasattr(self, "model_") or refit: - if generator is not None: - X, Y = next(iter(generator)) - - n_inst, n_objects, n_features = X.shape - - self.model_ = self.construct_model(n_features, n_objects) - logger.info("Fitting started") - if generator is None: - self.model_.fit( - x=X, - y=Y, - callbacks=callbacks, - epochs=epochs, - validation_split=validation_split, - batch_size=self.batch_size, - verbose=verbose, - **kwargs, - ) - else: - self.model_.fit_generator( - generator=generator, - callbacks=callbacks, - epochs=epochs, - verbose=verbose, - **kwargs, - ) - logger.info("Fitting complete") - - def construct_model(self, n_features, n_objects): - """ - Construct the FATE-network architecture using the :class:`DeepSet` to learn the context representation - :math:`\\mu_{C(x)}` for the given query set/context :math:`Q=C(x)`. We construct an input tensor of query - set :math:`Q` of size (n_objects, n_features),iterate over it for each object and concatenate the - context-representation feature tensor of size :math:`\\lvert \\mu_{C(x)} \\lvert` into a joint layers. - So, for each object we share the weights in the joint network and the output of this network is used to - learn the generalized latent utility score :math:`U (x, \\mu_{C(x)})` of each object :math:`x \\in Q`. 
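# Worked example (values invented, not from the repo) of the variadic
# meta-update in `_fit` above: the shared weights move by the inner fit's
# step, scaled by the bucket's relative frequency, plus a momentum term.
import numpy as np

w_old, w_before, w_after = np.array([0.0]), np.array([1.0]), np.array([3.0])
learning_rate, freq_bucket, global_momentum = 0.5, 0.25, 0.9
w_new = (
    w_before
    + learning_rate * freq_bucket * (w_after - w_before)  # frequency-weighted step
    + global_momentum * (w_before - w_old)  # momentum from the previous epoch
)
assert np.isclose(w_new[0], 2.15)  # 1.0 + 0.5 * 0.25 * 2.0 + 0.9 * 1.0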
- - Parameters - ---------- - n_features: int - Features of the objects for which the network is constructed - n_objects: int - Size of the query sets for which the network is constructed - - Returns - ------- - model: keras :class:`Model` - Neural network to learn the FATE utility score - - """ - input_layer = Input(shape=(n_objects, n_features), name="input_node") - set_repr = self.set_layer_(input_layer) - scores = self.join_input_layers( - input_layer, - set_repr, - n_objects=n_objects, - n_layers=self.n_hidden_set_layers, - ) - model = Model(inputs=input_layer, outputs=scores) - - model.compile( - loss=self.loss_function, - optimizer=self.optimizer_, - metrics=list(self.metrics), - ) - return model - - def _pre_fit(self): - super()._pre_fit() - self.random_state_ = check_random_state(self.random_state) - self._initialize_optimizer() - self._initialize_regularizer() - self._create_set_layers( - activation=self.activation, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer_, - ) - - def fit( - self, - X, - Y, - epochs=35, - inner_epochs=1, - callbacks=None, - validation_split=0.1, - verbose=0, - global_lr=1.0, - global_momentum=0.9, - min_bucket_size=500, - refit=False, - **kwargs, - ): - """ - Fit a generic preference learning FATE-network model on a provided set of queries. - - The provided queries can be of a fixed size (numpy arrays) or of - varying sizes in which case dictionaries are expected as input. - - For varying sizes a meta gradient descent is performed across the - different query sizes. - - Parameters - ---------- - X : numpy array or dict - Feature vectors of the objects - (n_instances, n_objects, n_features) if numpy array or map from n_objects to numpy arrays - Y : numpy array or dict - Preferences in form of rankings or choices for given objects - (n_instances, n_objects) if numpy array or map from n_objects to numpy arrays - epochs : int - Number of epochs to run if training for a fixed query size or - number of epochs of the meta gradient descent for the variadic model - inner_epochs : int - Number of epochs to train for each query size inside the variadic - model - callbacks : list - List of callbacks to be called during optimization - validation_split : float (range : [0,1]) - Percentage of instances to split off to validate on - verbose : bool - Print verbose information - global_lr : float - Learning rate of the meta gradient descent (variadic model only) - global_momentum : float - Momentum for the meta gradient descent (variadic model only) - min_bucket_size : int - Restrict the training to queries of a minimum size - refit : bool - If True, create a new model object, otherwise continue fitting the - existing one if one exists. - **kwargs : - Keyword arguments for the fit function - """ - _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape - self._fit( - X=X, - Y=Y, - epochs=epochs, - inner_epochs=inner_epochs, - callbacks=callbacks, - validation_split=validation_split, - verbose=verbose, - global_lr=global_lr, - global_momentum=global_momentum, - min_bucket_size=min_bucket_size, - refit=refit, - **kwargs, - ) - return self - - def fit_generator( - self, - generator, - epochs=35, - steps_per_epoch=10, - inner_epochs=1, - callbacks=None, - verbose=0, - global_lr=1.0, - global_momentum=0.9, - min_bucket_size=500, - refit=False, - **kwargs, - ): - """ - Fit a generic object ranking FATE-network on a set of queries provided by - a generator. 
- - The provided queries can be of a fixed size (numpy arrays) or of - varying sizes in which case dictionaries are expected as input. - - For varying sizes a meta gradient descent is performed across the - different query sizes. - - Parameters - ---------- - generator : - A generator or an instance of `Sequence` (:class:`keras.utils.Sequence`) object in order to avoid - duplicate data when using multiprocessing. - The output of the generator must be either - - a tuple `(inputs, targets)` - - a tuple `(inputs, targets, sample_weights)`. - This tuple (a single output of the generator) makes a single batch. - Therefore, all arrays in this tuple must have the same length (equal to the size of this batch). - Different batches may have different sizes. - For example, the last batch of the epoch is commonly smaller than the others, if the size of the dataset - is not divisible by the batch size. The generator is expected to loop over its data indefinitely. An - epoch finishes when `steps_per_epoch` batches have been seen by the model. - epochs : int - Number of epochs to run if training for a fixed query size or - number of epochs of the meta gradient descent for the variadic model - steps_per_epoch : int - Number of batches to train per epoch - inner_epochs : int - Number of epochs to train for each query size inside the variadic - model - callbacks : list - List of callbacks to be called during optimization - verbose : bool - Print verbose information - global_lr : float - Learning rate of the meta gradient descent (variadic model only) - global_momentum : float - Momentum for the meta gradient descent (variadic model only) - min_bucket_size : int - Restrict the training to queries of a minimum size - refit : bool - If True, create a new model object, otherwise continue fitting the - existing one if one exists. - **kwargs: - Keyword arguments for the fit function - """ - self._fit( - generator=generator, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - inner_epochs=inner_epochs, - callbacks=callbacks, - verbose=verbose, - global_lr=global_lr, - global_momentum=global_momentum, - min_bucket_size=min_bucket_size, - refit=refit, - **kwargs, - ) - - def _get_context_representation(self, X, kwargs): - n_objects = X.shape[-2] - logger.info("Test Set instances {} objects {} features {}".format(*X.shape)) - input_layer_scorer = Input( - shape=(n_objects, self.n_object_features_fit_), name="input_node" - ) - if self.n_hidden_set_layers >= 1: - self.set_layer_(input_layer_scorer) - fr = self.set_layer_.cached_models[n_objects].predict(X, **kwargs) - del self.set_layer_.cached_models[n_objects] - X_n = np.empty( - (fr.shape[0], n_objects, fr.shape[1] + self.n_object_features_fit_), - dtype="float", - ) - for i in range(n_objects): - X_n[:, i] = np.concatenate((X[:, i], fr), axis=1) - X = np.copy(X_n) - return X - - def _predict_scores_fixed(self, X, **kwargs): - """ - Predict the scores for a fixed ranking size. 
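# Hedged sketch (toy data, not from the original repo) of the generator
# contract described above: an endless stream of (inputs, targets) batches.
import numpy as np

def toy_generator(batch_size=8, n_objects=4, n_features=2):
    rng = np.random.RandomState(0)
    while True:  # loops indefinitely; an epoch ends after steps_per_epoch batches
        X = rng.rand(batch_size, n_objects, n_features)
        Y = np.argsort(-X.sum(axis=-1), axis=-1)  # toy orderings as targets
        yield X, Y

# hypothetical usage: ranker.fit_generator(toy_generator(), epochs=5, steps_per_epoch=10)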
- - Parameters - ---------- - X : numpy array - float (n_instances, n_objects, n_features) - - Returns - ------- - scores : numpy array - float (n_instances, n_objects) - - """ - # model = self._construct_scoring_model(n_objects) - X = self._get_context_representation(X, kwargs) - n_instances, n_objects, n_features = X.shape - logger.info( - "After applying the set representations features {}".format(n_features) - ) - input_layer_joint = Input( - shape=(n_objects, n_features), name="input_joint_model" - ) - scores = [] - - inputs = [create_input_lambda(i)(input_layer_joint) for i in range(n_objects)] - - for i in range(n_objects): - joint = inputs[i] - for j in range(self.n_hidden_joint_layers): - joint = self.joint_layers[j](joint) - scores.append(self.scorer(joint)) - scores = concatenate(scores, name="final_scores") - joint_model = Model(inputs=input_layer_joint, outputs=scores) - predicted_scores = joint_model.predict(X) - logger.info("Done predicting scores") - return predicted_scores diff --git a/csrank/core/feta_linear.py b/csrank/core/feta_linear.py deleted file mode 100644 index 789709cd..00000000 --- a/csrank/core/feta_linear.py +++ /dev/null @@ -1,240 +0,0 @@ -from itertools import combinations -import logging -import math - -from keras.losses import binary_crossentropy -import numpy as np -from sklearn.utils import check_random_state -import tensorflow as tf - -from csrank.learner import Learner -from csrank.numpy_util import sigmoid -from csrank.util import progress_bar - -logger = logging.getLogger(__name__) - - -class FETALinearCore(Learner): - """Core Learner implementing the First Evaluate then Aggregate approach. - - This implements a linear variant of the FETA approach introduced in - [PfGuH18]. The idea is to first evaluate each object in each sub-context of - fixed size with a linear function approximator and then to aggregate these - evaluations. - - References - ---------- - - .. [PfGuH18] Pfannschmidt, K., Gupta, P., & Hüllermeier, E. (2018). Deep - architectures for learning context-dependent ranking functions. arXiv - preprint arXiv:1803.05796. https://arxiv.org/pdf/1803.05796.pdf - """ - - def __init__( - self, - learning_rate=1e-3, - batch_size=256, - loss_function=binary_crossentropy, - epochs_drop=50, - drop=0.01, - random_state=None, - **kwargs, - ): - """ - Parameters - ---------- - learning_rate : float - The learning rate used by the gradient descent optimizer. - batch_size : int - The size of the mini-batches used to train the Neural Network. - loss_function - The loss function to minimize when training the Neural Network. See - the functions offered in the keras.losses module for more details. - epochs_drop: int - The amount of training epochs after which the learning rate is - decreased by a factor of `drop`. - drop: float - The factor by which to decrease the learning rate every - `epochs_drop` epochs. - random_state: np.RandomState - The random state to use in this object. 
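# Toy numpy illustration of "first evaluate, then aggregate" (the data and
# the linear utility below are invented): score every ordered pair of
# objects, then average each object's pairwise utilities into one score.
from itertools import permutations
import numpy as np

rng = np.random.RandomState(1)
X = rng.rand(5, 3)  # one query set with 5 objects and 3 features each
w = rng.rand(6)     # linear utility on the concatenated pair features
scores = np.zeros(5)
for i, j in permutations(range(5), 2):
    scores[i] += np.concatenate((X[i], X[j])) @ w
scores /= 5 - 1     # mean over the n-1 pairwise evaluations per object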
- """ - self.learning_rate = learning_rate - self.batch_size = batch_size - self.random_state = random_state - self.loss_function = loss_function - self.epochs_drop = epochs_drop - self.drop = drop - - def _construct_model_(self, n_objects): - self.X = tf.placeholder( - "float32", [None, n_objects, self.n_object_features_fit_] - ) - self.Y = tf.placeholder("float32", [None, n_objects]) - std = 1 / np.sqrt(self.n_object_features_fit_) - self.b1 = tf.Variable( - self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32 - ) - self.W1 = tf.Variable( - self.random_state_.normal( - loc=0, scale=std, size=2 * self.n_object_features_fit_ - ), - dtype=tf.float32, - ) - self.W2 = tf.Variable( - self.random_state_.normal( - loc=0, scale=std, size=self.n_object_features_fit_ - ), - dtype=tf.float32, - ) - self.b2 = tf.Variable( - self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32 - ) - self.W_out_ = tf.Variable( - self.random_state_.normal(loc=0, scale=std, size=2), - dtype=tf.float32, - name="W_out", - ) - - outputs = [list() for _ in range(n_objects)] - for i, j in combinations(range(n_objects), 2): - x1 = self.X[:, i] - x2 = self.X[:, j] - x1x2 = tf.concat((x1, x2), axis=1) - x2x1 = tf.concat((x2, x1), axis=1) - n_g = tf.tensordot(x1x2, self.W1, axes=1) + self.b1 - n_l = tf.tensordot(x2x1, self.W1, axes=1) + self.b1 - outputs[i].append(n_g[:, None]) - outputs[j].append(n_l[:, None]) - outputs = [tf.concat(x, axis=1) for x in outputs] - outputs = tf.reduce_mean(outputs, axis=-1) - outputs = tf.transpose(outputs) - zero_outputs = tf.tensordot(self.X, self.W2, axes=1) + self.b2 - scores = tf.sigmoid(self.W_out_[0] * zero_outputs + self.W_out_[1] * outputs) - scores = tf.cast(scores, tf.float32) - self.loss = self.loss_function(self.Y, scores) - self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize( - self.loss - ) - - def step_decay(self, epoch): - """Update the current learning rate. - - Computes the current learning rate based on the initial learning rate, - the current epoch and the decay speed set by the `epochs_drop` and - `drop` hyperparameters. - - Parameters - ---------- - - epoch: int - The current epoch. - """ - step = math.floor((1 + epoch) / self.epochs_drop) - self.current_lr_ = self.learning_rate * math.pow(self.drop, step) - self.optimizer = tf.train.GradientDescentOptimizer(self.current_lr_).minimize( - self.loss - ) - - def _pre_fit(self): - super()._pre_fit() - self.random_state_ = check_random_state(self.random_state) - - def fit( - self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd - ): - """ - Fit the preference learning algorithm on the provided set of queries X - and preferences Y of those objects. The provided queries and - corresponding preferences are of a fixed size (numpy arrays). - - Parameters - ---------- - X : array-like, shape (n_samples, n_objects, n_features) - Feature vectors of the objects - Y : array-like, shape (n_samples, n_objects) - Preferences of the objects in form of rankings or choices - epochs: int - The amount of epochs to train for. The training loop will try to - predict the target variables and adjust its parameters by gradient - descent `epochs` times. - """ - self._pre_fit() - # Global Variables Initializer - n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape - if self.n_objects_fit_ < 2: - # Nothing to learn here, model cannot be constructed without any - # instance pairs. 
- return self - self._construct_model_(self.n_objects_fit_) - init = tf.global_variables_initializer() - - with tf.Session() as tf_session: - tf_session.run(init) - self._fit_(X, Y, epochs, n_instances, tf_session, verbose) - training_cost = tf_session.run(self.loss, feed_dict={self.X: X, self.Y: Y}) - logger.info( - "Fitting completed {} epochs done with loss {}".format( - epochs, training_cost.mean() - ) - ) - self.weight1_ = tf_session.run(self.W1) - self.bias1_ = tf_session.run(self.b1) - self.weight2_ = tf_session.run(self.W2) - self.bias2_ = tf_session.run(self.b2) - self.W_last_ = tf_session.run(self.W_out_) - return self - - def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): - try: - for epoch in range(epochs): - for start in range(0, n_instances, self.batch_size): - end = np.min([start + self.batch_size, n_instances]) - tf_session.run( - self.optimizer, - feed_dict={self.X: X[start:end], self.Y: Y[start:end]}, - ) - if verbose == 1: - progress_bar(end, n_instances, status="Fitting") - if verbose == 1: - c = tf_session.run(self.loss, feed_dict={self.X: X, self.Y: Y}) - print("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) - if (epoch + 1) % 100 == 0: - c = tf_session.run(self.loss, feed_dict={self.X: X, self.Y: Y}) - logger.info("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) - self.step_decay(epoch) - except KeyboardInterrupt: - logger.info("Interrupted") - c = tf_session.run(self.loss, feed_dict={self.X: X, self.Y: Y}) - logger.info("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) - - def _predict_scores_fixed(self, X, **kwargs): - """Predict the scores for a given collection of sets of objects of same size. - - Parameters - ---------- - X : array-like, shape (n_samples, n_objects, n_features) - - - Returns - ------- - Y : array-like, shape (n_samples, n_objects) - Returns the scores of each of the objects for each of the samples. 
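# Worked example of the step decay defined above: with the defaults used
# here (learning_rate=1e-3, epochs_drop=50, drop=0.01), the learning rate
# shrinks by a factor of 100 every 50 epochs.
import math

learning_rate, epochs_drop, drop = 1e-3, 50, 0.01
for epoch in (0, 48, 49, 99):
    step = math.floor((1 + epoch) / epochs_drop)
    print(epoch, learning_rate * math.pow(drop, step))
# epochs 0-48 -> 1e-03, epochs 49-98 -> 1e-05, epoch 99 onward -> 1e-07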
- """ - n_instances, n_objects, n_features = X.shape - assert n_features == self.n_object_features_fit_ - outputs = [list() for _ in range(n_objects)] - for i, j in combinations(range(n_objects), 2): - x1 = X[:, i] - x2 = X[:, j] - x1x2 = np.concatenate((x1, x2), axis=1) - x2x1 = np.concatenate((x2, x1), axis=1) - n_g = np.dot(x1x2, self.weight1_) + self.bias1_ - n_l = np.dot(x2x1, self.weight1_) + self.bias1_ - outputs[i].append(n_g) - outputs[j].append(n_l) - outputs = np.array(outputs) - outputs = np.mean(outputs, axis=1).T - scores_zero = np.dot(X, self.weight2_) + self.bias2_ - scores = sigmoid(self.W_last_[0] * scores_zero + self.W_last_[1] * outputs) - return scores diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py deleted file mode 100644 index 7c441a97..00000000 --- a/csrank/core/feta_network.py +++ /dev/null @@ -1,349 +0,0 @@ -from itertools import combinations -from itertools import permutations -import logging - -from keras import backend as K -from keras import Input -from keras import Model -from keras.layers import add -from keras.layers import concatenate -from keras.layers import Dense -from keras.layers import Lambda -from keras.optimizers import SGD -from keras.regularizers import l2 -import numpy as np -from sklearn.utils import check_random_state - -from csrank.layers import NormalizedDense -from csrank.learner import Learner -from csrank.losses import hinged_rank_loss - -logger = logging.getLogger(__name__) - - -class FETANetwork(Learner): - def __init__( - self, - n_hidden=2, - n_units=8, - add_zeroth_order_model=False, - max_number_of_objects=5, - num_subsample=5, - loss_function=hinged_rank_loss, - batch_normalization=False, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="selu", - optimizer=SGD, - metrics=(), - batch_size=256, - random_state=None, - **kwargs, - ): - self.random_state = random_state - self.kernel_regularizer = kernel_regularizer - self.kernel_initializer = kernel_initializer - self.batch_normalization = batch_normalization - self.activation = activation - self.loss_function = loss_function - self.metrics = metrics - self.max_number_of_objects = max_number_of_objects - self.num_subsample = num_subsample - self.batch_size = batch_size - self.optimizer = optimizer - self.add_zeroth_order_model = add_zeroth_order_model - self.n_hidden = n_hidden - self.n_units = n_units - self._store_kwargs( - kwargs, {"optimizer__", "kernel_regularizer__", "hidden_dense_layer__"} - ) - - @property - def n_objects(self): - if self.n_objects_fit_ > self.max_number_of_objects: - return self.max_number_of_objects - return self.n_objects_fit_ - - def _construct_layers(self): - self.input_layer = Input( - shape=(self.n_objects_fit_, self.n_object_features_fit_) - ) - # Todo: Variable sized input - # X = Input(shape=(None, n_features)) - logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) - hidden_dense_kwargs = { - "kernel_regularizer": self.kernel_regularizer_, - "kernel_initializer": self.kernel_initializer, - "activation": self.activation, - } - hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__")) - if self.batch_normalization: - if self.add_zeroth_order_model: - self.hidden_layers_zeroth = [ - NormalizedDense( - self.n_units, - name="hidden_zeroth_{}".format(x), - **hidden_dense_kwargs, - ) - for x in range(self.n_hidden) - ] - self.hidden_layers = [ - NormalizedDense( - self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs - ) - for x in range(self.n_hidden) - ] 
- else: - if self.add_zeroth_order_model: - self.hidden_layers_zeroth = [ - Dense( - self.n_units, - name="hidden_zeroth_{}".format(x), - **hidden_dense_kwargs, - ) - for x in range(self.n_hidden) - ] - self.hidden_layers = [ - Dense(self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs) - for x in range(self.n_hidden) - ] - assert len(self.hidden_layers) == self.n_hidden - self.output_node = Dense( - 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer_ - ) - if self.add_zeroth_order_model: - self.output_node_zeroth = Dense( - 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer_ - ) - - @property - def zero_order_model(self): - if not hasattr(self, "zero_order_model_"): - if self.add_zeroth_order_model: - logger.info("Creating zeroth model") - inp = Input(shape=(self.n_object_features_fit_,)) - - x = inp - for hidden in self.hidden_layers_zeroth: - x = hidden(x) - zeroth_output = self.output_node_zeroth(x) - - self.zero_order_model_ = Model(inputs=[inp], outputs=zeroth_output) - logger.info("Done creating zeroth model") - else: - self.zero_order_model_ = None - return self.zero_order_model_ - - @property - def pairwise_model(self): - if not hasattr(self, "pairwise_model_"): - logger.info("Creating pairwise model") - x1 = Input(shape=(self.n_object_features_fit_,)) - x2 = Input(shape=(self.n_object_features_fit_,)) - - x1x2 = concatenate([x1, x2]) - x2x1 = concatenate([x2, x1]) - - for hidden in self.hidden_layers: - x1x2 = hidden(x1x2) - x2x1 = hidden(x2x1) - - merged_left = concatenate([x1x2, x2x1]) - merged_right = concatenate([x2x1, x1x2]) - - n_g = self.output_node(merged_left) - n_l = self.output_node(merged_right) - - merged_output = concatenate([n_g, n_l]) - self.pairwise_model_ = Model(inputs=[x1, x2], outputs=merged_output) - logger.info("Done creating pairwise model") - return self.pairwise_model_ - - def _predict_pair(self, a, b, only_pairwise=False, **kwargs): - # TODO: Is this working correctly? - pairwise = self.pairwise_model.predict([a, b], **kwargs) - if not only_pairwise and self.add_zeroth_order_model: - utility_a = self.zero_order_model.predict([a]) - utility_b = self.zero_order_model.predict([b]) - return pairwise + (utility_a, utility_b) - return pairwise - - def _predict_scores_using_pairs(self, X, **kwd): - n_instances, n_objects, n_features = X.shape - n2 = n_objects * (n_objects - 1) - pairs = np.empty((n2, 2, n_features)) - scores = np.zeros((n_instances, n_objects)) - for n in range(n_instances): - for k, (i, j) in enumerate(permutations(range(n_objects), 2)): - pairs[k] = (X[n, i], X[n, j]) - result = self._predict_pair( - pairs[:, 0], pairs[:, 1], only_pairwise=True, **kwd - )[:, 0] - scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1) - del result - del pairs - if self.add_zeroth_order_model: - scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features)) - scores_zero = scores_zero.reshape(n_instances, n_objects) - scores = scores + scores_zero - return scores - - def construct_model(self): - """ - Construct the :math:`1`-st order and :math:`0`-th order models, which are used to approximate the - :math:`U_1(x, C(x))` and the :math:`U_0(x)` utilities respectively. For each pair of objects in - :math:`x_i, x_j \\in Q` :math:`U_1(x, C(x))` we construct :class:`CmpNetCore` with weight sharing to - approximate a pairwise-matrix. A pairwise matrix with index (i,j) corresponds to the :math:`U_1(x_i,x_j)` - is a measure of how favorable it is to choose :math:`x_i` over :math:`x_j`. 
Using this matrix we calculate - the borda score for each object to calculate :math:`U_1(x, C(x))`. For `0`-th order model we construct - :math:`\\lvert Q \\lvert` sequential networks whose weights are shared to evaluate the :math:`U_0(x)` for - each object in the query set :math:`Q`. The output mode is using linear activation. - - Returns - ------- - model: keras :class:`Model` - Neural network to learn the FETA utility score - """ - - def create_input_lambda(i): - return Lambda(lambda x: x[:, i]) - - if self.add_zeroth_order_model: - logger.debug("Create 0th order model") - zeroth_order_outputs = [] - inputs = [] - for i in range(self.n_objects_fit_): - x = create_input_lambda(i)(self.input_layer) - inputs.append(x) - for hidden in self.hidden_layers_zeroth: - x = hidden(x) - zeroth_order_outputs.append(self.output_node_zeroth(x)) - zeroth_order_scores = concatenate(zeroth_order_outputs) - logger.debug("0th order model finished") - logger.debug("Create 1st order model") - outputs = [list() for _ in range(self.n_objects_fit_)] - for i, j in combinations(range(self.n_objects_fit_), 2): - if self.add_zeroth_order_model: - x1 = inputs[i] - x2 = inputs[j] - else: - x1 = create_input_lambda(i)(self.input_layer) - x2 = create_input_lambda(j)(self.input_layer) - x1x2 = concatenate([x1, x2]) - x2x1 = concatenate([x2, x1]) - - for hidden in self.hidden_layers: - x1x2 = hidden(x1x2) - x2x1 = hidden(x2x1) - - merged_left = concatenate([x1x2, x2x1]) - merged_right = concatenate([x2x1, x1x2]) - - n_g = self.output_node(merged_left) - n_l = self.output_node(merged_right) - - outputs[i].append(n_g) - outputs[j].append(n_l) - # convert rows of pairwise matrix to keras layers: - outputs = [concatenate(x) for x in outputs] - - # compute utility scores: - scores = [ - Lambda(lambda s: K.mean(s, axis=1, keepdims=True))(x) for x in outputs - ] - scores = concatenate(scores) - logger.debug("1st order model finished") - if self.add_zeroth_order_model: - scores = add([scores, zeroth_order_scores]) - model = Model(inputs=self.input_layer, outputs=scores) - logger.debug("Compiling complete model...") - model.compile( - loss=self.loss_function, - optimizer=self.optimizer_, - metrics=list(self.metrics), - ) - return model - - def _pre_fit(self): - super()._pre_fit() - self._initialize_optimizer() - self._initialize_regularizer() - self.random_state_ = check_random_state(self.random_state) - - def fit( - self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd - ): - """ - Fit a generic preference learning model on a provided set of queries. - The provided queries can be of a fixed size (numpy arrays). - - Parameters - ---------- - X : numpy array - (n_instances, n_objects, n_features) - Feature vectors of the objects - Y : numpy array - (n_instances, n_objects) - Preferences in form of rankings or choices for given objects - epochs : int - Number of epochs to run if training for a fixed query size - callbacks : list - List of callbacks to be called during optimization - validation_split : float (range : [0,1]) - Percentage of instances to split off to validate on - verbose : bool - Print verbose information - **kwd : - Keyword arguments for the fit function - """ - self._pre_fit() - _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape - self._construct_layers() - - logger.debug("Enter fit function...") - - X, Y = self.sub_sampling(X, Y) - if self.n_objects_fit_ < 2: - # Nothing to learn, can't construct a model. 
- return self - self.model_ = self.construct_model() - logger.debug("Starting gradient descent...") - - self.model_.fit( - x=X, - y=Y, - batch_size=self.batch_size, - epochs=epochs, - callbacks=callbacks, - validation_split=validation_split, - verbose=verbose, - **kwd, - ) - return self - - def sub_sampling(self, X, Y): - if self.n_objects_fit_ > self.max_number_of_objects: - bucket_size = int(self.n_objects_fit_ / self.max_number_of_objects) - idx = self.random_state_.randint( - bucket_size, size=(len(X), self.n_objects_fit_) - ) - # TODO: subsampling multiple rankings - idx += np.arange(start=0, stop=self.n_objects_fit_, step=bucket_size)[ - : self.n_objects_fit_ - ] - X = X[np.arange(X.shape[0])[:, None], idx] - Y = Y[np.arange(X.shape[0])[:, None], idx] - tmp_sort = Y.argsort(axis=-1) - Y = np.empty_like(Y) - Y[np.arange(len(X))[:, None], tmp_sort] = np.arange(self.n_objects_fit_) - return X, Y - - def _predict_scores_fixed(self, X, **kwargs): - n_objects = X.shape[-2] - logger.info("For Test instances {} objects {} features {}".format(*X.shape)) - if self.n_objects_fit_ != n_objects: - scores = self._predict_scores_using_pairs(X, **kwargs) - else: - scores = self.model_.predict(X, **kwargs) - logger.info("Done predicting scores") - return scores diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py deleted file mode 100644 index 88344fff..00000000 --- a/csrank/core/ranknet_core.py +++ /dev/null @@ -1,205 +0,0 @@ -import logging - -from keras import Input -from keras import Model -from keras.layers import add -from keras.layers import Dense -from keras.layers import Lambda -from keras.optimizers import SGD -from keras.regularizers import l2 -from sklearn.utils import check_random_state - -from csrank.layers import NormalizedDense -from csrank.learner import Learner - -logger = logging.getLogger(__name__) - - -class RankNetCore(Learner): - def __init__( - self, - n_hidden=2, - n_units=8, - loss_function="binary_crossentropy", - batch_normalization=True, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="relu", - optimizer=SGD, - metrics=("binary_accuracy",), - batch_size=256, - random_state=None, - **kwargs, - ): - self.batch_normalization = batch_normalization - self.activation = activation - self.metrics = metrics - self.kernel_regularizer = kernel_regularizer - self.kernel_initializer = kernel_initializer - self.loss_function = loss_function - self.optimizer = optimizer - self.n_hidden = n_hidden - self.n_units = n_units - self.batch_size = batch_size - self.random_state = random_state - self._store_kwargs( - kwargs, {"optimizer__", "kernel_regularizer__", "hidden_dense_layer__"} - ) - - def _construct_layers(self): - logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) - self.x1 = Input(shape=(self.n_object_features_fit_,)) - self.x2 = Input(shape=(self.n_object_features_fit_,)) - self.output_node = Dense( - 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer_ - ) - self.output_layer_score = Dense(1, activation="linear") - hidden_dense_kwargs = { - "kernel_regularizer": self.kernel_regularizer_, - "kernel_initializer": self.kernel_initializer, - "activation": self.activation, - } - hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__")) - if self.batch_normalization: - self.hidden_layers = [ - NormalizedDense( - self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs - ) - for x in range(self.n_hidden) - ] - else: - self.hidden_layers = [ - Dense(self.n_units, 
name="hidden_{}".format(x), **hidden_dense_kwargs) - for x in range(self.n_hidden) - ] - assert len(self.hidden_layers) == self.n_hidden - - def construct_model(self): - """ - Construct the RankNet which is used to approximate the :math:`U(x)` utility. For each pair of objects in - :math:`x_i, x_j \\in Q` we construct two sub-networks with weight sharing in all hidden layer apart form the - last layer for which weights are mirrored version of each other. The output of these networks are connected - to a sigmoid unit that produces the output :math:`P_{ij}` which is the probability of preferring object - :math:`x_i` over :math:`x_j`, to approximate the :math:`U(x)`. - - Returns - ------- - model: keras :class:`Model` - Neural network to learn the RankNet utility score - """ - # weight sharing using same hidden layer for two objects - enc_x1 = self.hidden_layers[0](self.x1) - enc_x2 = self.hidden_layers[0](self.x2) - neg_x2 = Lambda(lambda x: -x)(enc_x2) - for hidden_layer in self.hidden_layers[1:]: - enc_x1 = hidden_layer(enc_x1) - neg_x2 = hidden_layer(neg_x2) - merged_inputs = add([enc_x1, neg_x2]) - output = self.output_node(merged_inputs) - model = Model(inputs=[self.x1, self.x2], outputs=output) - model.compile( - loss=self.loss_function, - optimizer=self.optimizer_, - metrics=list(self.metrics), - ) - return model - - def _convert_instances_(self, X, Y): - raise NotImplementedError - - def _pre_fit(self): - super()._pre_fit() - self.random_state_ = check_random_state(self.random_state) - self._initialize_optimizer() - self._initialize_regularizer() - - def fit( - self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd - ): - """ - Fit a preference learning RankNet model on a provided set of queries. The provided queries can be of - a fixed size (numpy arrays). For learning this network the binary cross entropy loss function for a pair of - objects :math:`x_i, x_j \\in Q` is defined as: - - .. math:: - - C_{ij} = -\\tilde{P_{ij}}\\log(P_{ij}) - (1 - \\tilde{P_{ij}})\\log(1 - P{ij}) \\enspace, - - where :math:`\\tilde{P_{ij}}` is ground truth probability of the preference of :math:`x_i` over :math:`x_j`. - :math:`\\tilde{P_{ij}} = 1` if :math:`x_i \\succ x_j` else :math:`\\tilde{P_{ij}} = 0`. - - Parameters - ---------- - X : numpy array (n_instances, n_objects, n_features) - Feature vectors of the objects - Y : numpy array (n_instances, n_objects) - Preferences in form of Orderings or Choices for given n_objects - epochs : int - Number of epochs to run if training for a fixed query size - callbacks : list - List of callbacks to be called during optimization - validation_split : float (range : [0,1]) - Percentage of instances to split off to validate on - verbose : bool - Print verbose information - **kwd : - Keyword arguments for the fit function - """ - self._pre_fit() - _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape - logger.debug("Creating the model") - - self._construct_layers() - - # Model with input as two objects and output as probability of x1>x2 - self.model_ = self.construct_model() - - if self.n_objects_fit_ < 2: - # Nothing to learn, cannot create pairwise comparisons. 
-            return self
-        X1, X2, Y_single = self._convert_instances_(X, Y)
-
-        logger.debug("Instances created {}".format(X1.shape[0]))
-        logger.debug("Finished Creating the model, now fitting started")
-
-        self.model_.fit(
-            [X1, X2],
-            Y_single,
-            batch_size=self.batch_size,
-            epochs=epochs,
-            callbacks=callbacks,
-            validation_split=validation_split,
-            verbose=verbose,
-            **kwd,
-        )
-
-        logger.debug("Fitting Complete")
-        return self
-
-    @property
-    def scoring_model(self):
-        """
-        Creates a scoring model for the trained RankNet, which predicts the utility scores for a given set of objects.
-
-        Returns
-        -------
-        model: keras :class:`Model`
-            Neural network to learn the non-linear utility score
-        """
-        if not hasattr(self, "scoring_model_"):
-            logger.info("creating scoring model")
-            inp = Input(shape=(self.n_object_features_fit_,))
-            x = inp
-            for hidden_layer in self.hidden_layers:
-                x = hidden_layer(x)
-            output_score = self.output_node(x)
-            self.scoring_model_ = Model(inputs=[inp], outputs=output_score)
-        return self.scoring_model_
-
-    def _predict_scores_fixed(self, X, **kwargs):
-        n_instances, n_objects, n_features = X.shape
-        logger.info("Test Set instances {} objects {} features {}".format(*X.shape))
-        X1 = X.reshape(n_instances * n_objects, n_features)
-        scores = self.scoring_model.predict(X1, **kwargs)
-        scores = scores.reshape(n_instances, n_objects)
-        logger.info("Done predicting scores")
-        return scores
diff --git a/csrank/dataset_reader/letor_listwise_dataset_reader.py b/csrank/dataset_reader/letor_listwise_dataset_reader.py
index 22d939db..35d55178 100644
--- a/csrank/dataset_reader/letor_listwise_dataset_reader.py
+++ b/csrank/dataset_reader/letor_listwise_dataset_reader.py
@@ -189,7 +189,10 @@ def create_dataset_dictionary(self, files):
                 rel_deg = int(information[0])
                 qid = information[1].split(" ")[0]
                 x = np.array(
-                    [float(l.split(":")[1]) for l in information[1].split(" ")[1:-1]]
+                    [
+                        float(elem.split(":")[1])
+                        for elem in information[1].split(" ")[1:-1]
+                    ]
                 )
                 x = np.insert(x, len(x), rel_deg)
                 if qid not in dataset:
diff --git a/csrank/dataset_reader/letor_ranking_dataset_reader.py b/csrank/dataset_reader/letor_ranking_dataset_reader.py
index 02cff924..39eda27e 100644
--- a/csrank/dataset_reader/letor_ranking_dataset_reader.py
+++ b/csrank/dataset_reader/letor_ranking_dataset_reader.py
@@ -210,7 +210,10 @@ def create_dataset_dictionary(self, files):
                 rel_deg = int(information[0])
                 qid = information[1].split(" ")[0]
                 x = np.array(
-                    [float(l.split(":")[1]) for l in information[1].split(" ")[1:-1]]
+                    [
+                        float(elem.split(":")[1])
+                        for elem in information[1].split(" ")[1:-1]
+                    ]
                 )
                 x = np.insert(x, len(x), rel_deg)
                 if qid not in dataset:
diff --git a/csrank/discrete_choice_losses.py b/csrank/discrete_choice_losses.py
new file mode 100644
index 00000000..19ea4f49
--- /dev/null
+++ b/csrank/discrete_choice_losses.py
@@ -0,0 +1,64 @@
+"""Loss functions for discrete choice problems."""
+
+import torch
+
+
+class CategoricalHingeLossMax:
+    """Compute the Categorical Hinge Loss.
+
+    This is the "max" aggregated version of CHL, described on page 14/15 of
+    [1]_.
+
+    Parameters
+    ----------
+    scores: 2d tensor
+        The predicted scores for each object of each instance.
+
+    true_choice: 2d tensor
+        The true choice mask for each instance.
+
+    Returns
+    -------
+    torch.float
+        The total loss, summed over all instances.
+
+    References
+    ----------
+    .. [1] Pfannschmidt, K., Gupta, P., & Hüllermeier, E. (2019). Learning
+       choice functions: Concepts and architectures. arXiv preprint
+       arXiv:1901.10860.
+ """ + + # The argument order is chosen to be compatible with skorch. + def __call__(self, scores, true_choice): + """Compute the loss of a scoring in the context of a choice. + + >>> objects = ["a", "b", "c"] + >>> true_choice_1 = [0, 1, 0] + >>> scores_1 = [2, 1, 0.5] # non-chosen object "a" has higher score than chosen object "b" + >>> chl = CategoricalHingeLossMax() + >>> chl(torch.tensor([scores_1]), torch.tensor([true_choice_1])) + tensor(2.) + + >>> true_choice_2 = [1, 0, 0] + >>> scores_2 = [0, 1, 1.5] + + >>> chl = CategoricalHingeLossMax() + >>> chl(torch.tensor([scores_1, scores_2]), torch.tensor([true_choice_1, true_choice_2])) + tensor(4.5000) + """ + # not quite, but dealing with true infintiy is hairy and there should + # be no practical difference + infty = 2 ** 32 + + # Mask out the chosen scores from the max with a value of -infinity. + (max_score_not_chosen, _indices) = torch.max( + scores - true_choice * infty, dim=1 + ) + # Mask out the not-chosen scores from the min with a value of +infinity. + (min_score_chosen, _indices) = torch.min( + scores + (1 - true_choice) * infty, dim=1 + ) + + hinge = torch.clamp(1 + max_score_not_chosen - min_score_chosen, min=0) + return hinge.sum() diff --git a/csrank/discretechoice/__init__.py b/csrank/discretechoice/__init__.py index cb5ca873..fb6140ce 100644 --- a/csrank/discretechoice/__init__.py +++ b/csrank/discretechoice/__init__.py @@ -1,9 +1,5 @@ from .baseline import RandomBaselineDC -from .cmpnet_discrete_choice import CmpNetDiscreteChoiceFunction from .fate_discrete_choice import FATEDiscreteChoiceFunction -from .fatelinear_discrete_choice import FATELinearDiscreteChoiceFunction -from .feta_discrete_choice import FETADiscreteChoiceFunction -from .fetalinear_discrete_choice import FETALinearDiscreteChoiceFunction from .generalized_nested_logit import GeneralizedNestedLogitModel from .mixed_logit_model import MixedLogitModel from .model_selector import ModelSelector @@ -11,15 +7,9 @@ from .nested_logit_model import NestedLogitModel from .paired_combinatorial_logit import PairedCombinatorialLogit from .pairwise_discrete_choice import PairwiseSVMDiscreteChoiceFunction -from .ranknet_discrete_choice import RankNetDiscreteChoiceFunction __all__ = [ - "RandomBaselineDC", - "CmpNetDiscreteChoiceFunction", "FATEDiscreteChoiceFunction", - "FATELinearDiscreteChoiceFunction", - "FETADiscreteChoiceFunction", - "FETALinearDiscreteChoiceFunction", "GeneralizedNestedLogitModel", "MixedLogitModel", "ModelSelector", @@ -27,5 +17,5 @@ "NestedLogitModel", "PairedCombinatorialLogit", "PairwiseSVMDiscreteChoiceFunction", - "RankNetDiscreteChoiceFunction", + "RandomBaselineDC", ] diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py deleted file mode 100644 index 1b297051..00000000 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ /dev/null @@ -1,104 +0,0 @@ -import logging - -from keras.optimizers import SGD -from keras.regularizers import l2 - -from csrank.choicefunction.util import generate_complete_pairwise_dataset -from csrank.core.cmpnet_core import CmpNetCore -from csrank.discretechoice.discrete_choice import DiscreteObjectChooser - -logger = logging.getLogger(__name__) - - -class CmpNetDiscreteChoiceFunction(DiscreteObjectChooser, CmpNetCore): - def __init__( - self, - n_hidden=2, - n_units=8, - loss_function="binary_crossentropy", - batch_normalization=True, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="relu", - optimizer=SGD, - 
metrics=("binary_accuracy",), - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create an instance of the :class:`CmpNetCore` architecture for learning a discrete choice function. - CmpNet breaks the preferences in form of rankings into pairwise comparisons and learns a pairwise model for - the each pair of object in the underlying set. For prediction list of objects is converted in pair of - objects and the pairwise predicate is evaluated using them. The outputs of the network for each pair of - objects :math:`U(x_1,x_2), U(x_2,x_1)` are evaluated. - :math:`U(x_1,x_2)` is a measure of how favorable it is to choose :math:`x_1` over :math:`x_2`. - The utility score of object :math:`x_i` in query set :math:`Q = \\{ x_1 , \\ldots , x_n \\}` is evaluated as: - - .. math:: - - U(x_i) = \\left\\{ \\frac{1}{n-1} \\sum_{j \\in [n] \\setminus \\{i\\}} U_1(x_i , x_j)\\right\\} - - The discrete choice for the given query set :math:`Q` is defined as: - - .. math:: - - dc(Q) := \\operatorname{argmax}_{i \\in [n]} \\; U(x_i) - - Parameters - ---------- - n_hidden : int - Number of hidden layers used in the scoring network - n_units : int - Number of hidden units in each layer of the scoring network - loss_function : function or string - Loss function to be used for the binary decision task of the pairwise comparisons - batch_normalization : bool - Whether to use batch normalization in each hidden layer - kernel_regularizer : uninitialized keras regularizer - Regularizer function applied to all the hidden weight matrices - kernel_initializer : function or string - Initialization function for the weights of each hidden layer - activation : function or string - Type of activation function to use in each hidden layer - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. - optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. - metrics : list - List of metrics to evaluate during training (can be non-differentiable) - batch_size : int - Batch size to use during training - random_state : int, RandomState instance or None - Seed of the pseudorandom generator or a RandomState instance - hidden_dense_layer__{kwarg} - Arguments to be passed to the Dense layers (or NormalizedDense - if batch_normalization is enabled). See the keras documentation - for those classes for available options. - - References - ---------- - [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco Scarselli. 2011. SortNet: Learning to Rank by a Neural Preference Function. IEEE Trans. Neural Networks 22, 9 (2011), 1368–1380. 
https://doi.org/10.1109/TNN.2011.2160875 - """ - super().__init__( - n_hidden=n_hidden, - n_units=n_units, - loss_function=loss_function, - batch_normalization=batch_normalization, - kernel_regularizer=kernel_regularizer, - kernel_initializer=kernel_initializer, - activation=activation, - optimizer=optimizer, - metrics=metrics, - batch_size=batch_size, - random_state=random_state, - **kwargs, - ) - logger.info("Initializing network") - - def _convert_instances_(self, X, Y): - logger.debug("Creating the Dataset") - x1, x2, garbage, y_double, garbage = generate_complete_pairwise_dataset(X, Y) - del garbage - logger.debug("Finished the Dataset instances {}".format(x1.shape[0])) - return x1, x2, y_double diff --git a/csrank/discretechoice/discrete_choice.py b/csrank/discretechoice/discrete_choice.py index ae3b6f08..801aae33 100644 --- a/csrank/discretechoice/discrete_choice.py +++ b/csrank/discretechoice/discrete_choice.py @@ -2,8 +2,10 @@ from csrank.constants import DISCRETE_CHOICE from csrank.dataset_reader.discretechoice.util import convert_to_label_encoding +from csrank.discrete_choice_losses import CategoricalHingeLossMax +from csrank.learner import SkorchInstanceEstimator -__all__ = ["DiscreteObjectChooser"] +__all__ = ["DiscreteObjectChooser", "SkorchDiscreteChoiceFunction"] class DiscreteObjectChooser(metaclass=ABCMeta): @@ -44,3 +46,36 @@ def predict_for_scores(self, scores): result = scores.argmax(axis=1) result = convert_to_label_encoding(result, n) return result + + +class SkorchDiscreteChoiceFunction(DiscreteObjectChooser, SkorchInstanceEstimator): + """Base estimator for torch-based discrete choice. + + This makes it very simple to derive new estimators with any given scoring + module. Refer to skorch's documentation for supported parameters. For + example the optimizer or the optimizer's learning rate could be overridden. + + Parameters + ---------- + module : torch module (class) + This is the scoring module. It should be an uninstantiated + ``torch.nn.Module`` class that expects the number of features per + object as its only parameter on initialization. + + criterion : torch criterion (class) + The criterion that is used to evaluate and optimize the module. + + choice_size : int + The size of the target choice set. + + **kwargs : skorch NeuralNet arguments + All keyword arguments are passed to the constructor of + ``skorch.NeuralNet``. See the documentation of that class for more + details. 
+ """ + + def __init__( + self, module, criterion=CategoricalHingeLossMax, choice_size=1, **kwargs + ): + super().__init__(module=module, criterion=criterion, **kwargs) + self.choice_size = choice_size diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index c4cd62e7..f380ad9f 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -1,136 +1,90 @@ -import logging +import functools -from keras.layers import Dense -from keras.optimizers import SGD -from keras.regularizers import l2 +import torch.nn as nn -from csrank.core.fate_network import FATENetwork -from csrank.discretechoice.discrete_choice import DiscreteObjectChooser +from csrank.discrete_choice_losses import CategoricalHingeLossMax +from csrank.discretechoice.discrete_choice import SkorchDiscreteChoiceFunction +from csrank.modules.object_mapping import DenseNeuralNetwork +from csrank.modules.scoring import FATEScoring -logger = logging.getLogger(__name__) +class FATEDiscreteChoiceFunction(SkorchDiscreteChoiceFunction): + """A discrete choice estimator based on the FATE approach. + + Trains a model that first computes an embedding for each object within a + context of limited size, then aggregates these embeddings into a context + representation, and finally evaluates each object relative to that + aggregated context. + + The resulting model can then be used for context-sensitive choice. + + Parameters + ---------- + n_hidden_set_layers : int + The number of hidden layers that should be used for the ``DeepSet`` + context embedding. + + n_hidden_set_units : int + The number of units per hidden layer that should be used for the + ``DeepSet`` context embedding. + + n_hidden_joint_layers : int + The number of hidden layers that should be used for the utility + function that evaluates each object in the aggregated context. + + n_hidden_joint_units : int + The number of units per hidden layer that should be used for the + utility function that evaluates each object in the aggregated context. + + activation : torch activation function (class) + The activation function that should be used for each layer of the two + ("set" and "joint") neural networks. + + choice_size : int + The size of the target choice set. + + criterion : torch criterion (class) + The criterion that is used to evaluate and optimize the module. + + **kwargs : skorch NeuralNet arguments + All keyword arguments are passed to the constructor of + ``SkorchDiscreteChoiceFunction``. See the documentation of that class + for more details. + """ -class FATEDiscreteChoiceFunction(DiscreteObjectChooser, FATENetwork): def __init__( self, n_hidden_set_layers=2, n_hidden_set_units=32, - loss_function="categorical_hinge", - metrics=("categorical_accuracy",), n_hidden_joint_layers=2, n_hidden_joint_units=32, - activation="selu", - kernel_initializer="lecun_normal", - kernel_regularizer=l2, - optimizer=SGD, - batch_size=256, - random_state=None, - **kwargs, + activation=nn.SELU, + choice_size=1, + criterion=CategoricalHingeLossMax, + **kwargs ): - """ - Create a FATE-network architecture for leaning discrete choice function. The first-aggregate-then-evaluate - approach learns an embedding of each object and then aggregates that into a context representation - :math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function - :math:`U (x, \\mu_{C(x)})`. - To make it computationally efficient we take the the context :math:`C(x)` as query set :math:`Q`. - The context-representation is evaluated as: - - ..
math:: - \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y) - - where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each object :math:`y` to an - :math:`m`-dimensional embedding space :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`. - Training complexity is quadratic in the number of objects and prediction complexity is only linear. - The discrete choice for the given query set :math:`Q` is defined as: - - .. math:: - - dc(Q) := \\operatorname{argmax}_{x \\in Q} \\; U (x, \\mu_{C(x)}) - - Parameters - ---------- - n_hidden_set_layers : int - Number of set layers. - n_hidden_set_units : int - Number of hidden set units. - n_hidden_joint_layers : int - Number of joint layers. - n_hidden_joint_units : int - Number of joint units. - activation : string or function - Activation function to use in the hidden units - kernel_initializer : function or string - Initialization function for the weights of each hidden layer - kernel_regularizer : uninitialized keras regularizer - Regularizer to use in the hidden units - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. - optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. - batch_size : int - Batch size to use for training - loss_function : function - Differentiable loss function for the score vector - metrics : list - List of evaluation metrics (can be non-differentiable) - random_state : int or object - Numpy random state - hidden_dense_layer__{kwarg} - Arguments to be passed to the Dense layers. See the keras - documentation for ``Dense`` for available options. - """ - self.loss_function = loss_function - self.metrics = metrics - self._store_kwargs( - kwargs, {"optimizer__", "kernel_regularizer__", "hidden_dense_layer__"} - ) + self.n_hidden_set_layers = n_hidden_set_layers + self.n_hidden_set_units = n_hidden_set_units + self.n_hidden_joint_layers = n_hidden_joint_layers + self.n_hidden_joint_units = n_hidden_joint_units + self.activation = activation super().__init__( - n_hidden_set_layers=n_hidden_set_layers, - n_hidden_set_units=n_hidden_set_units, - n_hidden_joint_layers=n_hidden_joint_layers, - n_hidden_joint_units=n_hidden_joint_units, - activation=activation, - kernel_initializer=kernel_initializer, - kernel_regularizer=kernel_regularizer, - optimizer=optimizer, - batch_size=batch_size, - random_state=random_state, + module=FATEScoring, criterion=criterion, choice_size=choice_size, **kwargs ) - def _construct_layers(self): - """ - Construct basic layers shared by all the objects: - * Joint dense hidden layers - * Output scoring layer is sigmoid output for choice model - - Connecting the layers is done in join_input_layers and will be done in implementing classes. 
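A minimal usage sketch for the estimator defined above. The data here is synthetic and follows the library's ``(n_instances, n_objects, n_features)`` layout; ``lr`` and ``max_epochs`` are standard skorch ``NeuralNet`` parameters that are assumed to be accepted via ``**kwargs``::

    import numpy as np

    from csrank.discretechoice import FATEDiscreteChoiceFunction

    # 128 query sets, each with 5 objects described by 3 features.
    X = np.random.rand(128, 5, 3).astype(np.float32)
    # One-hot encoded discrete choices (random winners, for illustration only).
    Y = np.eye(5, dtype=np.float32)[np.random.randint(0, 5, size=128)]

    fate = FATEDiscreteChoiceFunction(
        n_hidden_set_units=16,
        n_hidden_joint_units=16,
        lr=0.01,        # standard skorch parameter, forwarded to the optimizer
        max_epochs=10,  # standard skorch parameter
    )
    fate.fit(X, Y)
    one_hot_choices = fate.predict(X)  # one-hot encoded, shape (128, 5)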
- """ - logger.info( - "Construct joint layers hidden units {} and layers {} ".format( - self.n_hidden_joint_units, self.n_hidden_joint_layers - ) + def _get_extra_module_parameters(self): + """Return extra parameters that should be passed to the module.""" + params = super()._get_extra_module_parameters() + params["pairwise_utility_module"] = functools.partial( + DenseNeuralNetwork, + hidden_layers=self.n_hidden_joint_layers, + units_per_hidden=self.n_hidden_joint_units, + activation=self.activation(), + output_size=1, ) - hidden_dense_kwargs = { - "kernel_regularizer": self.kernel_regularizer_, - "kernel_initializer": self.kernel_initializer, - "activation": self.activation, - } - hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__")) - # Create joint hidden layers: - self.joint_layers = [] - for i in range(self.n_hidden_joint_layers): - self.joint_layers.append( - Dense( - self.n_hidden_joint_units, - name="joint_layer_{}".format(i), - **hidden_dense_kwargs, - ) - ) - - logger.info("Construct output score node") - self.scorer = Dense( - 1, - name="output_node", - activation="sigmoid", - kernel_regularizer=self.kernel_regularizer_, + params["embedding_module"] = functools.partial( + DenseNeuralNetwork, + hidden_layers=self.n_hidden_set_layers, + units_per_hidden=self.n_hidden_set_units, + activation=self.activation(), ) + return params diff --git a/csrank/discretechoice/fatelinear_discrete_choice.py b/csrank/discretechoice/fatelinear_discrete_choice.py deleted file mode 100644 index e7ce0569..00000000 --- a/csrank/discretechoice/fatelinear_discrete_choice.py +++ /dev/null @@ -1,61 +0,0 @@ -import logging - -from keras.losses import categorical_hinge - -from csrank.core.fate_linear import FATELinearCore -from csrank.discretechoice.discrete_choice import DiscreteObjectChooser - -logger = logging.getLogger(__name__) - - -class FATELinearDiscreteChoiceFunction(DiscreteObjectChooser, FATELinearCore): - def __init__( - self, - n_hidden_set_units=32, - loss_function=categorical_hinge, - learning_rate=1e-3, - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create a FATELinear-network architecture for leaning discrete choice function. The first-aggregate-then-evaluate - approach learns an embedding of each object and then aggregates that into a context representation - :math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function - :math:`U (x, \\mu_{C(x)})`. - To make it computationally efficient we take the the context :math:`C(x)` as query set :math:`Q`. - The context-representation is evaluated as: - - .. math:: - \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y) - - where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each object :math:`y` to an - :math:`m`-dimensional embedding space :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`. - Training complexity is quadratic in the number of objects and prediction complexity is only linear. - The discrete choice for the given query set :math:`Q` is defined as: - - .. math:: - - dc(Q) := \\operatorname{argmax}_{x \\in Q} \\; U (x, \\mu_{C(x)}) - - Parameters - ---------- - n_hidden_set_units : int - Number of hidden set units. 
- batch_size : int - Batch size to use for training - loss_function : function - Differentiable loss function for the score vector - random_state : int or object - Numpy random state - **kwargs - Keyword arguments for the @FATENetwork - """ - super().__init__( - n_hidden_set_units=n_hidden_set_units, - learning_rate=learning_rate, - batch_size=batch_size, - loss_function=loss_function, - random_state=random_state, - **kwargs, - ) diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py deleted file mode 100644 index 764dfd08..00000000 --- a/csrank/discretechoice/feta_discrete_choice.py +++ /dev/null @@ -1,340 +0,0 @@ -from itertools import combinations -from itertools import permutations -import logging - -from keras import backend as K -from keras import Input -from keras import Model -from keras.layers import Activation -from keras.layers import concatenate -from keras.layers import Dense -from keras.layers import Lambda -from keras.optimizers import SGD -from keras.regularizers import l2 -import numpy as np - -from csrank.core.feta_network import FETANetwork -from csrank.layers import NormalizedDense -from csrank.numpy_util import sigmoid -from .discrete_choice import DiscreteObjectChooser - -logger = logging.getLogger(__name__) - - -class FETADiscreteChoiceFunction(DiscreteObjectChooser, FETANetwork): - def __init__( - self, - n_hidden=2, - n_units=8, - add_zeroth_order_model=False, - max_number_of_objects=10, - num_subsample=5, - loss_function="categorical_hinge", - batch_normalization=False, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="selu", - optimizer=SGD, - metrics=("categorical_accuracy",), - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create a FETA-network architecture for learning the discrete choice functions. - The first-evaluate-then-aggregate approach approximates the context-dependent utility function using the - first-order utility function :math:`U_1 \\colon \\mathcal{X} \\times \\mathcal{X} \\rightarrow [0,1]` - and zeroth-order utility function :math:`U_0 \\colon \\mathcal{X} \\rightarrow [0,1]`. - The scores each object :math:`x` using a context-dependent utility function :math:`U (x, C_i)`: - - .. math:: - U(x_i, C_i) = U_0(x_i) + \\frac{1}{n-1} \\sum_{x_j \\in Q \\setminus \\{x_i\\}} U_1(x_i , x_j) \\, . - - Training and prediction complexity is quadratic in the number of objects. - The discrete choice for the given query set :math:`Q` is defined as: - - .. math:: - - dc(Q) := \\operatorname{argmax}_{x_i \\in Q} \\; U (x_i, C_i) - - Parameters - ---------- - n_hidden : int - Number of hidden layers - n_units : int - Number of hidden units in each layer - add_zeroth_order_model : bool - True if the model should include a latent utility function - max_number_of_objects : int - The maximum number of objects to train from - num_subsample : int - Number of objects to subsample to - loss_function : function - Differentiable loss function for the score vector - batch_normalization : bool - Whether to use batch normalization in the hidden layers - kernel_regularizer : uninitialized keras regularizer - Regularizer to use in the hidden units - kernel_initializer : function or string - Initialization function for the weights of each hidden layer - activation : string or function - Activation function to use in the hidden units - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. 
- optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. - metrics : list - List of evaluation metrics (can be non-differentiable) - batch_size : int - Batch size to use for training - random_state : int or object - Numpy random state - hidden_dense_layer__{kwarg} - Arguments to be passed to the Dense layers (or NormalizedDense - if batch_normalization is enabled). See the keras documentation - for those classes for available options. - """ - self._store_kwargs( - kwargs, {"optimizer__", "kernel_regularizer__", "hidden_dense_layer__"} - ) - super().__init__( - n_hidden=n_hidden, - n_units=n_units, - add_zeroth_order_model=add_zeroth_order_model, - max_number_of_objects=max_number_of_objects, - num_subsample=num_subsample, - loss_function=loss_function, - batch_normalization=batch_normalization, - kernel_regularizer=kernel_regularizer, - kernel_initializer=kernel_initializer, - activation=activation, - optimizer=optimizer, - metrics=metrics, - batch_size=batch_size, - random_state=random_state, - ) - - def _construct_layers(self): - self.input_layer = Input( - shape=(self.n_objects_fit_, self.n_object_features_fit_) - ) - # Todo: Variable sized input - # X = Input(shape=(None, n_features)) - hidden_dense_kwargs = { - "kernel_regularizer": self.kernel_regularizer_, - "kernel_initializer": self.kernel_initializer, - "activation": self.activation, - } - hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__")) - if self.batch_normalization: - if self.add_zeroth_order_model: - self.hidden_layers_zeroth = [ - NormalizedDense( - self.n_units, - name="hidden_zeroth_{}".format(x), - **hidden_dense_kwargs, - ) - for x in range(self.n_hidden) - ] - self.hidden_layers = [ - NormalizedDense( - self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs - ) - for x in range(self.n_hidden) - ] - else: - if self.add_zeroth_order_model: - self.hidden_layers_zeroth = [ - Dense( - self.n_units, - name="hidden_zeroth_{}".format(x), - **hidden_dense_kwargs, - ) - for x in range(self.n_hidden) - ] - self.hidden_layers = [ - Dense(self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs) - for x in range(self.n_hidden) - ] - assert len(self.hidden_layers) == self.n_hidden - self.output_node = Dense( - 1, - activation="linear", - kernel_regularizer=self.kernel_regularizer_, - name="score", - ) - if self.add_zeroth_order_model: - self.output_node_zeroth = Dense( - 1, - activation="linear", - kernel_regularizer=self.kernel_regularizer_, - name="zero_score", - ) - self.weighted_sum = Dense( - 1, - activation="sigmoid", - kernel_regularizer=self.kernel_regularizer_, - name="weighted_sum", - ) - - def construct_model(self): - """ - Construct the :math:`1`-st order and :math:`0`-th order models, which are used to approximate the - :math:`U_1(x, C(x))` and the :math:`U_0(x)` utilities respectively. For each pair of objects in - :math:`x_i, x_j \\in Q` :math:`U_1(x, C(x))` we construct :class:`CmpNetCore` with weight sharing to - approximate a pairwise-matrix. A pairwise matrix with index (i,j) corresponds to the :math:`U_1(x_i,x_j)` - is a measure of how favorable it is to choose :math:`x_i` over :math:`x_j`. Using this matrix we calculate - the borda score for each object to calculate :math:`U_1(x, C(x))`. For `0`-th order model we construct - :math:`\\lvert Q \\lvert` sequential networks whose weights are shared to evaluate the :math:`U_0(x)` for - each object in the query set :math:`Q`. The output mode is using sigmoid activation. 
- - Returns - ------- - model: keras :class:`Model` - Neural network to learn the FETA utility score - """ - - def create_input_lambda(i): - return Lambda(lambda x: x[:, i]) - - if self.add_zeroth_order_model: - logger.debug("Create 0th order model") - zeroth_order_outputs = [] - inputs = [] - for i in range(self.n_objects_fit_): - x = create_input_lambda(i)(self.input_layer) - inputs.append(x) - for hidden in self.hidden_layers_zeroth: - x = hidden(x) - zeroth_order_outputs.append(self.output_node_zeroth(x)) - zeroth_order_scores = concatenate(zeroth_order_outputs) - logger.debug("0th order model finished") - logger.debug("Create 1st order model") - outputs = [list() for _ in range(self.n_objects_fit_)] - for i, j in combinations(range(self.n_objects_fit_), 2): - if self.add_zeroth_order_model: - x1 = inputs[i] - x2 = inputs[j] - else: - x1 = create_input_lambda(i)(self.input_layer) - x2 = create_input_lambda(j)(self.input_layer) - x1x2 = concatenate([x1, x2]) - x2x1 = concatenate([x2, x1]) - - for hidden in self.hidden_layers: - x1x2 = hidden(x1x2) - x2x1 = hidden(x2x1) - - merged_left = concatenate([x1x2, x2x1]) - merged_right = concatenate([x2x1, x1x2]) - - N_g = self.output_node(merged_left) - N_l = self.output_node(merged_right) - - outputs[i].append(N_g) - outputs[j].append(N_l) - # convert rows of pairwise matrix to keras layers: - outputs = [concatenate(x) for x in outputs] - - # compute utility scores: - scores = [ - Lambda(lambda s: K.mean(s, axis=1, keepdims=True))(x) for x in outputs - ] - scores = concatenate(scores) - logger.debug("1st order model finished") - if self.add_zeroth_order_model: - - def get_score_object(i): - return Lambda(lambda x: x[:, i, None]) - - concat_scores = [ - concatenate( - [ - get_score_object(i)(scores), - get_score_object(i)(zeroth_order_scores), - ] - ) - for i in range(self.n_objects_fit_) - ] - scores = [] - for i in range(self.n_objects_fit_): - scores.append(self.weighted_sum(concat_scores[i])) - scores = concatenate(scores) - - # if self.add_zeroth_order_model: - # scores = add([scores, zeroth_order_scores]) - # if self.add_zeroth_order_model: - # def expand_dims(): - # return Lambda(lambda x: x[..., None]) - # - # def squeeze_dims(): - # return Lambda(lambda x: x[:, :, 0]) - # - # scores = expand_dims()(scores) - # zeroth_order_scores = expand_dims()(zeroth_order_scores) - # concat_scores = concatenate([scores, zeroth_order_scores], axis=-1) - # weighted_sum = Conv1D(name='weighted_sum', filters=1, kernel_size=(1), strides=1, activation='linear', - # kernel_initializer=self.kernel_initializer, input_shape=(self.n_objects_fit_, 2), - # kernel_regularizer=self.kernel_regularizer, use_bias=False) - # scores = weighted_sum(concat_scores) - # scores = squeeze_dims()(scores) - if not self.add_zeroth_order_model: - scores = Activation("sigmoid")(scores) - model = Model(inputs=self.input_layer, outputs=scores) - logger.debug("Compiling complete model...") - model.compile( - loss=self.loss_function, - optimizer=self.optimizer_, - metrics=list(self.metrics), - ) - return model - - def _create_weighted_model(self, n_objects): - def get_score_object(i): - return Lambda(lambda x: x[:, i, None]) - - s1 = Input(shape=(n_objects,)) - s2 = Input(shape=(n_objects,)) - concat_scores = [ - concatenate([get_score_object(i)(s1), get_score_object(i)(s2)]) - for i in range(n_objects) - ] - scores = [] - for i in range(n_objects): - scores.append(self.weighted_sum(concat_scores[i])) - scores = concatenate(scores) - model = Model(inputs=[s1, s2], outputs=scores) - 
return model - - def _predict_scores_using_pairs(self, X, **kwd): - n_instances, n_objects, n_features = X.shape - n2 = n_objects * (n_objects - 1) - pairs = np.empty((n2, 2, n_features)) - scores = np.zeros((n_instances, n_objects)) - for n in range(n_instances): - for k, (i, j) in enumerate(permutations(range(n_objects), 2)): - pairs[k] = (X[n, i], X[n, j]) - result = self._predict_pair( - pairs[:, 0], pairs[:, 1], only_pairwise=True, **kwd - )[:, 0] - scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1) - del result - del pairs - if self.add_zeroth_order_model: - scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features)) - scores_zero = scores_zero.reshape(n_instances, n_objects) - model = self._create_weighted_model(n_objects) - scores = model.predict([scores, scores_zero], **kwd) - else: - scores = sigmoid(scores) - return scores - - def _create_zeroth_order_model(self): - inp = Input(shape=(self.n_object_features_fit_,)) - - x = inp - for hidden in self.hidden_layers_zeroth: - x = hidden(x) - zeroth_output = self.output_node_zeroth(x) - - return Model(inputs=[inp], outputs=Activation("sigmoid")(zeroth_output)) diff --git a/csrank/discretechoice/fetalinear_discrete_choice.py b/csrank/discretechoice/fetalinear_discrete_choice.py deleted file mode 100644 index 769dcbf2..00000000 --- a/csrank/discretechoice/fetalinear_discrete_choice.py +++ /dev/null @@ -1,59 +0,0 @@ -import logging - -from keras.losses import categorical_hinge - -from csrank.core.feta_linear import FETALinearCore -from csrank.discretechoice.discrete_choice import DiscreteObjectChooser - -logger = logging.getLogger(__name__) - - -class FETALinearDiscreteChoiceFunction(DiscreteObjectChooser, FETALinearCore): - def __init__( - self, - loss_function=categorical_hinge, - learning_rate=5e-3, - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create a FATELinear-network architecture for leaning discrete choice function. The first-aggregate-then-evaluate - approach learns an embedding of each object and then aggregates that into a context representation - :math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function - :math:`U (x, \\mu_{C(x)})`. - To make it computationally efficient we take the the context :math:`C(x)` as query set :math:`Q`. - The context-representation is evaluated as: - - .. math:: - \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y) - - where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each object :math:`y` to an - :math:`m`-dimensional embedding space :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`. - Training complexity is quadratic in the number of objects and prediction complexity is only linear. - The discrete choice for the given query set :math:`Q` is defined as: - - .. math:: - - dc(Q) := \\operatorname{argmax}_{x \\in Q} \\; U (x, \\mu_{C(x)}) - - Parameters - ---------- - n_hidden_set_units : int - Number of hidden set units. 
- batch_size : int - Batch size to use for training - loss_function : function - Differentiable loss function for the score vector - random_state : int or object - Numpy random state - **kwargs - Keyword arguments for the @FATENetwork - """ - super().__init__( - learning_rate=learning_rate, - batch_size=batch_size, - loss_function=loss_function, - random_state=random_state, - **kwargs, - ) diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py deleted file mode 100644 index 96ae4b1b..00000000 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ /dev/null @@ -1,98 +0,0 @@ -import logging - -from keras.optimizers import SGD -from keras.regularizers import l2 - -from csrank.choicefunction.util import generate_complete_pairwise_dataset -from csrank.core.ranknet_core import RankNetCore -from .discrete_choice import DiscreteObjectChooser - -logger = logging.getLogger(__name__) - - -class RankNetDiscreteChoiceFunction(DiscreteObjectChooser, RankNetCore): - def __init__( - self, - n_hidden=2, - n_units=8, - loss_function="binary_crossentropy", - batch_normalization=True, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="relu", - optimizer=SGD, - metrics=("binary_accuracy",), - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create an instance of the :class:`RankNetCore` architecture for learning a choice function. - It breaks the preferences into pairwise comparisons and learns a latent utility model for the objects. - This network learns a latent utility score for each object in the given query set - :math:`Q = \\{x_1, \\ldots ,x_n\\}` using the equation :math:`U(x) = F(x, w)` where :math:`w` is the weight - vector. It is estimated using *pairwise preferences* generated from the discrete choices. - - The discrete choice for the given query set :math:`Q` is defined as: - - .. math:: - - ρ(Q) = \\operatorname{argsort}_{x \\in Q} \\; U(x) - - Parameters - ---------- - n_hidden : int - Number of hidden layers used in the scoring network - n_units : int - Number of hidden units in each layer of the scoring network - loss_function : function or string - Loss function to be used for the binary decision task of the pairwise comparisons - batch_normalization : bool - Whether to use batch normalization in each hidden layer - kernel_regularizer : uninitialized keras regularizer - Regularizer function applied to all the hidden weight matrices. - kernel_initializer : function or string - Initialization function for the weights of each hidden layer - activation : function or string - Type of activation function to use in each hidden layer - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. - optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. - metrics : list - List of metrics to evaluate during training (can be non-differentiable) - batch_size : int - Batch size to use during training - random_state : int, RandomState instance or None - Seed of the pseudo-random generator or a RandomState instance - **kwargs - Keyword arguments for the algorithms - - References - ---------- - [1] Burges, C. et al. (2005, August). "Learning to rank using gradient descent.", In Proceedings of the 22nd international conference on Machine learning (pp. 89-96). ACM. - - [2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581). 
- """ - super().__init__( - n_hidden=n_hidden, - n_units=n_units, - loss_function=loss_function, - batch_normalization=batch_normalization, - kernel_regularizer=kernel_regularizer, - kernel_initializer=kernel_initializer, - activation=activation, - optimizer=optimizer, - metrics=metrics, - batch_size=batch_size, - random_state=random_state, - **kwargs, - ) - logger.info("Initializing network") - - def _convert_instances_(self, X, Y): - logger.debug("Creating the Dataset") - x1, x2, garbage, garbage, y_single = generate_complete_pairwise_dataset(X, Y) - del garbage - logger.debug("Finished the Dataset instances {}".format(x1.shape[0])) - return x1, x2, y_single diff --git a/csrank/dyadranking/fate_dyad_ranker.py b/csrank/dyadranking/fate_dyad_ranker.py deleted file mode 100644 index 59d00244..00000000 --- a/csrank/dyadranking/fate_dyad_ranker.py +++ /dev/null @@ -1,16 +0,0 @@ -from csrank.core.fate_network import FATENetwork -from csrank.dyadranking.dyad_ranker import DyadRanker -from csrank.numpy_util import scores_to_rankings - - -class FATEDyadRanker(FATENetwork, DyadRanker): - def fit(self, Xo, Xc, Y, **kwargs): - self._pre_fit() - return self - - def predict_scores(self, Xo, Xc, **kwargs): - return self.model_.predict([Xo, Xc], **kwargs) - - def predict(self, Xo, Xc, **kwargs): - s = self.predict_scores(Xo, Xc, **kwargs) - return scores_to_rankings(s) diff --git a/csrank/layers.py b/csrank/layers.py deleted file mode 100644 index a5e971c9..00000000 --- a/csrank/layers.py +++ /dev/null @@ -1,160 +0,0 @@ -import logging - -from keras.layers import Activation -from keras.layers import BatchNormalization -from keras.layers import Dense -from keras.layers import Input -from keras.layers import Lambda -from keras.layers.merge import average -from keras.models import Model - -__all__ = ["NormalizedDense", "DeepSet", "create_input_lambda"] -logger = logging.getLogger(__name__) - - -class NormalizedDense(object): - """Stop training when a monitored quantity has stopped improving. - # Arguments - units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "relu:). - normalize_before_activation: True if normalize the inputs before applying the activation. - False if activation is applied before Bach Normalization - """ - - def __init__( - self, units, activation="relu", normalize_before_activation=False, **kwd - ): - self.dense = Dense(units, activation="linear", **kwd) - self.activation = Activation(activation=activation) - self.batchnorm = BatchNormalization() - self.norm_layer = None - self.normalize_before_activation = normalize_before_activation - - def __call__(self, x): - if self.normalize_before_activation: - return self.activation(self.batchnorm(self.dense(x))) - else: - return self.batchnorm(self.activation(self.dense(x))) - - def get_weights(self): - w_b = self.batchnorm.get_weights() - w_d = self.dense.get_weights() - return w_b, w_d - - def set_weights(self, weights): - w_b, w_d = weights - self.batchnorm.set_weights(w_b) - self.dense.set_weights(w_d) - - -class DeepSet(object): - """Deep layer for learning representations for sets of objects. 
- - Parameters - ---------- - units : int - Number of units in each representation layer - - layers : int - Number of layers to use for learning the representation - - activation : string, optional (default='selu') - Activation function to use in each unit - - kernel_initializer : string, optional (default='lecun_normal') - Initializer for the weight matrix - - input_shape : array_like - Should provide (n_objects, n_features) (DEPRECATED) - - Attributes - ---------- - model : Keras model - Representing the complete deep set layer - - set_mapping_layers : list - List of densely connected hidden layers - """ - - def __init__( - self, - units, - layers=1, - activation="selu", - kernel_initializer="lecun_normal", - kernel_regularizer=None, - input_shape=None, - **kwargs, - ): - self.n_units = units - if input_shape is not None: - logger.warning( - "input_shape is deprecated, since the number " - "of objects is now inferred" - ) - self.n_features = input_shape[1] - self.n_layers = layers - self.activation = activation - self.kernel_initializer = kernel_initializer - self.kernel_regularizer = kernel_regularizer - - self.cached_models = dict() - self._construct_layers( - kernel_initializer=kernel_initializer, - kernel_regularizer=kernel_regularizer, - activation=activation, - **kwargs, - ) - - def _construct_layers(self, **kwargs): - # Create set representation layers: - self.set_mapping_layers = [] - for i in range(self.n_layers): - self.set_mapping_layers.append( - Dense(self.n_units, name="set_layer_{}".format(i), **kwargs) - ) - - def _create_model(self, shape): - n_objects, n_features = shape[1].value, shape[2].value - if hasattr(self, "n_features"): - if self.n_features != n_features: - logger.error("Number of features is not consistent.") - input_layer = Input(shape=(n_objects, n_features)) - inputs = [create_input_lambda(i)(input_layer) for i in range(n_objects)] - - # Connect input tensors with set mapping layer: - set_mappings = [] - for i in range(n_objects): - curr = inputs[i] - for j in range(len(self.set_mapping_layers)): - curr = self.set_mapping_layers[j](curr) - set_mappings.append((i, curr)) - - # TODO: is feature_repr used outside? - x_values = [x for (j, x) in set_mappings] - feature_repr = average(x_values) if len(x_values) > 1 else x_values[0] - - self.cached_models[n_objects] = Model(inputs=input_layer, outputs=feature_repr) - - def __call__(self, x): - shape = x.shape - n_objects = shape[1].value - if n_objects not in self.cached_models: - self._create_model(shape) - return self.cached_models[n_objects](x) - - def get_weights(self): - w_set = [x.get_weights() for x in self.set_mapping_layers] - return w_set - - def set_weights(self, weights): - for i, layer in enumerate(self.set_mapping_layers): - layer.set_weights(weights[i]) - - -def create_input_lambda(i): - """Extracts off an object tensor from an input tensor""" - return Lambda(lambda x: x[:, i]) diff --git a/csrank/learner.py b/csrank/learner.py index 29caa737..752ca750 100644 --- a/csrank/learner.py +++ b/csrank/learner.py @@ -1,10 +1,9 @@ from abc import ABCMeta from abc import abstractmethod -import inspect import logging -from keras.layers import Dense from sklearn.base import BaseEstimator +from skorch import NeuralNet logger = logging.getLogger(__name__) @@ -24,7 +23,6 @@ def _store_kwargs(self, kwargs, allowed_prefixes): Raises an exception if one of the kwargs does not match a whiltelisted prefix. 
""" - self.allowed_prefixes_ = allowed_prefixes def starts_with_legal_prefix(key): for prefix in allowed_prefixes: @@ -48,101 +46,6 @@ def _get_prefix_attributes(self, prefix): """ return filter_dict_by_prefix(self.__dict__, prefix) - def _initialize_optimizer(self): - optimizer_params = self._get_prefix_attributes("optimizer__") - self.optimizer_ = self.optimizer(**optimizer_params) - - def _initialize_regularizer(self): - regularizer_params = self._get_prefix_attributes("kernel_regularizer__") - if self.kernel_regularizer is not None: - self.kernel_regularizer_ = self.kernel_regularizer(**regularizer_params) - else: - # No regularizer is an option. - logger.warning("You specified regularizer parameters but no regularizer.") - self.kernel_regularizer_ = None - - def set_params(self, **params): - """Set a hyper-paramter for this learner. - - Accepts the same parameters as __init__. - """ - legal_parameters = self.get_params().keys() - for param in params.keys(): - if param not in legal_parameters: - raise TypeError( - f"Unexpected parameter for {type(self).__name__}: `{param}.` Legal parameters are {set(legal_parameters)}." - ) - vars(self).update(params) - - def _prefix_to_class_mapping(self): - """Map nested parameter prefixes to the classes they are passed to. - - Necessary for get_params. - """ - result = dict() - allowed_prefixes = ( - self.allowed_prefixes_ if hasattr(self, "allowed_prefixes_") else [] - ) - for prefix in allowed_prefixes: - base_parameter = prefix[:-2] # prefixes always end with two underscores - if hasattr(self, base_parameter): - result[prefix] = vars(self)[base_parameter] - # This is a hack to work with our common "hidden_dense_layer__" - # arguments. They do not correspond to a single hidden_dense_layer - # attribute. They are passed to all hidden dense layers that are - # part of the network. Therefore we just hardcode the "Dense" class - # for them. - elif base_parameter == "hidden_dense_layer": - result[prefix] = Dense - else: - raise ValueError( - f"Prefix {prefix} could not be associated to any class." - ) - return result - - def get_params(self, deep=True): - """Return all hyperparmeters of this learner. - - Limitation: This does not recurse into parameters, so it only works for a - single layer. - - Parameters - ---------- - deep: bool, default=True - Whether or not to return parameters of subobjects as well. Support - for this is currently limited, so parameters of subobjects are - returned on a best-effort basis if they were passed with the - subobject__parameter convention. - - Returns - ------- - dict - A dictionary of parameters. - """ - # Get all the regular parameters form BaseEstimator. - result = super().get_params() - - if not deep: - return result - - # Handle the parameter that could be passed to uninitialized subclasses - # (optimizer__lr etc.). 
- parameters_for_prefix = dict() - for (prefix, base_class) in self._prefix_to_class_mapping().items(): - parameters_for_prefix = dict() - signature = inspect.signature(base_class) - for parameter in signature.parameters: - if signature.parameters[parameter].default != inspect._empty: - parameters_for_prefix[parameter] = signature.parameters[ - parameter - ].default - # Override with explicitly set parameter values - parameters_for_prefix.update(self._get_prefix_attributes(prefix)) - for (arg, default) in parameters_for_prefix.items(): - result[prefix + arg] = default - - return result - @abstractmethod def fit(self, X, Y, **kwargs): """ @@ -289,3 +192,119 @@ def __subclasshook__(cls, C): ): return True return NotImplemented + + +class SkorchInstanceEstimator(NeuralNet, Learner): + """Base estimator for torch-based ranking and choice tasks. + + This establishes the basic interface of a cs-ranking learner that is + compatible with scikit-learn. It is based on a skorch estimator with the + added assumption that the ``module`` expects the number of features per + object as a parameter. The ``module`` should then predict a score for each + object, which can later be converted to a prediction (i.e. a ranking, a + general choice or a discrete choice). To derive a new estimator you should + therefore override the constructor to set default values for the ``module`` + and the ``criterion`` parameter. You should also override the + ``predict_for_scores`` function to specify how the scores can be converted + to the target prediction. You may use one of the existing mixins such as + ``ObjectRanker`` for that purpose. + + See the documentation of ``skorch.NeuralNet`` for a description of the + possible parameters. + """ + + def _get_extra_module_parameters(self): + """Return extra parameters that should be passed to the module. + + You should take care to update the dictionary from the ``super`` + implementation when overriding this function. You usually do not want + to just discard the parameters that are specified by the super class. + """ + return {"n_features": self.n_features_} + + def get_params_for(self, prefix): + """Return the init parameters for an attribute. + + This extends the ``get_params_for`` function from skorch to inject + custom module parameters. This allows us to pass parameters that do not + directly correspond to parameters of this estimator while also sticking + to the scikit-learn estimator API. Overriding this function is + preferable to overriding ``initialize_module`` since this function + does not modify the object's state and we can simply extend the results + of a ``super`` delegation. + """ + params = super().get_params_for(prefix) + + if prefix == "module": + # Explicitly set parameters override the default values. + defaults = self._get_extra_module_parameters() + defaults.update(params) + return defaults + else: + return params + + def fit(self, X, y=None, **fit_params): + """Fit the estimator to data. + + This derives the number of object features from the data and then + delegates to ``skorch.NeuralNet.fit``. See the documentation of that + method for more details. + + Parameters + ---------- + X : input data + May take various forms, such as numpy arrays or torch datasets. See + the documentation of ``skorch.NeuralNet.fit`` for more details. + + y : target data + May take the same forms as ``X``. This is optional since the target + data may already be included in the data structure that is passed + as ``X``.
See the documentation of ``skorch.NeuralNet.fit`` for + more details. + + **fit_params : dict + Additional fit parameters. See the documentation of + ``skorch.NeuralNet.fit`` for more details. + """ + dataset = self.get_dataset(X, y) + (_n_objects, self.n_features_) = dataset[0][0].shape + return NeuralNet.fit(self, X=dataset, y=None, **fit_params) + + def predict(self, X, **kwargs): + """Predict targets for inputs. + + This delegates to ``csrank.Learner.predict``. See the documentation of + that function for details. + + Parameters + ---------- + X : dict or numpy array + Dictionary with a mapping from the query set size to numpy arrays or a single numpy array of size: + (n_instances, n_objects, n_features) + + Returns + ------- + Y : dict or numpy array + Dictionary with a mapping from the query set size to numpy arrays or a single numpy array containing + predicted preferences of size: + (n_instances, n_objects) + """ + return Learner.predict(self, X, **kwargs) + + def _predict_scores_fixed(self, X, **kwargs): + """Predict scores for a collection of sets of objects of the same size. + + This simply queries the torch module for a prediction on the input + data, which can then be interpreted as scores. + + Parameters + ---------- + X : array-like, shape (n_samples, n_objects, n_features) + The input data. + + Returns + ------- + Y : array-like, shape (n_samples, n_objects) + The predicted scores. + """ + return self.predict_proba(X, **kwargs) diff --git a/csrank/losses.py b/csrank/losses.py deleted file mode 100644 index 476db363..00000000 --- a/csrank/losses.py +++ /dev/null @@ -1,74 +0,0 @@ -from keras import backend as K -import tensorflow as tf - -from csrank.tensorflow_util import tensorify - -__all__ = [ - "hinged_rank_loss", - "make_smooth_ndcg_loss", - "smooth_rank_loss", - "plackett_luce_loss", -] - - -def identifiable(loss_function): - def wrap_loss(y_true, y_pred): - alpha = 1e-4 - ss = tf.reduce_sum(tf.square(y_pred), axis=1) - return alpha * ss + loss_function(y_true, y_pred) - - return wrap_loss - - -@identifiable -def hinged_rank_loss(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - mask = K.cast(K.greater(y_true[:, None] - y_true[:, :, None], 0), dtype="float32") - diff = y_pred[:, :, None] - y_pred[:, None] - hinge = K.maximum(mask * (1 - diff), 0) - n = K.sum(mask, axis=(1, 2)) - return K.sum(hinge, axis=(1, 2)) / n - - -@identifiable -def smooth_rank_loss(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - mask = K.cast(K.greater(y_true[:, None] - y_true[:, :, None], 0), dtype="float32") - exped = K.exp(y_pred[:, None] - y_pred[:, :, None]) - result = K.sum(exped * mask, axis=[1, 2]) - return result / K.sum(mask, axis=(1, 2)) - - -@identifiable -def plackett_luce_loss(y_true, s_pred): - y_true = tf.cast(y_true, dtype="int32") - s_pred = tf.cast(s_pred, dtype="float32") - m = tf.shape(y_true)[1] - raw_max = tf.reduce_max(s_pred, axis=1, keepdims=True) - max_elem = tf.stop_gradient( - tf.where(tf.is_finite(raw_max), raw_max, tf.zeros_like(raw_max)) - ) - exped = tf.exp(tf.subtract(s_pred, max_elem)) - masks = tf.greater_equal(y_true, tf.range(m)[:, None, None]) - tri = exped * tf.cast(masks, tf.float32) - lse = tf.reduce_sum(tf.log(tf.reduce_sum(tri, axis=2)), axis=0) - return lse - tf.reduce_sum(s_pred, axis=1) - - -def make_smooth_ndcg_loss(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - n_objects = K.max(y_true) + 1.0 - y_true_f = K.cast(y_true, "float32") - relevance = n_objects - y_true_f - 1.0 -
log_term = K.log(relevance + 2.0) / K.log(2.0) - exp_relevance = K.pow(2.0, relevance) - 1.0 - gains = exp_relevance / log_term - - # Calculate ideal dcg: - idcg = K.sum(gains, axis=-1) - - # Calculate smoothed dcg: - exped = K.exp(y_pred) - exped = exped / K.sum(exped, axis=-1, keepdims=True) - # toppred, toppred_ind = tf.nn.top_k(gains * exped, k) - return 1 - K.sum(exped * gains, axis=-1) / idcg diff --git a/csrank/metrics.py b/csrank/metrics.py deleted file mode 100644 index 28cca809..00000000 --- a/csrank/metrics.py +++ /dev/null @@ -1,431 +0,0 @@ -"""Various metrics that can be used to evaluate rankings. - -All metrics take two parameters: `y_true` and `y_pred`. Both of these -are (n_instances, n_objects) shaped arrays of integers. We call these -arrays rankings. The element (i, j) of a ranking specifies the rank of -the jth object in the ith instance. `y_true` should be set to the -"ground truth" to evaluate against, while `y_pred` is the prediction -that should be evaluated. - -Examples --------- -Lets assume we have two instances: ABCD and abcd. The "ground truth" -rankings are A > D > C > B and d < c < a < b. - -We applied some ranking algorithm, which gave rankings of A > C > D > B -and d < a < b < c respectively. - -Let's use some of the metrics defined here to evaluate the performance -of our ranker: - -First encode the ground truth as a list of rankings. 0 is the highest -rank: ->>> y_true = [ -... [0, 3, 2, 1], # A > D > C > B, 0 is the highest rank -... [2, 3, 1, 0], # d < c < a < b -... ] - -Now similarly encode our prediction: ->>> y_pred = [ -... [0, 3, 1, 2], # A > C > D > B -... [1, 2, 3, 0], # d < a < b < c -... ] - -Evaluate with a simple zero-one loss: ->>> from keras import backend as K ->>> K.eval(zero_one_rank_loss(y_true, y_pred)) -0.25 - -This is what we would expect: 25% of the objects were ranked at exactly the -right place. 
This might not be the most realistic metric, so let's try the -expected reciprocal rank instead: - ->>> K.eval(err(y_true, y_pred)) -0.6365559895833333 -""" -from functools import partial - -from keras import backend as K -import numpy as np -import tensorflow as tf - -from csrank.tensorflow_util import get_instances_objects -from csrank.tensorflow_util import scores_to_rankings -from csrank.tensorflow_util import tensorify - -__all__ = [ - "zero_one_rank_loss", - "zero_one_rank_loss_for_scores", - "zero_one_rank_loss_for_scores_ties", - "make_ndcg_at_k_loss", - "kendalls_tau_for_scores", - "spearman_correlation_for_scores", - "zero_one_accuracy", - "zero_one_accuracy_for_scores", - "topk_categorical_accuracy", -] - - -def zero_one_rank_loss(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - mask = K.greater(y_true[:, None] - y_true[:, :, None], 0) - # Count the number of mistakes (here position difference less than 0) - mask2 = K.less(y_pred[:, None] - y_pred[:, :, None], 0) - mask3 = K.equal(y_pred[:, None] - y_pred[:, :, None], 0) - - # Calculate Transpositions - transpositions = tf.logical_and(mask, mask2) - transpositions = K.sum(K.cast(transpositions, dtype="float32"), axis=[1, 2]) - - n_objects = K.max(y_true) + 1 - transpositions += ( - K.sum(K.cast(mask3, dtype="float32"), axis=[1, 2]) - n_objects - ) / 4.0 - denominator = K.cast((n_objects * (n_objects - 1.0)) / 2.0, dtype="float32") - result = transpositions / denominator - return K.mean(result) - - -def zero_one_accuracy(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - n_instances, n_objects = get_instances_objects(y_true) - equal_ranks = K.cast(K.all(K.equal(y_pred, y_true), axis=1), dtype="float32") - denominator = K.cast(n_instances, dtype="float32") - zero_one_loss = K.sum(equal_ranks) / denominator - return zero_one_loss - - -def zero_one_rank_loss_for_scores(y_true, s_pred): - return zero_one_rank_loss_for_scores_ties(y_true, s_pred) - - -def zero_one_rank_loss_for_scores_ties(y_true, s_pred): - y_true, s_pred = tensorify(y_true), tensorify(s_pred) - n_objects = K.cast(K.max(y_true) + 1, dtype="float32") - mask = K.greater(y_true[:, None] - y_true[:, :, None], 0) - mask2 = K.greater(s_pred[:, None] - s_pred[:, :, None], 0) - mask3 = K.equal(s_pred[:, None] - s_pred[:, :, None], 0) - - # Calculate Transpositions - transpositions = tf.logical_and(mask, mask2) - transpositions = K.sum(K.cast(transpositions, dtype="float32"), axis=[1, 2]) - transpositions += ( - K.sum(K.cast(mask3, dtype="float32"), axis=[1, 2]) - n_objects - ) / 4.0 - - denominator = n_objects * (n_objects - 1.0) / 2.0 - result = transpositions / denominator - return K.mean(result) - - -def make_ndcg_at_k_loss(k=5): - r"""Computes the Normalized Discounted Cumulative Gain - - The Discounted Cumulative Gain is the sum of the document's relevancies, - logarithmically discounted by their rank. That means the DCG is higher when - the more relevant documents are highly ranked, lower otherwise. - - Concretely: - - .. math:: - \mathrm{DCG}_p = \sum_{i = 1}^p \frac{\mathit{rel}_i}{\log_2(i + 1)} - - Where :math:`\mathit{rel}_i` is the relevance of the document that is - ranked at :math:`i`. Since this library deals with ranks, not relevances, - it is necessary to define a conversion between the two. We define the - relevance of an item as :math:`2^{\mathit{inv}}` where :math:`\mathit{inv}` - is the negative rank normalized to :math:`[0, 1]`. 
An alternative way to - view this is that :math:`\mathit{inv}` is the relevancy and our definition - of ndcg exponentially discounts relevancies. - - To make the DCG comparable across different rankings (particularly rankings - of different length), it is normalized by the ideal DCG. The resulting nDCG - can be described as - - .. math:: - \mathrm{nDCG}_p = \frac{\mathrm{DCG}_p}{\mathrm{IDCG}_p} - - with - - .. math:: - \mathrm{IDCG}_p - = \sum_{i = 1}^{\lvert \mathit{REL}_p \rvert} \frac{\mathit{rel}}{\log_2(i + 1)} - - where :math:`\mathit{REL}_p` is the list of relevant documents and - :math:`\mathit{rel_i}` are the document relevancies in decreasing order. - - It follows that the nDCG is always a value in :math:`(0, 1]`, with - :math:`1` being the best value. - - Parameters - ---------- - k: int - The length of the ranking for evaluation purposes. If the actual - ranking is longer than `k`, only the (true) top `k` entries are - considered. This is often more useful than considering the full - ranking, for example when only a subset of the elements will actually - be presented to a user. - """ - - def ndcg(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - - max_rank = K.max(y_true) - - def rank_to_relevance(rank): - # Convert a rank to a relevance, which is (somewhat arbitrarily) - # defined to be inversely proportional to the rank and normalized - # to [0, 1]. Other conversion functions are possible. - normalized_inverse = (max_rank - rank) / max_rank - # define the relevance as 2**a - return K.pow(2.0, normalized_inverse) - 1.0 - - relevance_true = rank_to_relevance(y_true) - relevance_pred = rank_to_relevance(y_pred) - - # Calculate ideal dcg: - most_relevant_items, most_relevant_idx = tf.math.top_k(relevance_true, k) - # arange starts at 0, but ranks start at 1 and the log term starts at 2 - log_term = K.log(K.arange(k, dtype="float32") + 2.0) - # keras only natively supports the natural logarithm, have to switch base - log2_term = log_term / K.log(2.0) - idcg = K.sum(most_relevant_items / log2_term, axis=-1, keepdims=True) - - # Calculate actual dcg: - - # The index of the row of every element in toppred_ind, i.e. - # [[0, 0], - # [1, 1]] - row_ind = K.cumsum(K.ones_like(most_relevant_idx, dtype="int32"), axis=0) - 1 - - # Indices of the k truly most relevant items, sorted by relevance. We - # want to sort the predictions based on those indices, since that is - # what we're trying to match. 
- full_indices = K.stack([row_ind, most_relevant_idx], axis=-1) - - # Predicted relevances for the items that *should* have the top k - # slots, ordered by the relevance rank they *should* have (so the - # log2_term from the true predictions still has the right oder) - top_k_preds = tf.gather_nd(relevance_pred, full_indices) - - weighted = top_k_preds / log2_term - dcg = K.sum(weighted, axis=-1, keepdims=True) - - gain = dcg / idcg - return gain - - return ndcg - - -def kendalls_tau_for_scores(y_true, y_pred): - return 1.0 - 2.0 * zero_one_rank_loss_for_scores(y_true, y_pred) - - -def spearman_correlation_for_scores(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - n_instances, n_objects = get_instances_objects(y_true) - predicted_rankings = scores_to_rankings(n_objects, y_pred) - y_true = K.cast(y_true, dtype="float32") - sum_of_squared_distances = tf.constant(0.0) - for i in np.arange(K.int_shape(y_pred)[1]): - objects_pred = predicted_rankings[:, i] - objects_true = y_true[:, i] - t = (objects_pred - objects_true) ** 2 - sum_of_squared_distances = sum_of_squared_distances + tf.reduce_sum(t) - denominator = K.cast( - n_objects * (n_objects ** 2 - 1) * n_instances, dtype="float32" - ) - spearman_correlation = 1 - (6 * sum_of_squared_distances) / denominator - return spearman_correlation - - -def zero_one_accuracy_for_scores(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - n_instances, n_objects = get_instances_objects(y_true) - predicted_rankings = scores_to_rankings(n_objects, y_pred) - y_true = K.cast(y_true, dtype="float32") - equal_ranks = K.cast( - K.all(K.equal(predicted_rankings, y_true), axis=1), dtype="float32" - ) - denominator = K.cast(n_instances, dtype="float32") - zero_one_loss = K.sum(equal_ranks) / denominator - return zero_one_loss - - -def topk_categorical_accuracy(k=5): - def topk_acc(y_true, y_pred): - y_true, y_pred = tensorify(y_true), tensorify(y_pred) - acc = tf.nn.in_top_k(y_pred, tf.argmax(y_true, axis=-1), k=k) - acc = K.cast(acc, dtype="float32") - return acc - - return topk_acc - - -def relevance_gain(grading, max_grade): - """Maps a ranking (0 to `max_grade`, lower is better) to its gain. - - The gain is defined similar to the Discounted Cumulative Gain (DCG) - metric. The value is always in [0, 1]. Therefore, it can be - interpreted as a probability. - - Parameters - ---------- - max_grade: float - The highest achievable grade. - grading: float - A grading. Higher gradings are assumed to be better. - 0 <= grading <= max_grade must always hold. - - Tests - ----- - >>> y_true = [0, 3, 2, 1] # (A > D > C > B) - >>> K.eval(relevance_gain(y_true, max_grade=3)).tolist() - [0.875, 0.0, 0.125, 0.375] - """ - grading = tensorify(grading) - inverse_grading = -grading + tf.cast(max_grade, grading.dtype) - return (2 ** inverse_grading - 1) / (2 ** tf.cast(max_grade, tf.float32)) - - -def err(y_true, y_pred, utility_function=None, probability_mapping=None): - """Computes the Expected Reciprocal Rank or any Cascade Metric. - - ERR[1] is the cascade metric with the reciprocal rank as the utility - function. - - Parameters - ---------- - y_true: list of int - The "ground truth" ranking. In this case, this does not need to - actually be a ranking. It can be any 2 dimensional array whose - elements can be transformed to probabilities by the - `probability_mapping`. - y_pred: list of int - The predicted ranking that is to be evaluated. 
-    probability_mapping: list of int -> list of float
-        A function that maps the elements of `y_true` to probabilities.
-        Those values are then interpreted as the probability that the
-        corresponding object satisfies the user's need. If `None` is
-        specified, the `relevance_gain` function with `max_grade` set to
-        the highest grade occurring in the grading is used.
-    utility_function: int -> float
-        A function that maps a rank (0 being the highest) to its
-        "utility". If `None` is specified, this is defined as the
-        reciprocal of the rank (resulting in the ERR metric). If a
-        different utility is specified, this function can compute any
-        cascade metric. Corresponds to the function represented by
-        :math:`\\phi` in [1]. This will usually be a monotonically
-        decreasing function, since the user is more likely to examine
-        the first few results and therefore more likely to derive
-        utility from them.
-
-    Examples
-    --------
-
-    First, let's keep the default values and evaluate a ranking:
-    >>> y_true = [
-    ...     [0, 3, 2, 1],
-    ...     [2, 3, 1, 0],
-    ... ]
-    >>> y_pred = [
-    ...     [0, 3, 1, 2],
-    ...     [1, 2, 3, 0],
-    ... ]
-    >>> K.eval(err(y_true, y_pred))
-    0.6365559895833333
-
-    Instead of relying on the relevance gain, we can also explicitly specify
-    our own probabilities:
-    >>> y_true = [
-    ...     [0.3, 0.6, 0.05, 0.05],
-    ...     [0.1, 0.1, 0.1, 0.7],
-    ... ]
-    >>> y_pred = [
-    ...     [0, 3, 1, 2],
-    ...     [1, 2, 3, 0],
-    ... ]
-
-    Now `y_true[i, j]` is the probability that object `j` in instance `i`
-    satisfies the user's need. To use these probabilities unchanged, we need
-    to override the probability mapping with the identity:
-
-    >>> probability_mapping = lambda x: x
-
-    Let us further specify that the rank utilities decrease in an exponential
-    manner, e.g. every rank is only half as "valuable" as its predecessor:
-    >>> utility_function = lambda r: 1/2**(r - 1)  # start with 2**0 = 1
-
-    We can now evaluate the metric:
-    >>> K.eval(err(
-    ...     y_true,
-    ...     y_pred,
-    ...     probability_mapping=probability_mapping,
-    ...     utility_function=utility_function,
-    ... ))
-    0.3543499991945922
-
-    The resulting metric is technically no longer an expected reciprocal rank,
-    since the utility is not given by the reciprocal of the rank. It is a
-    different version of a cascade metric. The original paper [1] called it an
-    abandonment cascade (with gamma = 1/2), so let us define a new name for it:
-
-    >>> from functools import partial
-    >>> abandonment_cascade_half = partial(
-    ...     err,
-    ...     probability_mapping=probability_mapping,
-    ...     utility_function=utility_function,
-    ... )
-    >>> K.eval(abandonment_cascade_half(y_true, y_pred))
-    0.3543499991945922
-
-    References
-    ----------
-    [1] Chapelle, Olivier, et al. "Expected reciprocal rank for graded
-    relevance." Proceedings of the 18th ACM conference on Information and
-    knowledge management. ACM, 2009.
-    http://olivier.chapelle.cc/pub/err.pdf
-    """
-    if probability_mapping is None:
-        max_grade = tf.reduce_max(y_true)
-        probability_mapping = partial(relevance_gain, max_grade=max_grade)
-    if utility_function is None:
-
-        def reciprocal_rank(rank):
-            return 1 / rank
-
-        utility_function = reciprocal_rank
-    y_true, y_pred = tensorify(y_true), tensorify(y_pred)
-
-    ninstances = tf.shape(y_pred)[0]
-    nobjects = tf.shape(y_pred)[1]
-
-    # Using y_true and the probability mapping, we can derive the
-    # probability that each object satisfies the user's need (we need to
-    # map over the flattened array and then restore the shape):
-    satisfied_probs = tf.reshape(
-        tf.map_fn(probability_mapping, tf.reshape(y_true, (-1,))), tf.shape(y_true)
-    )
-
-    # sort satisfied probabilities according to the predicted ranking
-    rows = tf.range(0, ninstances)
-    rows_cast = tf.broadcast_to(tf.reshape(rows, (-1, 1)), tf.shape(y_pred))
-    full_indices = tf.stack([rows_cast, tf.cast(y_pred, tf.int32)], axis=2)
-    satisfied_at_rank = tf.gather_nd(satisfied_probs, full_indices)
-
-    not_satisfied_n_times = tf.cumprod(1 - satisfied_at_rank, axis=1, exclusive=True)
-
-    # And from the positions predicted in y_pred we can further derive
-    # the utilities of each object given their position:
-    utilities = tf.map_fn(
-        utility_function, tf.range(1, nobjects + 1), dtype=tf.float64,
-    )
-
-    discount_at_rank = tf.cast(not_satisfied_n_times, tf.float64) * tf.reshape(
-        utilities, (1, -1)
-    )
-    discounted_document_values = (
-        tf.cast(satisfied_at_rank, tf.float64) * discount_at_rank
-    )
-    results = tf.reduce_sum(discounted_document_values, axis=1)
-
-    return K.mean(results)
diff --git a/csrank/modules/instance_reduction.py b/csrank/modules/instance_reduction.py
new file mode 100644
index 00000000..72d405f3
--- /dev/null
+++ b/csrank/modules/instance_reduction.py
@@ -0,0 +1,63 @@
+"""Modules that reduce instances to some kind of feature representation.
+
+The modules listed here should take a 2 or higher dimensional input and reduce
+the second-to-last dimension. They can take interactions of "elements" (feature
+vectors) into account.
+
+Inputs of shape :math:`(N, *, O, H_i)` are transformed to outputs of shape
+:math:`(N, *, H_o)`. In this case :math:`N` is the batch size, :math:`*`
+denotes arbitrary additional dimensions (which are preserved), :math:`O` refers
+to the number of objects per instance (the reduced dimension) and :math:`H_i`
+and :math:`H_o` refer to the number of input and output features respectively.
+"""
+
+import torch
+import torch.nn as nn
+
+
+class DeepSet(nn.Module):
+    """Aggregate object-level embeddings with a mean reduction.
+
+    This module evaluates each object individually (using an object-level
+    embedding) and then aggregates the embeddings with a mean reduction.
+
+    Parameters
+    ----------
+    n_features : int
+        The number of features per object.
+
+    embedding_size : int
+        The target embedding size.
+
+    embedding_module : torch module
+        An uninitialized torch module that expects two parameters: the input
+        and the output size. It should then act similar to ``nn.Linear``, i.e.
+        transform only the last dimension of the input. Defaults to a simple
+        linear module.
+    """
+
+    def __init__(
+        self,
+        n_features: int,
+        embedding_size: int,
+        embedding_module: nn.Module = nn.Linear,
+    ):
+        super().__init__()
+        self.embedding_module = embedding_module(n_features, embedding_size)
+
+    def forward(self, instances):
+        """Forward inputs through the network.
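+
+        With an identity embedding this is just a feature-wise mean over the
+        object dimension, which gives a minimal sanity check (``nn.Identity``
+        simply ignores the two size arguments it is given):
+
+        >>> import torch
+        >>> import torch.nn as nn
+        >>> deep_set = DeepSet(n_features=2, embedding_size=2,
+        ...                    embedding_module=nn.Identity)
+        >>> deep_set(torch.tensor([[[1.0, 2.0], [3.0, 4.0]]]))
+        tensor([[2., 3.]])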
+
+        Parameters
+        ----------
+        instances : tensor
+            The input tensor of shape (N, *, O, F), where F is the number of
+            features and O is the number of objects.
+
+        Returns
+        -------
+        tensor
+            A tensor of shape (N, *, E), where E is the embedding size.
+        """
+        embedded_objects = self.embedding_module(instances)
+        return torch.mean(embedded_objects, dim=1)
diff --git a/csrank/modules/object_mapping.py b/csrank/modules/object_mapping.py
new file mode 100644
index 00000000..89b9c355
--- /dev/null
+++ b/csrank/modules/object_mapping.py
@@ -0,0 +1,158 @@
+"""Modules that transform feature vectors individually.
+
+The modules listed here should take a 1 or higher dimensional input and apply
+some mapping to the last dimension. They do not take interactions of different
+feature vectors into account.
+
+Inputs of shape :math:`(N, *, H_i)` are transformed to outputs of shape
+:math:`(N, *, H_o)`. In this case :math:`N` is the batch size, :math:`*`
+denotes arbitrary additional dimensions (which are preserved), and :math:`H_i`
+and :math:`H_o` refer to the number of input and output features respectively.
+"""
+
+import torch.nn as nn
+
+# Refer to Figure 1 of https://arxiv.org/pdf/1901.10860.pdf for an overview of
+# the different components that are used for FATE and FETA.
+
+
+class DeterministicSumming(nn.Module):
+    """Transform a tensor into repetitions of its sum.
+
+    Intended for use in tests, not useful for actual learning. The last
+    dimension of the input should contain feature vectors. The result will be
+    an array of matching shape with the last dimension replaced by repeated
+    utility values (i.e. sums).
+
+    Let's use this as a pairwise utility function. As an example, consider
+    this pairing. There are two instances with two objects each. All object
+    combinations are considered. Objects have two features.
+
+    >>> import torch
+    >>> pairs = torch.tensor(
+    ...     [[[0.5000, 0.6000, 0.5000, 0.6000],
+    ...       [0.5000, 0.6000, 1.5000, 1.6000],
+    ...       [1.5000, 1.6000, 0.5000, 0.6000],
+    ...       [1.5000, 1.6000, 1.5000, 1.6000]],
+    ...      [[2.5000, 2.6000, 2.5000, 2.6000],
+    ...       [2.5000, 2.6000, 3.5000, 3.6000],
+    ...       [3.5000, 3.6000, 2.5000, 2.6000],
+    ...       [3.5000, 3.6000, 3.5000, 3.6000]]])
+
+    We can compute the mock utility of this pairing as follows:
+
+    >>> utility = DeterministicSumming(input_size=2)
+    >>> utilities = utility(pairs)
+    >>> utilities
+    tensor([[[ 2.2000],
+             [ 4.2000],
+             [ 4.2000],
+             [ 6.2000]],
+    <BLANKLINE>
+            [[10.2000],
+             [12.2000],
+             [12.2000],
+             [14.2000]]])
+
+    Note that for example :math:`2.2 = 0.5 + 0.6 + 0.5 + 0.6`, that is
+
+    >>> utilities[0][0] == pairs[0][0].sum()
+    tensor([True])
+
+    Parameters
+    ----------
+    input_size : int
+        The size of the last dimension of the input.
+
+    output_size : int
+        The size of the last dimension of the output. Defaults to `1` to make
+        it more convenient to use this as a utility.
+    """
+
+    def __init__(self, input_size: int, output_size: int = 1):
+        super().__init__()
+        self.output_size = output_size
+
+    def forward(self, inputs):
+        """Forward inputs through the network.
+
+        Parameters
+        ----------
+        inputs : tensor
+            The input tensor of shape (N, *, I), where I is the input size.
+
+        Returns
+        -------
+        tensor
+            A tensor of shape (N, *, O), where O is the output size.
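+
+        Examples
+        --------
+        A small sanity check of the repetition behavior:
+
+        >>> import torch
+        >>> module = DeterministicSumming(input_size=3, output_size=2)
+        >>> module(torch.tensor([[1.0, 2.0, 3.0]]))
+        tensor([[6., 6.]])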
+        """
+        summed = inputs.sum(dim=-1)
+        # repeat in newly created last dimension
+        repeated = (
+            summed.view(-1, 1)
+            .repeat(1, self.output_size)
+            .view(summed.shape + (self.output_size,))
+        )
+        return repeated
+
+
+class DenseNeuralNetwork(nn.Module):
+    """Deep, densely connected neural network.
+
+    All hidden layers have the same number of units.
+
+    Parameters
+    ----------
+    input_size: int
+        The number of units at the input layer.
+    output_size: int
+        The number of units at the output layer.
+    hidden_layers: int
+        The number of hidden layers in addition to the input and output layers.
+    units_per_hidden: int
+        The number of units each hidden layer has.
+    activation: torch activation function
+        The activation function that should be applied after each layer.
+        Defaults to an instance of `nn.SELU`.
+    """
+
+    def __init__(
+        self,
+        input_size: int,
+        output_size: int,
+        hidden_layers: int,
+        units_per_hidden: int,
+        activation=None,
+    ):
+        super().__init__()
+        self.input_layer = nn.Linear(input_size, units_per_hidden)
+        # ModuleList is necessary to make pytorch aware of these layers and add
+        # their parameters to this module's parameter list.
+        self.hidden_layers = nn.ModuleList(
+            [
+                nn.Linear(units_per_hidden, units_per_hidden)
+                for _ in range(hidden_layers)
+            ]
+        )
+        self.output_layer = nn.Linear(units_per_hidden, output_size)
+        self.activation = activation if activation is not None else nn.SELU()
+
+    def forward(self, x):
+        """Forward inputs through the network.
+
+        Parameters
+        ----------
+        x : tensor
+            The input tensor of shape (N, *, I), where I is the input size.
+
+        Returns
+        -------
+        tensor
+            A tensor of shape (N, *, O), where O is the output size.
+        """
+        result = self.activation(self.input_layer(x))
+        for layer in self.hidden_layers:
+            result = self.activation(layer(result))
+        result = self.output_layer(result)
+        return result
diff --git a/csrank/modules/scoring/__init__.py b/csrank/modules/scoring/__init__.py
new file mode 100644
index 00000000..0d0651fa
--- /dev/null
+++ b/csrank/modules/scoring/__init__.py
@@ -0,0 +1,14 @@
+"""High level implementation of scoring modules.
+
+These are pytorch modules that take a list of instances and score each object
+within each instance, taking its context into account. Only the high-level
+assembly is done in this module.
+
+One can easily derive ranking and choice estimators from a scorer; see
+the implemented pytorch estimators such as ``FATEDiscreteObjectChooser`` for
+examples.
+"""
+
+from .fate import FATEScoring
+
+__all__ = ["FATEScoring"]
diff --git a/csrank/modules/scoring/fate.py b/csrank/modules/scoring/fate.py
new file mode 100644
index 00000000..7018e0bb
--- /dev/null
+++ b/csrank/modules/scoring/fate.py
@@ -0,0 +1,129 @@
+"""An implementation of the scoring module for FATE estimators."""
+
+import functools
+
+import torch
+import torch.nn as nn
+
+from csrank.modules.instance_reduction import DeepSet
+from csrank.modules.object_mapping import DenseNeuralNetwork
+
+
+class FATEScoring(nn.Module):
+    r"""Map instances to scores with the FATE approach.
+
+    Let's show the FATE approach on an example. To simplify things, we'll use
+    a simple identity embedding. The FATE module will then aggregate the
+    context by simply taking the average of the objects (feature-wise). To
+    further simplify things the actual pairwise utility is just computed by
+    the sum of all features of the object and the context.
+
+    >>> import torch.nn as nn
+    >>> from csrank.modules.object_mapping import DeterministicSumming
+    >>> scoring = FATEScoring(
+    ...     n_features=2,
+    ...     pairwise_utility_module=DeterministicSumming,
+    ...     embedding_module=nn.Identity,
+    ... )
+
+    Now let's define some problem instances.
+
+    >>> object_a = [0.5, 0.8]
+    >>> object_b = [1.5, 1.8]
+    >>> object_c = [2.5, 2.8]
+    >>> object_d = [3.5, 3.6]
+    >>> object_e = [4.5, 4.6]
+    >>> object_f = [5.5, 5.6]
+    >>> # instance = list of objects to rank
+    >>> instance_a = [object_a, object_b, object_c]
+    >>> instance_b = [object_d, object_e, object_f]
+    >>> import torch
+    >>> instances = torch.tensor([instance_a, instance_b])
+
+    Let's focus on the first instance in this example. The aggregated identity
+    embedding is
+
+    >>> embedding_1 = (object_a[0] + object_b[0] + object_c[0]) / 3
+    >>> embedding_2 = (object_a[1] + object_b[1] + object_c[1]) / 3
+    >>> (embedding_1, embedding_2)
+    (1.5, 1.8)
+
+    for the first and second feature respectively. So the utility of object_a
+    within the context (defined by the mock sum utility) should be
+
+    >>> embedding_1 + embedding_2 + object_a[0] + object_a[1]
+    4.6
+
+    Let's verify this:
+
+    >>> scoring(instances)
+    tensor([[ 4.6000,  6.6000,  8.6000],
+            [16.2000, 18.2000, 20.2000]])
+
+    As you can see, the scoring comes to the same result for the first object
+    of the first instance.
+
+    Parameters
+    ----------
+    n_features: int
+        The number of features each object has.
+    embedding_size: int
+        The size of the embeddings that should be generated. Defaults to
+        ``n_features`` if not specified.
+    pairwise_utility_module: pytorch module with one integer parameter
+        The module that should be used for pairwise utility estimations. Uses
+        a simple linear mapping if not specified. You likely want to replace
+        this with something more expressive such as a ``DenseNeuralNetwork``.
+        This should take the size of the input values as its only parameter.
+        You can use ``functools.partial`` if necessary. This corresponds to
+        :math:`U` in Figure 2 of [1]_.
+    embedding_module: pytorch module with two integer parameters
+        The module that should be used for the object embeddings. Its
+        constructor should take two parameters: the size of the input and the
+        size of the output. This corresponds to :math:`\Phi` in Figure 2 of
+        [1]_. The default is a ``DenseNeuralNetwork`` with 5 hidden layers and
+        64 units per hidden layer.
+
+    References
+    ----------
+    .. [1] Pfannschmidt, K., Gupta, P., & Hüllermeier, E. (2019). Learning
+       choice functions: Concepts and architectures. arXiv preprint
+       arXiv:1901.10860.
+    """
+
+    def __init__(
+        self,
+        n_features,
+        embedding_size=None,
+        pairwise_utility_module=None,
+        embedding_module=None,
+    ):
+        super().__init__()
+        if embedding_size is None:
+            embedding_size = n_features
+        if pairwise_utility_module is None:
+            pairwise_utility_module = functools.partial(nn.Linear, out_features=1)
+        if embedding_module is None:
+            embedding_module = functools.partial(
+                DenseNeuralNetwork, hidden_layers=5, units_per_hidden=64
+            )
+
+        self.embedding = DeepSet(
+            n_features, embedding_size, embedding_module=embedding_module
+        )
+        self.pairwise_utility_module = pairwise_utility_module(
+            n_features + embedding_size
+        )
+
+    def forward(self, instances, **kwargs):
+        n_objects = instances.size(1)
+        contexts = self.embedding(instances)
+        # Repeat each context for each object within the instance; this is
+        # then a flat list of contexts. Then reshape to have a list of
+        # contexts per instance.
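+        # ``contexts`` has shape (N, E). ``repeat_interleave`` turns that
+        # into (N * O, E) and ``reshape_as`` folds it back into the shape of
+        # ``instances``; this relies on the embedding size E being equal to
+        # the number of features F (which is the default).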
+ context_per_object = contexts.repeat_interleave(n_objects, dim=0).reshape_as( + instances + ) + pairs = torch.stack((instances, context_per_object), dim=-1) + utilities = self.pairwise_utility_module(pairs.flatten(start_dim=-2)).squeeze() + return utilities diff --git a/csrank/objectranking/__init__.py b/csrank/objectranking/__init__.py index ce09aaaf..a0ca5fb0 100644 --- a/csrank/objectranking/__init__.py +++ b/csrank/objectranking/__init__.py @@ -1,23 +1,11 @@ from .baseline import RandomBaselineRanker -from .cmp_net import CmpNet from .expected_rank_regression import ExpectedRankRegression from .fate_object_ranker import FATEObjectRanker -from .fatelinear_object_ranker import FATELinearObjectRanker -from .feta_object_ranker import FETAObjectRanker -from .fetalinear_object_ranker import FETALinearObjectRanker -from .list_net import ListNet -from .rank_net import RankNet from .rank_svm import RankSVM __all__ = [ - "CmpNet", "ExpectedRankRegression", "FATEObjectRanker", - "FATELinearObjectRanker", - "FETAObjectRanker", - "FETALinearObjectRanker", - "ListNet", - "RankNet", - "RankSVM", "RandomBaselineRanker", + "RankSVM", ] diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py deleted file mode 100644 index 2afdec5a..00000000 --- a/csrank/objectranking/cmp_net.py +++ /dev/null @@ -1,110 +0,0 @@ -import logging - -from keras.optimizers import SGD -from keras.regularizers import l2 - -from csrank.core.cmpnet_core import CmpNetCore -from csrank.dataset_reader.objectranking.util import generate_complete_pairwise_dataset -from csrank.objectranking.object_ranker import ObjectRanker - -__all__ = ["CmpNet"] -logger = logging.getLogger(__name__) - - -class CmpNet(ObjectRanker, CmpNetCore): - def __init__( - self, - n_hidden=2, - n_units=8, - loss_function="binary_crossentropy", - batch_normalization=True, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="relu", - optimizer=SGD, - metrics=("binary_accuracy",), - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create an instance of the :class:`CmpNetCore` architecture for learning a object ranking function. - CmpNet breaks the preferences in form of rankings into pairwise comparisons and learns a pairwise model for - the each pair of object in the underlying set. For prediction list of objects is converted in pair of - objects and the pairwise predicate is evaluated using them. The outputs of the network, i.e., - :math:`U(x_1,x_2), U(x_2,x_1)`for each pair of objects are evaluated. - :math:`U(x_1,x_2)` is a measure of how favorable it is to choose :math:`x_1` over :math:`x_2`. - The utility score of object :math:`x_i` in query set :math:`Q = \\{ x_1 , \\ldots , x_n \\}` is evaluated as: - - .. math:: - - U(x_i) = \\left\\{ \\frac{1}{n-1} \\sum_{j \\in [n] \\setminus \\{i\\}} U_1(x_i , x_j)\\right\\} - - - The ranking for the given query set :math:`Q` is defined as: - - .. math:: - - ρ(Q) = \\operatorname{argsort}_{x \\in Q} \\; U(x) - - - Parameters - ---------- - n_hidden : int - Number of hidden layers used in the scoring network - n_units : int - Number of hidden units in each layer of the scoring network - loss_function : function or string - Loss function to be used for the binary decision task of the - pairwise comparisons - batch_normalization : bool - Whether to use batch normalization in each hidden layer - kernel_regularizer : uninitialized keras regularizer - Regularizer function applied to all the hidden weight matrices. 
-        activation : function or string
-            Type of activation function to use in each hidden layer
-        optimizer: Class
-            Uninitialized optimizer class following the keras optimizer
-            interface.
-        optimizer__{kwarg}
-            Arguments to be passed to the optimizer on initialization, such as
-            optimizer__lr.
-        metrics : list
-            List of metrics to evaluate during training (can be
-            non-differentiable)
-        batch_size : int
-            Batch size to use during training
-        random_state : int, RandomState instance or None
-            Seed of the pseudorandom generator or a RandomState instance
-        hidden_dense_layer__{kwarg}
-            Arguments to be passed to the Dense layers (or NormalizedDense
-            if batch_normalization is enabled). See the keras documentation
-            for those classes for available options.
-
-
-        References
-        ----------
-        [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco
-        Scarselli. 2011. SortNet: Learning to Rank by a Neural Preference
-        Function. IEEE Trans. Neural Networks 22, 9 (2011), 1368-1380.
-        https://doi.org/10.1109/TNN.2011.2160875
-        """
-        super().__init__(
-            n_hidden=n_hidden,
-            n_units=n_units,
-            loss_function=loss_function,
-            batch_normalization=batch_normalization,
-            kernel_regularizer=kernel_regularizer,
-            kernel_initializer=kernel_initializer,
-            activation=activation,
-            optimizer=optimizer,
-            metrics=metrics,
-            batch_size=batch_size,
-            random_state=random_state,
-            **kwargs,
-        )
-        logger.info("Initializing network")
-
-    def _convert_instances_(self, X, Y):
-        logger.debug("Creating the Dataset")
-        garbage, x1, x2, y_double, garbage = generate_complete_pairwise_dataset(X, Y)
-        del garbage
-        logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
-        return x1, x2, y_double
diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py
index d08c0f2d..60fe30b8 100644
--- a/csrank/objectranking/fate_object_ranker.py
+++ b/csrank/objectranking/fate_object_ranker.py
@@ -1,95 +1,85 @@
-import logging
+import functools

-from keras.optimizers import SGD
-from keras.regularizers import l2
+import torch.nn as nn

-from csrank.core.fate_network import FATENetwork
-from csrank.losses import hinged_rank_loss
-from csrank.metrics import zero_one_rank_loss_for_scores_ties
-from csrank.objectranking.object_ranker import ObjectRanker
+from csrank.modules.object_mapping import DenseNeuralNetwork
+from csrank.modules.scoring import FATEScoring
+from csrank.objectranking.object_ranker import SkorchObjectRanker
+from csrank.rank_losses import HingedRankLoss

-logger = logging.getLogger(__name__)

+class FATEObjectRanker(SkorchObjectRanker):
+    """A ranking estimator based on the FATE approach.
+
+    Trains a model that first aggregates all objects into a context, then
+    evaluates each object within this context.
+
+    The resulting model can then be used for context-sensitive ranking.
+
+    Refer to skorch's documentation for supported parameters.
+
+    Parameters
+    ----------
+    n_hidden_set_layers : int
+        The number of hidden layers that should be used for the ``DeepSet``
+        context embedding.
+
+    n_hidden_set_units : int
+        The number of units per hidden layer that should be used for the
+        ``DeepSet`` context embedding.
+
+    n_hidden_joint_layers : int
+        The number of hidden layers that should be used for the utility
+        function that evaluates each object in the aggregated context.
+
+    n_hidden_joint_units : int
+        The number of units per hidden layer that should be used for the
+        utility function that evaluates each object in the aggregated context.
+
+    activation : torch activation function (class)
+        The activation function that should be used for each layer of the two
+        ("set" and "joint") neural networks.
+
+    criterion : torch criterion (class)
+        The criterion that is used to evaluate and optimize the module.
+
+    **kwargs : skorch NeuralNet arguments
+        All keyword arguments are passed to the constructor of
+        ``SkorchObjectRanker``. See the documentation of that class for more
+        details.
+    """

-class FATEObjectRanker(ObjectRanker, FATENetwork):
     def __init__(
         self,
         n_hidden_set_layers=2,
         n_hidden_set_units=32,
         n_hidden_joint_layers=2,
         n_hidden_joint_units=32,
-        activation="selu",
-        kernel_initializer="lecun_normal",
-        kernel_regularizer=l2,
-        optimizer=SGD,
-        batch_size=256,
-        loss_function=hinged_rank_loss,
-        metrics=(zero_one_rank_loss_for_scores_ties,),
-        random_state=None,
-        **kwargs,
+        activation=nn.SELU,
+        criterion=HingedRankLoss,
+        **kwargs
     ):
-        """
-        Create a FATE-network architecture for learning an object ranking
-        function. The first-aggregate-then-evaluate approach learns an
-        embedding of each object and then aggregates that into a context
-        representation :math:`\\mu_{C(x)}` and then scores each object
-        :math:`x` using a context-dependent utility function
-        :math:`U (x, \\mu_{C(x)})`.
-        To make it computationally efficient we take the context :math:`C(x)`
-        as the query set :math:`Q`. The context-representation is evaluated as:
-
-        .. math::
-            \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y)
-
-        where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each
-        object :math:`y` to an :math:`m`-dimensional embedding space
-        :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`.
-        Training complexity is quadratic in the number of objects and
-        prediction complexity is only linear.
-        The ranking for the given query set :math:`Q` is defined as:
-
-        .. math::
-            ρ(Q) = \\operatorname{argsort}_{x \\in Q} \\; U (x, \\mu_{C(x)})
-
-        Parameters
-        ----------
-        n_hidden_set_layers : int
-            Number of set layers.
-        n_hidden_set_units : int
-            Number of hidden set units.
-        n_hidden_joint_layers : int
-            Number of joint layers.
-        n_hidden_joint_units : int
-            Number of joint units.
-        activation : string or function
-            Activation function to use in the hidden units
-        kernel_initializer : function or string
-            Initialization function for the weights of each hidden layer
-        kernel_regularizer : uninitialized keras regularizer
-            Regularizer to use in the hidden units
-        optimizer: Class
-            Uninitialized optimizer class following the keras optimizer
-            interface.
-        optimizer__{kwarg}
-            Arguments to be passed to the optimizer on initialization, such as
-            optimizer__lr.
- batch_size : int - Batch size to use for training - loss_function : function - Differentiable loss function for the score vector - metrics : list - List of evaluation metrics (can be non-differentiable) - random_state : int or object - Numpy random state - **kwargs - Keyword arguments for the @FATENetwork - """ - self.loss_function = loss_function - self.metrics = metrics - super().__init__( - n_hidden_set_layers=n_hidden_set_layers, - n_hidden_set_units=n_hidden_set_units, - n_hidden_joint_layers=n_hidden_joint_layers, - n_hidden_joint_units=n_hidden_joint_units, - activation=activation, - kernel_initializer=kernel_initializer, - kernel_regularizer=kernel_regularizer, - optimizer=optimizer, - batch_size=batch_size, - random_state=random_state, - **kwargs, + self.n_hidden_set_layers = n_hidden_set_layers + self.n_hidden_set_units = n_hidden_set_units + self.n_hidden_joint_layers = n_hidden_joint_layers + self.n_hidden_joint_units = n_hidden_joint_units + self.activation = activation + super().__init__(module=FATEScoring, criterion=criterion, **kwargs) + + def _get_extra_module_parameters(self): + """Return extra parameters that should be passed to the module.""" + params = super()._get_extra_module_parameters() + params["pairwise_utility_module"] = functools.partial( + DenseNeuralNetwork, + hidden_layers=self.n_hidden_joint_layers, + units_per_hidden=self.n_hidden_joint_units, + activation=self.activation(), + output_size=1, + ) + params["embedding_module"] = functools.partial( + DenseNeuralNetwork, + hidden_layers=self.n_hidden_set_layers, + units_per_hidden=self.n_hidden_set_units, + activation=self.activation(), ) + return params diff --git a/csrank/objectranking/fatelinear_object_ranker.py b/csrank/objectranking/fatelinear_object_ranker.py deleted file mode 100644 index 16979f80..00000000 --- a/csrank/objectranking/fatelinear_object_ranker.py +++ /dev/null @@ -1,60 +0,0 @@ -import logging - -from csrank.core.fate_linear import FATELinearCore -from csrank.losses import hinged_rank_loss -from .object_ranker import ObjectRanker - -logger = logging.getLogger(__name__) - - -class FATELinearObjectRanker(ObjectRanker, FATELinearCore): - def __init__( - self, - n_hidden_set_units=32, - loss_function=hinged_rank_loss, - learning_rate=1e-3, - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create a FATELinear-network architecture for leaning discrete choice function. The first-aggregate-then-evaluate - approach learns an embedding of each object and then aggregates that into a context representation - :math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function - :math:`U (x, \\mu_{C(x)})`. - To make it computationally efficient we take the the context :math:`C(x)` as query set :math:`Q`. - The context-representation is evaluated as: - - .. math:: - \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y) - - where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each object :math:`y` to an - :math:`m`-dimensional embedding space :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`. - Training complexity is quadratic in the number of objects and prediction complexity is only linear. - The discrete choice for the given query set :math:`Q` is defined as: - - .. math:: - - dc(Q) := \\operatorname{argmax}_{x \\in Q} \\; U (x, \\mu_{C(x)}) - - Parameters - ---------- - n_hidden_set_units : int - Number of hidden set units. 
- batch_size : int - Batch size to use for training - loss_function : function - Differentiable loss function for the score vector - random_state : int or object - Numpy random state - **kwargs - Keyword arguments for the @FATENetwork - """ - super().__init__( - n_hidden_set_units=n_hidden_set_units, - learning_rate=learning_rate, - batch_size=batch_size, - loss_function=loss_function, - random_state=random_state, - **kwargs, - ) diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py deleted file mode 100644 index a9f69904..00000000 --- a/csrank/objectranking/feta_object_ranker.py +++ /dev/null @@ -1,100 +0,0 @@ -import logging - -from keras.optimizers import SGD -from keras.regularizers import l2 - -from csrank.core.feta_network import FETANetwork -from csrank.losses import hinged_rank_loss -from .object_ranker import ObjectRanker - -__all__ = ["FETAObjectRanker"] -logger = logging.getLogger(__name__) - - -class FETAObjectRanker(ObjectRanker, FETANetwork): - def __init__( - self, - n_hidden=2, - n_units=8, - add_zeroth_order_model=False, - max_number_of_objects=5, - num_subsample=5, - loss_function=hinged_rank_loss, - batch_normalization=False, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="selu", - optimizer=SGD, - metrics=(), - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create a FETA-network architecture for object ranking. The first-evaluate-then-aggregate approach - approximates the context-dependent utility function using the first-order utility function - :math:`U_1 \\colon \\mathcal{X} \\times \\mathcal{X} \\rightarrow [0,1]` and zeroth-order utility - function :math:`U_0 \\colon \\mathcal{X} \\rightarrow [0,1]`. - The scores each object :math:`x` using a context-dependent utility function :math:`U (x, C_i)`: - - .. math:: - U(x_i, C_i) = U_0(x_i) + \\frac{1}{n-1} \\sum_{x_j \\in Q \\setminus \\{x_i\\}} U_1(x_i , x_j) \\, . - - Training and prediction complexity is quadratic in the number of objects. - The ranking for the given query set :math:`Q` is defined as: - - .. math:: - ρ(Q) = \\operatorname{argsort}_{x_i \\in Q} \\; U (x_i, C_i) - - Parameters - ---------- - n_hidden : int - Number of hidden layers - n_units : int - Number of hidden units in each layer - add_zeroth_order_model : bool - True if the model should include a latent utility function - max_number_of_objects : int - The maximum number of objects to train from - num_subsample : int - Number of objects to subsample to - loss_function : function - Differentiable loss function for the score vector - batch_normalization : bool - Whether to use batch normalization in the hidden layers - kernel_regularizer : uninitialized keras regularizer - Regularizer to use in the hidden units - kernel_initializer : function or string - Initialization function for the weights of each hidden layer - activation : string or function - Activation function to use in the hidden units - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. - optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
- metrics : list - List of evaluation metrics (can be non-differentiable) - batch_size : int - Batch size to use for training - random_state : int or object - Numpy random state - **kwargs - Keyword arguments for the hidden units - """ - super().__init__( - n_hidden=n_hidden, - n_units=n_units, - add_zeroth_order_model=add_zeroth_order_model, - max_number_of_objects=max_number_of_objects, - num_subsample=num_subsample, - loss_function=loss_function, - batch_normalization=batch_normalization, - kernel_regularizer=kernel_regularizer, - kernel_initializer=kernel_initializer, - activation=activation, - optimizer=optimizer, - metrics=metrics, - batch_size=batch_size, - random_state=random_state, - **kwargs, - ) diff --git a/csrank/objectranking/fetalinear_object_ranker.py b/csrank/objectranking/fetalinear_object_ranker.py deleted file mode 100644 index 4fee243e..00000000 --- a/csrank/objectranking/fetalinear_object_ranker.py +++ /dev/null @@ -1,58 +0,0 @@ -import logging - -from csrank.core.feta_linear import FETALinearCore -from csrank.losses import hinged_rank_loss -from .object_ranker import ObjectRanker - -logger = logging.getLogger(__name__) - - -class FETALinearObjectRanker(ObjectRanker, FETALinearCore): - def __init__( - self, - loss_function=hinged_rank_loss, - learning_rate=5e-3, - batch_size=256, - random_state=None, - **kwargs, - ): - """ - Create a FATELinear-network architecture for leaning discrete choice function. The first-aggregate-then-evaluate - approach learns an embedding of each object and then aggregates that into a context representation - :math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function - :math:`U (x, \\mu_{C(x)})`. - To make it computationally efficient we take the the context :math:`C(x)` as query set :math:`Q`. - The context-representation is evaluated as: - - .. math:: - \\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y) - - where :math:`\\phi \\colon \\mathcal{X} \\to \\mathcal{Z}` maps each object :math:`y` to an - :math:`m`-dimensional embedding space :math:`\\mathcal{Z} \\subseteq \\mathbb{R}^m`. - Training complexity is quadratic in the number of objects and prediction complexity is only linear. - The discrete choice for the given query set :math:`Q` is defined as: - - .. math:: - - dc(Q) := \\operatorname{argmax}_{x \\in Q} \\; U (x, \\mu_{C(x)}) - - Parameters - ---------- - n_hidden_set_units : int - Number of hidden set units. 
- batch_size : int - Batch size to use for training - loss_function : function - Differentiable loss function for the score vector - random_state : int or object - Numpy random state - **kwargs - Keyword arguments for the @FATENetwork - """ - super().__init__( - learning_rate=learning_rate, - batch_size=batch_size, - loss_function=loss_function, - random_state=random_state, - **kwargs, - ) diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py deleted file mode 100644 index 8f5bac6a..00000000 --- a/csrank/objectranking/list_net.py +++ /dev/null @@ -1,257 +0,0 @@ -import logging - -from keras import Input -from keras.layers import concatenate -from keras.layers import Dense -from keras.models import Model -from keras.optimizers import SGD -from keras.regularizers import l2 -from sklearn.utils import check_random_state - -from csrank.layers import create_input_lambda -from csrank.layers import NormalizedDense -from csrank.learner import Learner -from csrank.losses import plackett_luce_loss -from csrank.metrics import zero_one_rank_loss_for_scores_ties -from csrank.objectranking.object_ranker import ObjectRanker - -__all__ = ["ListNet"] -logger = logging.getLogger(__name__) - - -class ListNet(ObjectRanker, Learner): - def __init__( - self, - n_top=1, - n_hidden=2, - n_units=8, - loss_function=plackett_luce_loss, - batch_normalization=False, - kernel_regularizer=l2, - activation="selu", - kernel_initializer="lecun_normal", - optimizer=SGD, - metrics=(zero_one_rank_loss_for_scores_ties,), - batch_size=256, - random_state=None, - **kwargs, - ): - """ Create an instance of the ListNet architecture. ListNet trains a latent utility model based on - top-k-subrankings of the objects. This network learns a latent utility score for each object in the given - query set :math:`Q = \\{x_1, \\ldots ,x_n\\}` using the equation :math:`U(x) = F(x, w)` where :math:`w` is the - weight vector. A listwise loss function like the negative Plackett-Luce likelihood is used for training. - The ranking for the given query set :math:`Q` is defined as: - - .. math:: - - ρ(Q) = \\operatorname{argsort}_{x \\in Q} \\; U(x) - - Parameters - ---------- - n_top : int - Size of the top-k-subrankings to consider for training - n_hidden : int - Number of hidden layers used in the scoring network - n_units : int - Number of hidden units in each layer of the scoring network - loss_function : function or string - Listwise loss function which is applied on the top-k objects - batch_normalization : bool - Whether to use batch normalization in each hidden layer - kernel_regularizer : uninitialized keras regularizer - Regularizer function applied to all the hidden weight matrices. - kernel_regularizer__{kwarg} - Arguments to be passed to the kernel regularizer on initialization, such as kernel_regularizer__l. - activation : function or string - Type of activation function to use in each hidden layer - kernel_initializer : function or string - Initialization function for the weights of each hidden layer - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. - optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. 
- metrics : list - List of metrics to evaluate during training (can be - non-differentiable) - batch_size : int - Batch size to use during training - random_state : int, RandomState instance or None - Seed of the pseudorandom generator or a RandomState instance - hidden_dense_layer__{kwarg} - Arguments to be passed to the Dense layers (or NormalizedDense - if batch_normalization is enabled). See the keras documentation - for those classes for available options. - hidden_dense_layer__{kwarg} - Arguments to be passed to the Dense layers (or NormalizedDense - if batch_normalization is enabled). See the keras documentation - for those classes for available options. - - References - ---------- - [1] Z. Cao, T. Qin, T. Liu, M. Tsai and H. Li. "Learning to Rank: From Pairwise Approach to Listwise Approach." ICML, 2007. - """ - self.n_top = n_top - self.batch_normalization = batch_normalization - self.activation = activation - self.metrics = metrics - self.kernel_regularizer = kernel_regularizer - self.kernel_initializer = kernel_initializer - self.loss_function = loss_function - self.optimizer = optimizer - self.n_hidden = n_hidden - self.n_units = n_units - - self.batch_size = batch_size - self.random_state = random_state - self._store_kwargs( - kwargs, {"hidden_dense__", "optimizer__", "kernel_regularizer__"} - ) - - def _construct_layers(self): - self.input_layer = Input(shape=(self.n_top, self.n_object_features_fit_)) - self.output_node = Dense( - 1, activation="linear", kernel_regularizer=self.kernel_regularizer_ - ) - hidden_dense_kwargs = { - "kernel_regularizer": self.kernel_regularizer_, - "kernel_initializer": self.kernel_initializer, - "activation": self.activation, - } - hidden_dense_kwargs.update(self._get_prefix_attributes("hidden_dense_layer__")) - if self.batch_normalization: - self.hidden_layers = [ - NormalizedDense( - self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs - ) - for x in range(self.n_hidden) - ] - else: - self.hidden_layers = [ - Dense(self.n_units, name="hidden_{}".format(x), **hidden_dense_kwargs) - for x in range(self.n_hidden) - ] - assert len(self.hidden_layers) == self.n_hidden - - def _create_topk(self, X, Y): - n_inst, n_obj, n_feat = X.shape - mask = Y < self.n_top - X_topk = X[mask].reshape(n_inst, self.n_top, n_feat) - Y_topk = Y[mask].reshape(n_inst, self.n_top) - return X_topk, Y_topk - - def _pre_fit(self): - super()._pre_fit() - self.random_state_ = check_random_state(self.random_state) - self._initialize_optimizer() - self._initialize_regularizer() - - def fit( - self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd - ): - """ - Fit an object ranking learning ListNet on the top-k-subrankings in the provided set of queries. The provided - queries can be of a fixed size (numpy arrays). For fitting the model we maximize the Plackett-Luce - likelihood. For example for query set :math:`Q = \\{x_1,x_2,x_3\\}`, the scores are :math:`Q = (s_1,s_2,s_3)` - and the ranking is :math:`\\pi = (3,1,2)`. The Plackett-Luce likelihood is defined as: - - .. math:: - P_l(\\pi) = \\frac{s_2}{s_1+s_2+s_3} \\cdot \\frac{s_3}{s_1+s_3} \\cdot \\frac{s_1}{s_1} - - Note: For k=2 we obtain :class:`RankNet` as a special case. 
- - Parameters - ---------- - X : numpy array - (n_instances, n_objects, n_features) - Feature vectors of the objects - Y : numpy array - (n_instances, n_objects) - Rankings of the given objects - epochs : int - Number of epochs to run if training for a fixed query size - callbacks : list - List of callbacks to be called during optimization - validation_split : float - Percentage of instances to split off to validate on - verbose : bool - Print verbose information - **kwd - Keyword arguments for the fit function - """ - self._pre_fit() - _n_instances, _n_objects, self.n_object_features_fit_ = X.shape - self._construct_layers() - logger.debug("Creating top-k dataset") - X, Y = self._create_topk(X, Y) - logger.debug("Finished creating the dataset") - - logger.debug("Creating the model") - self.model_ = self.construct_model() - logger.debug("Finished creating the model, now fitting...") - self.model_.fit( - X, - Y, - batch_size=self.batch_size, - epochs=epochs, - callbacks=callbacks, - validation_split=validation_split, - verbose=verbose, - **kwd, - ) - logger.debug("Fitting Complete") - return self - - def construct_model(self): - """ - Construct the ListNet architecture which takes topk-subrankings from the given queries and minimize a - listwise loss on the utility scores of top objects. Weight sharing guarantees that we learn the shared - weights :math:`w` of the latent utility function :math:`U(x) = F(x, w)`. - - Returns - ------- - model: keras model :class:`Model` - ListNet model used to learn the utiliy function using the top-k-subrankings in the provided set of queries. - """ - hid = [create_input_lambda(i)(self.input_layer) for i in range(self.n_top)] - for hidden_layer in self.hidden_layers: - hid = [hidden_layer(x) for x in hid] - outputs = [self.output_node(x) for x in hid] - merged = concatenate(outputs) if len(outputs) > 1 else outputs[0] - model = Model(inputs=self.input_layer, outputs=merged) - model.compile( - loss=self.loss_function, - optimizer=self.optimizer_, - metrics=list(self.metrics), - ) - return model - - @property - def scoring_model(self): - """ - Creates a scoring model from the trained ListNet, which predicts the utility scores for given set of objects. - This network consist of a sequential network which predicts the utility score for each object :math:`x \\in Q` - using the latent utility function :math:`U(x) = F(x, w)` where :math:`w` is the weights of the model. 
- - Returns - ------- - scoring_model: keras model :class:`Model` - scoring model used to predict utility score for each object - """ - if not hasattr(self, "scoring_model_"): - logger.info("Creating scoring model") - inp = Input(shape=(self.n_object_features_fit_,)) - x = inp - for hidden_layer in self.hidden_layers: - x = hidden_layer(x) - output_score = self.output_node(x) - self.scoring_model_ = Model(inputs=inp, outputs=output_score) - return self.scoring_model_ - - def _predict_scores_fixed(self, X, **kwargs): - n_inst, n_obj, n_feat = X.shape - logger.info("For Test instances {} objects {} features {}".format(*X.shape)) - inp = Input(shape=(n_obj, n_feat)) - lambdas = [create_input_lambda(i)(inp) for i in range(n_obj)] - scores = concatenate([self.scoring_model(lam) for lam in lambdas]) - model = Model(inputs=inp, outputs=scores) - return model.predict(X) diff --git a/csrank/objectranking/object_ranker.py b/csrank/objectranking/object_ranker.py index 0fde77d0..19414a0b 100644 --- a/csrank/objectranking/object_ranker.py +++ b/csrank/objectranking/object_ranker.py @@ -1,9 +1,11 @@ from abc import ABCMeta from csrank.constants import OBJECT_RANKING +from csrank.learner import SkorchInstanceEstimator from csrank.numpy_util import scores_to_rankings +from csrank.rank_losses import HingedRankLoss -__all__ = ["ObjectRanker"] +__all__ = ["ObjectRanker", "SkorchObjectRanker"] class ObjectRanker(metaclass=ABCMeta): @@ -39,3 +41,30 @@ def predict_for_scores(self, scores, **kwargs): else: result = scores_to_rankings(scores) return result + + +class SkorchObjectRanker(ObjectRanker, SkorchInstanceEstimator): + """Base estimator for torch-based ranking. + + This makes it very simple to derive new estimators with any given scoring + module. Refer to skorch's documentation for supported parameters. For + example the optimizer or the optimizer's learning rate could be overridden. + + Parameters + ---------- + module : torch module (class) + This is the scoring module. It should be an uninstantiated + ``torch.nn.Module`` class that expects the number of features per + object as its only parameter on initialization. + + criterion : torch criterion (class) + The criterion that is used to evaluate and optimize the module. + + **kwargs : skorch NeuralNet arguments + All keyword arguments are passed to the constructor of + ``skorch.NeuralNet``. See the documentation of that class for more + details. 
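+
+    Examples
+    --------
+    A subclass usually just fixes the scoring module. Roughly (with a
+    hypothetical ``MyScoringModule`` that takes the number of features as its
+    only argument)::
+
+        class MyObjectRanker(SkorchObjectRanker):
+            def __init__(self, **kwargs):
+                super().__init__(module=MyScoringModule, **kwargs)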
+ """ + + def __init__(self, module, criterion=HingedRankLoss, **kwargs): + super().__init__(module=module, criterion=criterion, **kwargs) diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py deleted file mode 100644 index 0cbea2ee..00000000 --- a/csrank/objectranking/rank_net.py +++ /dev/null @@ -1,99 +0,0 @@ -import logging - -from keras.optimizers import SGD -from keras.regularizers import l2 - -from csrank.core.ranknet_core import RankNetCore -from csrank.dataset_reader.objectranking.util import generate_complete_pairwise_dataset -from csrank.objectranking.object_ranker import ObjectRanker - -__all__ = ["RankNet"] -logger = logging.getLogger(__name__) - - -class RankNet(ObjectRanker, RankNetCore): - def __init__( - self, - n_hidden=2, - n_units=8, - loss_function="binary_crossentropy", - batch_normalization=True, - kernel_regularizer=l2, - kernel_initializer="lecun_normal", - activation="relu", - optimizer=SGD, - metrics=("binary_accuracy",), - batch_size=256, - random_state=None, - **kwargs, - ): - """ Create an instance of the :class:`RankNetCore` architecture for learning a object ranking function. - It breaks the preferences into pairwise comparisons and learns a latent utility model for the objects. - This network learns a latent utility score for each object in the given query set - :math:`Q = \\{x_1, \\ldots ,x_n\\}` using the equation :math:`U(x) = F(x, w)` where :math:`w` is the weight - vector. It is estimated using *pairwise preferences* generated from the rankings. - The ranking for the given query set :math:`Q` is defined as: - - .. math:: - - ρ(Q) = \\operatorname{argsort}_{x \\in Q} \\; U(x) - - Parameters - ---------- - n_hidden : int - Number of hidden layers used in the scoring network - n_units : int - Number of hidden units in each layer of the scoring network - loss_function : function or string - Loss function to be used for the binary decision task of the - pairwise comparisons - batch_normalization : bool - Whether to use batch normalization in each hidden layer - kernel_regularizer : uninitialized keras regularizer - Regularizer function applied to all the hidden weight matrices. - kernel_initializer : function or string - Initialization function for the weights of each hidden layer - activation : function or string - Type of activation function to use in each hidden layer - optimizer: Class - Uninitialized optimizer class following the keras optimizer interface. - optimizer__{kwarg} - Arguments to be passed to the optimizer on initialization, such as optimizer__lr. - metrics : list - List of metrics to evaluate during training (can be non-differentiable) - batch_size : int - Batch size to use during training - random_state : int, RandomState instance or None - Seed of the pseudo-random generator or a RandomState instance - **kwargs - Keyword arguments for the algorithms - - References - ---------- - [1] Burges, C. et al. (2005, August). "Learning to rank using gradient descent.", In Proceedings of the 22nd international conference on Machine learning (pp. 89-96). ACM. - - [2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581). 
-
-        """
-        super().__init__(
-            n_hidden=n_hidden,
-            n_units=n_units,
-            loss_function=loss_function,
-            batch_normalization=batch_normalization,
-            kernel_regularizer=kernel_regularizer,
-            kernel_initializer=kernel_initializer,
-            activation=activation,
-            optimizer=optimizer,
-            metrics=metrics,
-            batch_size=batch_size,
-            random_state=random_state,
-            **kwargs,
-        )
-        logger.info("Initializing network")
-
-    def _convert_instances_(self, X, Y):
-        logger.debug("Creating the Dataset")
-        garbage, x1, x2, garbage, y_single = generate_complete_pairwise_dataset(X, Y)
-        del garbage
-        logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
-        return x1, x2, y_single
diff --git a/csrank/rank_losses.py b/csrank/rank_losses.py
new file mode 100644
index 00000000..0149158d
--- /dev/null
+++ b/csrank/rank_losses.py
@@ -0,0 +1,82 @@
+"""Loss functions for ranking choice problems."""
+
+import torch
+
+
+class HingedRankLoss:
+    """Compute the pairwise loss between two lists of rankings.
+
+    Assumes the true ranking is represented by a permutation array (list of
+    indices). The scores are floats, which should be high for low ranking
+    indices.
+
+    The relative order of the scores (i.e. their reversed argsort) and their
+    differences are important for the resulting loss. Even scores that result
+    in a correct ranking will be penalized if the scores of two elements
+    differ by less than 1. For example if we have objects
+
+    >>> objects = ["a", "b", "c"]
+
+    then the true ranking (permutation array)
+
+    >>> ranking = [0, 2, 1]
+
+    and the scores
+
+    >>> scores = [0, -10, -5]
+
+    would equivalently specify the ranking ["a", "c", "b"] and consequently
+
+    >>> hinged_rank_loss = HingedRankLoss()
+    >>> hinged_rank_loss(torch.tensor([scores]), torch.tensor([ranking]))
+    tensor(0.)
+
+    A rescaled scoring with an insufficient gap would lead to a non-zero loss:
+
+    >>> scores = [0, -1, -0.5]
+    >>> hinged_rank_loss(torch.tensor([scores]), torch.tensor([ranking]))
+    tensor(0.3333)
+
+    The ranking
+
+    >>> ranking = [0, 2, 1]
+
+    is not matched by the scoring
+
+    >>> scores = [-2, 0, -1]
+
+    and thus
+
+    >>> hinged_rank_loss(torch.tensor([scores]), torch.tensor([ranking]))
+    tensor(2.3333)
+
+    Parameters
+    ----------
+    comparison_scores: 2d tensor
+        The predicted scores for each object of each instance.
+
+    true_rankings: 2d tensor
+        The true rankings, represented as a permutation. The first element of
+        a permutation contains the index to which the first element should be
+        moved.
+
+    Returns
+    -------
+    torch.float
+        The total loss, summed over all instances.
+    """
+
+    # The argument order is chosen to be compatible with skorch.
+    def __call__(self, comparison_scores, true_rankings):
+        # 2d matrix which is 1 if the row element should be ranked *below*
+        # the column element (i.e. it has the larger rank index).
+        mask = true_rankings[:, :, None] > true_rankings[:, None]
+        # How much higher/lower the elements are actually scored. First create
+        # new dimensions (at the element/instance level), then rely on
+        # broadcasting to compute the difference.
+        # Negated because higher scores imply lower ranking indices.
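+        # ``comparison_scores`` has shape (N, O); after adding the two
+        # singleton dimensions, the broadcasted difference below has shape
+        # (N, O, O).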
+ diff = -(comparison_scores[:, :, None] - comparison_scores[:, None]) + self.diff = diff + hinge = torch.clamp(mask * (1 - diff), min=0) + n = torch.sum(mask, axis=(1, 2)) + losses = torch.true_divide(torch.sum(hinge, axis=(1, 2)), n) + return losses.sum() diff --git a/csrank/tensorflow_util.py b/csrank/tensorflow_util.py deleted file mode 100644 index 2c65353f..00000000 --- a/csrank/tensorflow_util.py +++ /dev/null @@ -1,90 +0,0 @@ -import logging -import multiprocessing -import os - -from keras import backend as K -import numpy as np -import tensorflow as tf -from tensorflow.python.client import device_lib - - -def scores_to_rankings(n_objects, y_pred): - # indices = orderings - toprel, orderings = tf.nn.top_k(y_pred, n_objects) - # indices = rankings - troprel, rankings = tf.nn.top_k(orderings, n_objects) - rankings = K.cast(rankings[:, ::-1], dtype="float32") - return rankings - - -def get_instances_objects(y_true): - n_objects = K.cast(K.int_shape(y_true)[1], "int32") - total = K.cast(K.greater_equal(y_true, 0), dtype="int32") - n_instances = K.cast(tf.reduce_sum(total) / n_objects, dtype="int32") - return n_instances, n_objects - - -def tensorify(x): - """Converts x into a Keras tensor""" - if not isinstance(x, (tf.Tensor, tf.Variable)): - return K.constant(x) - return x - - -def get_tensor_value(x): - if isinstance(x, tf.Tensor): - return K.get_value(x) - return x - - -def configure_numpy_keras(seed=42): - tf.set_random_seed(seed) - os.environ["KERAS_BACKEND"] = "tensorflow" - devices = [x.name for x in device_lib.list_local_devices()] - logger = logging.getLogger("ConfigureKeras") - logger.info("Devices {}".format(devices)) - n_gpus = len( - [x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"] - ) - if n_gpus == 0: - config = tf.ConfigProto( - intra_op_parallelism_threads=1, - inter_op_parallelism_threads=1, - allow_soft_placement=True, - log_device_placement=False, - device_count={"CPU": multiprocessing.cpu_count() - 2}, - ) - else: - config = tf.ConfigProto( - allow_soft_placement=True, - log_device_placement=True, - intra_op_parallelism_threads=2, - inter_op_parallelism_threads=2, - ) # , gpu_options = gpu_options) - sess = tf.Session(config=config) - K.set_session(sess) - np.random.seed(seed) - logger.info("Number of GPUS {}".format(n_gpus)) - - -def get_mean_loss(metric, y_true, y_pred): - if isinstance(y_pred, dict) and isinstance(y_true, dict): - losses = [] - total_instances = 0 - for n in y_pred.keys(): - loss = eval_loss(metric, y_true[n], y_pred[n]) - if not np.isnan(loss) and not np.isinf(loss): - loss = loss * y_pred[n].shape[0] - total_instances += y_pred[n].shape[0] - losses.append(loss) - losses = np.array(losses) - mean_loss = np.sum(losses) / total_instances - else: - mean_loss = eval_loss(metric, y_true, y_pred) - return mean_loss - - -def eval_loss(metric, y_true, y_pred): - x = metric(y_true, y_pred) - x = get_tensor_value(x) - return np.nanmean(x) diff --git a/csrank/tests/test_callbacks.py b/csrank/tests/test_callbacks.py deleted file mode 100644 index 7cb4a91c..00000000 --- a/csrank/tests/test_callbacks.py +++ /dev/null @@ -1,66 +0,0 @@ -import math -import os - -from keras import Sequential -from keras.layers import Dense -from keras.optimizers import SGD -import numpy as np -import pytest -import tensorflow as tf - -from csrank.callbacks import EarlyStoppingWithWeights -from csrank.callbacks import LRScheduler - -callbacks_dict = { - "EarlyStopping": (EarlyStoppingWithWeights, {"patience": 5, "min_delta": 5e-2}), - "LRScheduler": 
(LRScheduler, {"epochs_drop": 5, "drop": 0.9}), -} - - -@pytest.fixture(scope="module") -def trivial_classification_problem(): - random_state = np.random.RandomState(123) - x = random_state.randn(200, 2) - w = random_state.rand(2) - y = 1.0 / (1.0 + np.exp(-np.dot(x, w))) - y_true = np.array(y > 0.5, dtype=np.int64) - return x, y_true - - -def create_model(): - lr = 0.015 - model = Sequential() - model.add(Dense(10, activation="relu")) - model.add(Dense(5, activation="relu")) - model.add(Dense(1, activation="sigmoid")) - model.compile(optimizer=SGD(lr=lr), loss="binary_crossentropy") - return model, lr - - -@pytest.mark.parametrize("name", list(callbacks_dict.keys())) -def test_callbacks(trivial_classification_problem, name): - tf.set_random_seed(0) - os.environ["KERAS_BACKEND"] = "tensorflow" - np.random.seed(123) - x, y = trivial_classification_problem - epochs = 15 - model, init_lr = create_model() - callback, params = callbacks_dict[name] - callback = callback(**params) - callbacks = [callback] - model.fit(x, y, epochs=epochs, callbacks=callbacks, validation_split=0.1) - rtol = 1e-2 - atol = 5e-4 - if name == "LRScheduler": - epochs_drop, drop = params["epochs_drop"], params["drop"] - step = math.floor(epochs / epochs_drop) - actual_lr = init_lr * math.pow(drop, step) - key = ( - "learning_rate" if "learning_rate" in model.optimizer.get_config() else "lr" - ) - learning_rate = model.optimizer.get_config().get(key, 0.0) - assert np.isclose( - actual_lr, learning_rate, rtol=rtol, atol=atol, equal_nan=False - ) - elif name == "EarlyStopping": - assert callback.stopped_epoch == 6 diff --git a/csrank/tests/test_choice_functions.py b/csrank/tests/test_choice_functions.py index 9f1375ae..002d8a23 100644 --- a/csrank/tests/test_choice_functions.py +++ b/csrank/tests/test_choice_functions.py @@ -1,19 +1,14 @@ -import os - -from keras.optimizers import SGD import numpy as np from pymc3.variational.callbacks import CheckParametersConvergence import pytest -import tensorflow as tf +import torch +from torch import optim -from csrank.choicefunction import * -from csrank.constants import CMPNET_CHOICE +from csrank.choicefunction import FATEChoiceFunction +from csrank.choicefunction import GeneralizedLinearModel +from csrank.choicefunction import PairwiseSVMChoiceFunction from csrank.constants import FATE_CHOICE -from csrank.constants import FATELINEAR_CHOICE -from csrank.constants import FETA_CHOICE -from csrank.constants import FETALINEAR_CHOICE from csrank.constants import GLM_CHOICE -from csrank.constants import RANKNET_CHOICE from csrank.constants import RANKSVM_CHOICE from csrank.metrics_np import auc_score from csrank.metrics_np import f1_measure @@ -26,24 +21,26 @@ "Informedness": instance_informedness, "AucScore": auc_score, } -optimizer_common_args = { - "optimizer": SGD, - "optimizer__lr": 1e-3, - "optimizer__momentum": 0.9, - "optimizer__nesterov": True, -} def get_vals(values): return dict(zip(choice_metrics.keys(), values)) +skorch_common_args = { + "max_epochs": 100, + "optimizer": optim.SGD, + "optimizer__lr": 1e-3, + "optimizer__momentum": 0.9, + "optimizer__nesterov": True, + # We evaluate the estimators in-sample. These tests are just small + # sanity checks, so overfitting is okay here. 
+ "train_split": None, +} + choice_functions = { - FETA_CHOICE: ( - FETAChoiceFunction, - {"add_zeroth_order_model": True, **optimizer_common_args}, - get_vals([0.946, 0.9684, 0.9998]), - ), + GLM_CHOICE: (GeneralizedLinearModel, {}, get_vals([0.9567, 0.9955, 1.0])), + RANKSVM_CHOICE: (PairwiseSVMChoiceFunction, {}, get_vals([0.9522, 0.9955, 1.0])), FATE_CHOICE: ( FATEChoiceFunction, { @@ -51,48 +48,36 @@ def get_vals(values): "n_hidden_set_layers": 1, "n_hidden_joint_units": 5, "n_hidden_set_units": 5, - **optimizer_common_args, + **skorch_common_args, }, - get_vals([0.8185, 0.6070, 0.9924]), - ), - FATELINEAR_CHOICE: ( - FATELinearChoiceFunction, - {}, - get_vals([0.6558, 0.0722, 0.9998]), - ), - FETALINEAR_CHOICE: ( - FETALinearChoiceFunction, - {}, - get_vals([0.8782, 0.8894, 0.9998]), + get_vals([0.7177, 0.3119, 1.0]), ), - RANKNET_CHOICE: ( - RankNetChoiceFunction, - optimizer_common_args.copy(), - get_vals([0.9522, 0.9866, 1.0]), - ), - CMPNET_CHOICE: ( - CmpNetChoiceFunction, - optimizer_common_args.copy(), - get_vals([0.8554, 0.8649, 0.966]), - ), - GLM_CHOICE: (GeneralizedLinearModel, {}, get_vals([0.9567, 0.9955, 1.0])), - RANKSVM_CHOICE: (PairwiseSVMChoiceFunction, {}, get_vals([0.9522, 0.9955, 1.0])), } @pytest.fixture(scope="module") def trivial_choice_problem(): random_state = np.random.RandomState(42) - x = random_state.randn(200, 5, 1) - y_true = np.array(x.squeeze(axis=-1) > np.mean(x)) + # pytorch uses 32 bit floats by default. That should be precise enough and + # makes it easier to use pytorch and non-pytorch estimators interchangeably. + x = random_state.randn(200, 5, 1).astype(np.float32) + # The pytorch estimators expect booleans to be encoded as a 32 bit float + # (1.0 for True, 0.0 for false). + y_true = np.array(x.squeeze(axis=-1) > np.mean(x), dtype=np.float32) return x, y_true @pytest.mark.parametrize("name", list(choice_functions.keys())) def test_choice_function_fixed(trivial_choice_problem, name): - tf.set_random_seed(0) - os.environ["KERAS_BACKEND"] = "tensorflow" np.random.seed(123) + # Pytorch does not guarantee full reproducibility in different settings + # [1]. This may become a problem in the test suite, in which case we should + # increase the tolerance. These are only "sanity checks" on small data sets + # anyway and the exact values do not mean much here. + # [1] https://pytorch.org/docs/stable/notes/randomness.html + torch.manual_seed(123) + # Trade off performance for better reproducibility. 
+ torch.use_deterministic_algorithms(True) x, y = trivial_choice_problem choice_function = choice_functions[name][0] params, accuracies = choice_functions[name][1], choice_functions[name][2] @@ -107,10 +92,9 @@ def test_choice_function_fixed(trivial_choice_problem, name): "callbacks": [CheckParametersConvergence()], }, ) - elif "linear" in name: - learner.fit(x, y, epochs=10, validation_split=0, verbose=False) else: - learner.fit(x, y, epochs=100, validation_split=0, verbose=False) + learner.fit(x, y) + s_pred = learner.predict_scores(x) y_pred = learner.predict_for_scores(s_pred) y_pred_2 = learner.predict(x) diff --git a/csrank/tests/test_discrete_choice.py b/csrank/tests/test_discrete_choice.py index bbb4b6d2..86636b41 100644 --- a/csrank/tests/test_discrete_choice.py +++ b/csrank/tests/test_discrete_choice.py @@ -1,25 +1,24 @@ -import os - -from keras.optimizers import SGD import numpy as np from pymc3.variational.callbacks import CheckParametersConvergence import pytest -import tensorflow as tf +import torch +from torch import optim -from csrank.constants import CMPNET_DC from csrank.constants import FATE_DC -from csrank.constants import FATELINEAR_DC -from csrank.constants import FETA_DC -from csrank.constants import FETALINEAR_DC from csrank.constants import GEV from csrank.constants import MLM from csrank.constants import MNL from csrank.constants import NLM from csrank.constants import PCL -from csrank.constants import RANKNET_DC from csrank.constants import RANKSVM_DC from csrank.dataset_reader.discretechoice.util import convert_to_label_encoding -from csrank.discretechoice import * +from csrank.discretechoice import FATEDiscreteChoiceFunction +from csrank.discretechoice import GeneralizedNestedLogitModel +from csrank.discretechoice import MixedLogitModel +from csrank.discretechoice import MultinomialLogitModel +from csrank.discretechoice import NestedLogitModel +from csrank.discretechoice import PairedCombinatorialLogit +from csrank.discretechoice import PairwiseSVMDiscreteChoiceFunction from csrank.metrics_np import categorical_accuracy_np from csrank.metrics_np import subset_01_loss from csrank.metrics_np import topk_categorical_accuracy_np @@ -29,34 +28,24 @@ "CategoricalAccuracy": categorical_accuracy_np, "CategoricalTopK2": topk_categorical_accuracy_np(k=2), } -optimizer_common_args = { - "optimizer": SGD, - "optimizer__lr": 1e-3, - "optimizer__momentum": 0.9, - "optimizer__nesterov": True, -} def get_vals(values=[1.0, 1.0]): return dict(zip(metrics.keys(), values)) +skorch_common_args = { + "max_epochs": 100, + "optimizer": optim.SGD, + "optimizer__lr": 1e-3, + "optimizer__momentum": 0.9, + "optimizer__nesterov": True, + # We evaluate the estimators in-sample. These tests are just small + # sanity checks, so overfitting is okay here. 
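+    # As in test_choice_functions.py, train_split=None disables skorch's
+    # internal validation split.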
+ "train_split": None, +} + discrete_choice_functions = { - FETA_DC: ( - FETADiscreteChoiceFunction, - {"n_hidden": 1, **optimizer_common_args}, - get_vals([0.978, 1.0]), - ), - RANKNET_DC: ( - RankNetDiscreteChoiceFunction, - optimizer_common_args.copy(), - get_vals([0.97, 0.996]), - ), - CMPNET_DC: ( - CmpNetDiscreteChoiceFunction, - optimizer_common_args.copy(), - get_vals([0.994, 1.0]), - ), FATE_DC: ( FATEDiscreteChoiceFunction, { @@ -64,16 +53,10 @@ def get_vals(values=[1.0, 1.0]): "n_hidden_set_layers": 1, "n_hidden_joint_units": 5, "n_hidden_set_units": 5, - **optimizer_common_args, + **skorch_common_args, }, - get_vals([0.95, 0.998]), - ), - FATELINEAR_DC: ( - FATELinearDiscreteChoiceFunction, - {"n_hidden_set_units": 1, "learning_rate": 5e-3, "batch_size": 32}, - get_vals([0.74, 0.934]), + get_vals([1.0, 1.0]), ), - FETALINEAR_DC: (FETALinearDiscreteChoiceFunction, {}, get_vals([0.976, 0.9998])), MNL: (MultinomialLogitModel, {}, get_vals([0.998, 1.0])), NLM: (NestedLogitModel, {}, get_vals()), PCL: (PairedCombinatorialLogit, {}, get_vals()), @@ -86,7 +69,9 @@ def get_vals(values=[1.0, 1.0]): @pytest.fixture(scope="module") def trivial_discrete_choice_problem(): random_state = np.random.RandomState(42) - x = random_state.randn(500, 5, 2) + # pytorch uses 32 bit floats by default. That should be precise enough and + # makes it easier to use pytorch and non-pytorch estimators interchangeably. + x = random_state.randn(500, 5, 2).astype(np.float32) w = random_state.rand(2) y_true = np.argmax(np.dot(x, w), axis=1) y_true = convert_to_label_encoding(y_true, 5) @@ -95,9 +80,11 @@ def trivial_discrete_choice_problem(): @pytest.mark.parametrize("name", list(discrete_choice_functions.keys())) def test_discrete_choice_function_fixed(trivial_discrete_choice_problem, name): - tf.set_random_seed(0) - os.environ["KERAS_BACKEND"] = "tensorflow" np.random.seed(123) + # There are some caveats with pytorch reproducibility. See the comment on + # the corresponding line of `test_choice_functions.py` for details. 
+ torch.manual_seed(123) + torch.use_deterministic_algorithms(True) x, y = trivial_discrete_choice_problem choice_function = discrete_choice_functions[name][0] params, accuracies = ( @@ -115,10 +102,8 @@ def test_discrete_choice_function_fixed(trivial_discrete_choice_problem, name): "callbacks": [CheckParametersConvergence()], }, ) - elif "linear" in name: - learner.fit(x, y, epochs=10, validation_split=0, verbose=False) else: - learner.fit(x, y, epochs=100, validation_split=0, verbose=False) + learner.fit(x, y) s_pred = learner.predict_scores(x) y_pred = learner.predict_for_scores(s_pred) y_pred_2 = learner.predict(x) diff --git a/csrank/tests/test_fate.py b/csrank/tests/test_fate.py deleted file mode 100644 index 96023824..00000000 --- a/csrank/tests/test_fate.py +++ /dev/null @@ -1,69 +0,0 @@ -from abc import ABCMeta - -from keras import Input -from keras import Model -from keras.regularizers import l2 -import numpy as np - -from csrank import FATENetworkCore -from csrank import FATEObjectRanker -from csrank.tests.test_ranking import optimizer_common_args - - -def test_construction_core(): - n_objects = 3 - n_features = 2 - - # Create mock class: - - class MockClass(FATENetworkCore, metaclass=ABCMeta): - def predict_scores(self, X, **kwargs): - pass - - def _predict_scores_fixed(self, X, **kwargs): - pass - - def predict(self, *args, **kwargs): - pass - - def fit(self, *args, **kwargs): - return self - - grc = MockClass() - grc._initialize_optimizer() - grc._initialize_regularizer() - grc._construct_layers() - input_layer = Input(shape=(n_objects, n_features)) - scores = grc.join_input_layers(input_layer, None, n_layers=0, n_objects=n_objects) - - model = Model(inputs=input_layer, outputs=scores) - model.compile(loss="mse", optimizer=grc.optimizer_) - X = np.random.randn(100, n_objects, n_features) - y = X.sum(axis=2) - model.fit(x=X, y=y, verbose=0) - - -def test_fate_object_ranker_fixed_generator(): - def trivial_ranking_problem_generator(): - while True: - rand = np.random.RandomState(123) - x = rand.randn(10, 5, 1) - y_true = x.argsort(axis=1).argsort(axis=1).squeeze(axis=-1) - yield x, y_true - - fate = FATEObjectRanker( - n_hidden_joint_layers=1, - n_hidden_set_layers=1, - n_hidden_joint_units=5, - n_hidden_set_units=5, - kernel_regularizer=l2, - kernel_regularizer__l=1e-4, - **optimizer_common_args, - ) - fate.fit_generator( - generator=trivial_ranking_problem_generator(), - epochs=1, - validation_split=0, - verbose=False, - steps_per_epoch=10, - ) diff --git a/csrank/tests/test_learner.py b/csrank/tests/test_learner.py deleted file mode 100644 index fcdfe2fd..00000000 --- a/csrank/tests/test_learner.py +++ /dev/null @@ -1,23 +0,0 @@ -from csrank.objectranking import FATEObjectRanker - - -def test_get_and_set_params(): - """Tests the get_params and set_params function of our learners.""" - # FATEObjectRanker is chosen as an arbitrary example; the functions are - # implemented in the learner superclass. - learner = FATEObjectRanker() - params = set(learner.get_params().keys()) - # Regular parameters - assert "activation" in params - assert "kernel_initializer" in params - # Regular nested parameters - assert "optimizer" in params - assert "optimizer__learning_rate" in params - # A special case of a nested parameter, since there is no base - # "hidden_dense_layer" parameter. - assert "hidden_dense_layer__bias_constraint" in params - - # All parameters returned by get_parameters can also be set. 
- learner.set_params(batch_size=42, optimizer__learning_rate=10) - - assert learner.get_params()["batch_size"] == 42 diff --git a/csrank/tests/test_losses.py b/csrank/tests/test_losses.py deleted file mode 100644 index 0fec2442..00000000 --- a/csrank/tests/test_losses.py +++ /dev/null @@ -1,78 +0,0 @@ -from keras import backend as K -import numpy as np -from numpy.testing import assert_almost_equal - -from csrank.losses import hinged_rank_loss -from csrank.losses import plackett_luce_loss -from csrank.losses import smooth_rank_loss - -decimal = 3 - - -def test_hinged_rank_loss(): - y_true = np.arange(5)[None, :] - y_true_tensor = K.constant(y_true) - - # Predicting all 0, gives an error of 1.0: - assert_almost_equal( - actual=K.eval( - hinged_rank_loss( - y_true_tensor, K.constant(np.array([[0.0, 0.0, 0.0, 0.0, 0.0]])) - ) - ), - desired=np.array([1.0]), - decimal=decimal, - ) - - # Predicting the correct ranking improves, but penalizes by difference of - # scores: - assert_almost_equal( - actual=K.eval( - hinged_rank_loss( - y_true_tensor, K.constant(np.array([[0.2, 0.1, 0.0, -0.1, -0.2]])) - ) - ), - desired=np.array([0.8]), - decimal=decimal, - ) - - -def test_plackett_luce_loss(): - y_true = np.arange(5)[None, :] - y_true_tensor = K.constant(y_true) - assert_almost_equal( - actual=K.eval( - plackett_luce_loss( - y_true_tensor, K.constant(np.array([[0.0, 0.0, 0.0, 0.0, 0.0]])) - ) - ), - desired=np.array([4.78749]), - decimal=decimal, - ) - - -def test_smooth_rank_loss(): - y_true = np.arange(5)[None, :] - y_true_tensor = K.constant(y_true) - - # Predicting all 0, gives an error of 1.0: - assert_almost_equal( - actual=K.eval( - smooth_rank_loss( - y_true_tensor, K.constant(np.array([[0.0, 0.0, 0.0, 0.0, 0.0]])) - ) - ), - desired=np.array([1.0]), - decimal=decimal, - ) - - # Predicting the correct ranking improves, but penalizes by difference of - # scores: - assert_almost_equal( - actual=K.eval( - smooth_rank_loss( - y_true_tensor, K.constant(np.array([[0.2, 0.1, 0.0, -0.1, -0.2]])) - ) - ), - desired=np.array([0.82275984]), - ) diff --git a/csrank/tests/test_metrics.py b/csrank/tests/test_metrics.py deleted file mode 100644 index 2a521680..00000000 --- a/csrank/tests/test_metrics.py +++ /dev/null @@ -1,265 +0,0 @@ -import itertools - -from keras import backend as K -import numpy as np -from numpy.testing import assert_almost_equal -import pytest -from pytest import approx - -from csrank.metrics import err -from csrank.metrics import kendalls_tau_for_scores -from csrank.metrics import make_ndcg_at_k_loss -from csrank.metrics import spearman_correlation_for_scores -from csrank.metrics import zero_one_accuracy -from csrank.metrics import zero_one_accuracy_for_scores -from csrank.metrics import zero_one_rank_loss -from csrank.metrics import zero_one_rank_loss_for_scores -from csrank.metrics import zero_one_rank_loss_for_scores_ties -from csrank.metrics_np import err_np -from csrank.metrics_np import kendalls_tau_for_scores_np -from csrank.metrics_np import spearman_correlation_for_scores_np -from csrank.metrics_np import spearman_correlation_for_scores_scipy -from csrank.metrics_np import zero_one_accuracy_for_scores_np -from csrank.metrics_np import zero_one_rank_loss_for_scores_np -from csrank.metrics_np import zero_one_rank_loss_for_scores_ties_np -from csrank.numpy_util import ranking_ordering_conversion - - -@pytest.fixture(scope="module", params=[(False), (True)], ids=["NoTies", "Ties"]) -def problem_for_pred(request): - ties = request.param - y_true = np.arange(5)[None, :] - # We 
test the error by swapping one adjacent pair: - if ties: - y_pred = np.array([[0, 2, 1, 2, 3]]) - else: - y_pred = np.array([[0, 2, 1, 3, 4]]) - return y_true, y_pred, ties - - -@pytest.fixture(scope="module", params=[(False), (True)], ids=["NoTies", "Ties"]) -def problem_for_scores(request): - ties = request.param - y_true = np.arange(5)[None, :] - # We test the error by swapping one adjacent pair: - if ties: - y_scores = np.array([[1.0, 0.8, 0.9, 0.8, 0.6]]) - else: - y_scores = np.array([[1.0, 0.8, 0.9, 0.7, 0.6]]) - return y_true, y_scores, ties - - -def test_zero_one_rank_loss(problem_for_pred): - y_true, y_pred, ties = problem_for_pred - score = zero_one_rank_loss(y_true, y_pred) - real_score = K.eval(score) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.15])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.1])) - - -def test_zero_one_rank_loss_for_scores(problem_for_scores): - y_true, y_scores, ties = problem_for_scores - score = zero_one_rank_loss_for_scores(y_true, y_scores) - real_score = K.eval(score) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.15])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.1])) - - score = zero_one_rank_loss_for_scores_ties(y_true, y_scores) - real_score = K.eval(score) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.15])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.1])) - - y_true, y_scores, ties = problem_for_scores - real_score = zero_one_rank_loss_for_scores_np(y_true, y_scores) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.15])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.1])) - - real_score = zero_one_rank_loss_for_scores_ties_np(y_true, y_scores) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.15])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.1])) - - -def test_zero_one_accuracy(problem_for_pred): - y_true, y_pred, ties = problem_for_pred - - score = zero_one_accuracy(y_true, y_pred) - real_score = K.eval(score) - assert_almost_equal(actual=real_score, desired=np.array([0.0])) - - y_true, y_pred, ties = problem_for_pred - - real_score = zero_one_accuracy_for_scores_np(y_true, y_pred) - assert_almost_equal(actual=real_score, desired=np.array([0.0])) - - -def test_ndcg(problem_for_pred): - # ties don't matter here because it doesn't change the two highest predictions - y_true, y_pred, _ties = problem_for_pred - # We have: - # y_true = [0, 1, 2, 3, 4] - # y_pred = [0, 2, 1, 2, 3] - - # Inverted (with max_rank = 4) that is - # y_true_inv = [4, 3, 2, 1, 0] - # y_pred_inv = [4, 2, 3, 2, 1] - - # And normalized to [0, 1] this gives us the relevance: - # rel_true = [1, 3/4, 1/2, 1/4, 0] - # rel_pred = [1, 1/2, 3/4, 1/2, 1/4] - - # With this we can first compute the ideal dcg, considering only the first - # k=2 elements (all logs are base 2, equality is approximate): - idcg = (2 ** 1 - 1) / np.log2(2) + (2 ** (3 / 4) - 1) / np.log2(3) # = 1.43 - - # And the dcg of the predictions at the same positions as the elements we - # considered for the idcg (i.e. 
the "true" best elements): - dcg = (2 ** 1 - 1) / np.log2(2) + (2 ** (1 / 2) - 1) / np.log2(3) # = 1.26 - - # Now the gain is: - expected_gain = dcg / idcg # = 0.882 - - ndcg = make_ndcg_at_k_loss(k=2) - real_gain = K.eval(ndcg(y_true, y_pred)) - - assert_almost_equal(actual=real_gain, desired=expected_gain, decimal=5) - - -def test_kendalls_tau_for_scores(problem_for_scores): - y_true, y_pred, ties = problem_for_scores - - score = kendalls_tau_for_scores(y_true, y_pred) - real_score = K.eval(score) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.7])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.8])) - - real_score = kendalls_tau_for_scores_np(y_true, y_pred) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.7])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.8])) - - -def test_spearman_for_scores(problem_for_scores): - y_true_tensor, y_scores_tensor, ties = problem_for_scores - - score = spearman_correlation_for_scores(y_true_tensor, y_scores_tensor) - real_score = K.eval(score) - if ties: - # We do not handle ties for now - assert True - else: - assert_almost_equal(actual=real_score, desired=np.array([0.9])) - - y_true, y_scores, ties = problem_for_scores - - real_score = spearman_correlation_for_scores_scipy(y_true, y_scores) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.8207827])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.9])) - - real_score = spearman_correlation_for_scores_np(y_true, y_scores) - if ties: - assert True - else: - assert_almost_equal(actual=real_score, desired=np.array([0.9])) - - -def test_zero_one_accuracy_for_scores(problem_for_scores): - y_true_tensor, y_scores_tensor, ties = problem_for_scores - - score = zero_one_accuracy_for_scores(y_true_tensor, y_scores_tensor) - real_score = K.eval(score) - if ties: - assert_almost_equal(actual=real_score, desired=np.array([0.0])) - else: - assert_almost_equal(actual=real_score, desired=np.array([0.0])) - - -def test_err_perfect_first_trumps_many_good(): - """Tests that a perfect document at rank 1 trumps later rankings. - - The authors of [1] list this as a motivating example. A ranking that - puts a "perfect" document at rank 1 (i.e. one that is almost certain - to satisfy the user's needs) should trump one that puts a "good" one - at rank 1, regardless of the documents at later ranks. The reasoning - is that later ranks won't need to be examined when the first is - already sufficient. - - References - ---------- - [1] Chapelle, Olivier, et al. "Expected reciprocal rank for graded - relevance." Proceedings of the 18th ACM conference on Information and - knowledge management. ACM, 2009. 
http://olivier.chapelle.cc/pub/err.pdf - """ - y_true = ranking_ordering_conversion([range(20)]) - - # gets the "perfect" one right, everything else wrong - perfect_first = ranking_ordering_conversion( - [[0, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]] - ) - - # does pretty good for most, but ranks the "perfect" one wrong - all_good = ranking_ordering_conversion( - [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0]] - ) - - assert K.eval(err(y_true, perfect_first)) > K.eval(err(y_true, all_good)) - - -def test_err_against_manually_verified_example(): - """Compares the implementation against a manual calculation.""" - y_true = ranking_ordering_conversion([[1, 2, 0]]) - y_pred = ranking_ordering_conversion([[2, 1, 0]]) - # The resulting probabilities that each document satisfies the - # user's need: - # [2**1-1, 2**2-1, 2**0 - 1] / 2**2 = [1/4, 3/4, 0] - # Multiplied by the respective rank utilities (1/(r+1)): - # [(1/4)/3, (3/4)/2, 0/1] = [1/12, 3/8, 0] - # The resulting ERR: - - # We ranked object 2 first, which has a true rank of 1 and therefore - # (with the relevance gain probability mapping) a probability of - # (2**(2-1)-1) / 2**2 = 1/4 - # of matching the user's need. It is at rank 0, which has utility - # 1/(0+1) = 1. - - # Object 1 is next. True rank of 0, probability - # (2**(2-0)-1) / 2**2 = 3/4 - # and utility - # 1/(1+1) = 1/2. - - # Object 0 last. True rank of 2, probability - # (2**(2-2)-1) / 2**2 = 0 - # and utility - # 1/(2+1) = 1/3. - - # The resulting expected utility: - # 1/4 * 1 + (1 - 1/4) * 3/4 * 1/2 + (1 - 1/4) * (1 - 3/4) * 0 * 1/3 - # = 17/32 - # Approx because comparing floats is inherently error-prone. - assert K.eval(err(y_true, y_pred)) == approx(17 / 32) - - -def test_err_implementations_equivalent(): - """Spot-checks equivalence of plain python and tf implementations""" - # A simple grading where each grade occurs once. We want to check - # for equivalence at every permutation of this grading. - elems = np.array([4, 3, 2, 1, 0]) - y_true = np.reshape(elems, (1, -1)) - # Spot check some permutations (5! 
/ 20 = 6 checks are performed) - for perm in list(itertools.permutations(elems))[::20]: - perm = np.reshape(perm, (1, -1)) - assert K.eval(err(y_true, perm)) == approx(err_np(y_true, perm)) diff --git a/csrank/tests/test_ranking.py b/csrank/tests/test_ranking.py index ea2b87d8..76f97114 100644 --- a/csrank/tests/test_ranking.py +++ b/csrank/tests/test_ranking.py @@ -1,46 +1,29 @@ -import os - -from keras.optimizers import SGD import numpy as np import pytest -import tensorflow as tf +import torch +from torch import optim -from csrank.constants import CMPNET from csrank.constants import ERR from csrank.constants import FATE_RANKER -from csrank.constants import FATELINEAR_RANKER -from csrank.constants import FETA_RANKER -from csrank.constants import FETALINEAR_RANKER -from csrank.constants import LISTNET -from csrank.constants import RANKNET from csrank.constants import RANKSVM from csrank.metrics_np import zero_one_accuracy_np from csrank.metrics_np import zero_one_rank_loss_for_scores_ties_np -from csrank.objectranking import * -from csrank.objectranking.fate_object_ranker import FATEObjectRanker +from csrank.objectranking import ExpectedRankRegression +from csrank.objectranking import FATEObjectRanker +from csrank.objectranking import RankSVM -optimizer_common_args = { - "optimizer": SGD, +skorch_common_args = { + "max_epochs": 100, + "optimizer": optim.SGD, "optimizer__lr": 1e-3, "optimizer__momentum": 0.9, "optimizer__nesterov": True, + # We evaluate the estimators in-sample. These tests are just small + # sanity checks, so overfitting is okay here. + "train_split": None, } object_rankers = { - FATELINEAR_RANKER: ( - FATELinearObjectRanker, - {"n_hidden_set_units": 12, "batch_size": 1}, - (0.0, 1.0), - ), - FETALINEAR_RANKER: (FETALinearObjectRanker, {}, (0.0, 1.0)), - FETA_RANKER: ( - FETAObjectRanker, - {"add_zeroth_order_model": True, **optimizer_common_args}, - (0.0, 1.0), - ), - RANKNET: (RankNet, optimizer_common_args.copy(), (0.0, 1.0)), - CMPNET: (CmpNet, optimizer_common_args.copy(), (0.0, 1.0),), - LISTNET: (ListNet, {"n_top": 3, **optimizer_common_args}, (0.0, 1.0)), ERR: (ExpectedRankRegression, {}, (0.0, 1.0)), RANKSVM: (RankSVM, {}, (0.0, 1.0)), FATE_RANKER: ( @@ -50,6 +33,7 @@ "n_hidden_set_layers": 1, "n_hidden_joint_units": 5, "n_hidden_set_units": 5, + **skorch_common_args, }, (0.0, 1.0), ), @@ -59,23 +43,24 @@ @pytest.fixture(scope="module") def trivial_ranking_problem(): random_state = np.random.RandomState(123) - x = random_state.randn(2, 5, 1) + # pytorch uses 32 bit floats by default. That should be precise enough and + # makes it easier to use pytorch and non-pytorch estimators interchangeably. + x = random_state.randn(2, 5, 1).astype(np.float32) y_true = x.argsort(axis=1).argsort(axis=1).squeeze(axis=-1) return x, y_true @pytest.mark.parametrize("ranker_name", list(object_rankers.keys())) def test_object_ranker_fixed(trivial_ranking_problem, ranker_name): - tf.set_random_seed(0) - os.environ["KERAS_BACKEND"] = "tensorflow" np.random.seed(123) + # There are some caveats with pytorch reproducibility. See the comment on + # the corresponding line of `test_choice_functions.py` for details. 
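+    # torch.manual_seed covers torch's global RNG; numpy is seeded
+    # separately above.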
+ torch.manual_seed(123) + torch.use_deterministic_algorithms(True) x, y = trivial_ranking_problem ranker, params, (loss, acc) = object_rankers[ranker_name] ranker = ranker(**params) - if "linear" in ranker_name: - ranker.fit(x, y, epochs=10, validation_split=0, verbose=False) - else: - ranker.fit(x, y, epochs=100, validation_split=0, verbose=False) + ranker.fit(x, y) pred_scores = ranker.predict_scores(x) pred_loss = zero_one_rank_loss_for_scores_ties_np(y, pred_scores) rtol = 1e-2 diff --git a/csrank/tests/test_util.py b/csrank/tests/test_util.py deleted file mode 100644 index 731f0319..00000000 --- a/csrank/tests/test_util.py +++ /dev/null @@ -1,28 +0,0 @@ -from keras import backend as K -import numpy as np -import tensorflow as tf - -from csrank import SyntheticIterator -from csrank.tensorflow_util import tensorify - - -def test_tensorify(): - a = np.array([1.0, 2.0]) - out = tensorify(a) - assert isinstance(out, tf.Tensor) - - b = K.zeros((5, 3)) - out = tensorify(b) - assert b == out - - -def test_synthetic_iterator(): - def func(a, b): - return (b, a) - - it = SyntheticIterator(dataset_function=func, a=41, b=2) - for i, (x, y) in enumerate(it): - if i == 1: - break - assert x == 2 - assert y == 41 diff --git a/csrank/util.py b/csrank/util.py index 1f408e02..3c0a076e 100644 --- a/csrank/util.py +++ b/csrank/util.py @@ -7,9 +7,6 @@ import re import sys -from csrank.metrics import make_ndcg_at_k_loss -from csrank.metrics import zero_one_accuracy -from csrank.metrics import zero_one_rank_loss from csrank.metrics_np import f1_measure from csrank.metrics_np import hamming from csrank.metrics_np import instance_informedness @@ -38,9 +35,6 @@ subset_01_loss, hamming, instance_informedness, - zero_one_rank_loss, - zero_one_accuracy, - make_ndcg_at_k_loss, zero_one_accuracy_np, ] diff --git a/definitions.nix b/definitions.nix new file mode 100644 index 00000000..cee80b99 --- /dev/null +++ b/definitions.nix @@ -0,0 +1,32 @@ +{ # `git ls-remote https://github.com/nixos/nixpkgs-channels nixos-unstable` + nixpkgs-rev ? "266dc8c3d052f549826ba246d06787a219533b8f" + # pin nixpkgs to the specified revision if not overridden +, pkgsPath ? builtins.fetchTarball { + name = "nixpkgs-${nixpkgs-rev}"; + url = "https://github.com/nixos/nixpkgs/archive/${nixpkgs-rev}.tar.gz"; + } +, pkgs ? import pkgsPath {} +}: let + lib = pkgs.lib; +in { + inherit pkgs; + pythonEnv = pkgs.poetry2nix.mkPoetryEnv { + projectDir = ./.; + python = pkgs.python38; + overrides = pkgs.poetry2nix.overrides.withDefaults (self: super: { + sphinx-rtd-theme = super.sphinx_rtd_theme; + pillow = super.pillow.overridePythonAttrs ( + old: { + # https://github.com/nix-community/poetry2nix/issues/180 + buildInputs = with pkgs; [ xorg.libX11 ] ++ old.buildInputs; + } + ); + matplotlib = super.matplotlib.overridePythonAttrs ( + old: { + propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [ self.certifi ]; + } + ); + theano = self.theano-pymc; + }); + }; +} diff --git a/docs/api/choicefunction.rst b/docs/api/choicefunction.rst index 9b644543..486bf906 100644 --- a/docs/api/choicefunction.rst +++ b/docs/api/choicefunction.rst @@ -5,14 +5,10 @@ Choice Functions .. currentmodule:: csrank.choicefunctions .. autosummary:: - FATEChoiceFunction - FETAChoiceFunction - CmpNetChoiceFunction - RankNetChoiceFunction GeneralizedLinearModel PairwiseSVMChoiceFunction .. 
automodule:: csrank.choicefunctions - :members: FATEChoiceFunction, FETAChoiceFunction, CmpNetChoiceFunction, RankNetChoiceFunction, GeneralizedLinearModel, PairwiseSVMChoiceFunction + :members: GeneralizedLinearModel, PairwiseSVMChoiceFunction :undoc-members: diff --git a/docs/api/discretechoice.rst b/docs/api/discretechoice.rst index 6caebb73..cc1944dd 100644 --- a/docs/api/discretechoice.rst +++ b/docs/api/discretechoice.rst @@ -4,10 +4,6 @@ Discrete Choice Models .. currentmodule:: csrank.discretechoice .. autosummary:: - FATEDiscreteChoiceFunction - FETADiscreteChoiceFunction - RankNetDiscreteChoiceFunction - CmpNetDiscreteChoiceFunction PairwiseSVMDiscreteChoiceFunction GeneralizedNestedLogitModel MixedLogitModel @@ -16,5 +12,5 @@ Discrete Choice Models PairedCombinatorialLogit .. automodule:: csrank.discretechoice - :members: FATEDiscreteChoiceFunction, FETADiscreteChoiceFunction, RankNetDiscreteChoiceFunction, CmpNetDiscreteChoiceFunction, PairwiseSVMDiscreteChoiceFunction, GeneralizedNestedLogitModel, MixedLogitModel, MultinomialLogitModel, NestedLogitModel, PairedCombinatorialLogit + :members: PairwiseSVMDiscreteChoiceFunction, GeneralizedNestedLogitModel, MixedLogitModel, MultinomialLogitModel, NestedLogitModel, PairedCombinatorialLogit :undoc-members: diff --git a/docs/api/objectranking.rst b/docs/api/objectranking.rst index 15ef3467..14ceaf58 100644 --- a/docs/api/objectranking.rst +++ b/docs/api/objectranking.rst @@ -5,14 +5,9 @@ Object Ranking .. currentmodule:: csrank.objectranking .. autosummary:: - FATEObjectRanker - FETAObjectRanker - CmpNet - ListNet - RankNet ExpectedRankRegression RankSVM .. automodule:: csrank.objectranking - :members: FATEObjectRanker, FETAObjectRanker, CmpNet, ListNet, RankNet, ExpectedRankRegression, RankSVM + :members: ExpectedRankRegression, RankSVM :undoc-members: diff --git a/docs/examples.rst b/docs/examples.rst deleted file mode 100644 index df807c65..00000000 --- a/docs/examples.rst +++ /dev/null @@ -1,12 +0,0 @@ -******** -Examples -******** - -.. toctree:: - - notebooks/FATE-Net-Ranking.ipynb - notebooks/FATE-Net-DC.ipynb - notebooks/Rank-Net-Choice.ipynb - notebooks/GeneralizationExperiments.ipynb - notebooks/ParameterOptimizer-quickstart.ipynb - notebooks/Visualize-NeuralNetwork.ipynb diff --git a/docs/intro.rst b/docs/intro.rst index afe8875f..b967ddf2 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -1,121 +1,3 @@ .. _intro: -|Build Status| |Coverage| |Binder| - -************ -Introduction -************ -CS-Rank is a Python package for context-sensitive ranking and choice -algorithms. - -We implement the following new object ranking/choice architectures: - -* FATE (First aggregate then evaluate) -* FETA (First evaluate then aggregate) - -In addition, we also implement these algorithms for choice functions: - -* RankNetChoiceFunction -* GeneralizedLinearModel -* PairwiseSVMChoiceFunction - -These are the state-of-the-art approaches implemented for the discrete choice -setting: - -* GeneralizedNestedLogitModel -* MixedLogitModel -* NestedLogitModel -* PairedCombinatorialLogit -* RankNetDiscreteChoiceFunction -* PairwiseSVMDiscreteChoiceFunction - -Check out our `interactive notebooks`_ to quickly find out what our package can -do. - - -Getting started -=============== -As a simple "Hello World!"-example we will try to learn the Pareto problem: - -.. 
code-block:: python - - import csrank as cs - from csrank import ChoiceDatasetGenerator - gen = ChoiceDatasetGenerator(dataset_type='pareto', - n_objects=30, - n_features=2) - X_train, Y_train, X_test, Y_test = gen.get_single_train_test_split() - -All our learning algorithms are implemented using the scikit-learn estimator -API. Fitting our FATENet architecture is as simple as calling the ``fit`` -method: - -.. code-block:: python - - fate = cs.FATEChoiceFunction() - fate.fit(X_train, Y_train) - -Predictions can then be obtained using: - -.. code-block:: python - - fate.predict(X_test) - - -Installation ------------- -The latest release version of CS-Rank can be installed from Github as follows:: - - pip install git+https://github.com/kiudee/cs-ranking.git - -Another option is to clone the repository and install CS-Rank using:: - - python setup.py install - - -Dependencies ------------- -CS-Rank depends on Tensorflow, Keras, NumPy, SciPy, matplotlib, scikit-learn, -joblib and tqdm. For data processing and generation you will -also need PyGMO, H5Py and pandas. - - -Citing CS-Rank ----------------- -You can cite our `arXiv papers`_:: - - @article{csrank2019, - author = {Karlson Pfannschmidt and - Pritha Gupta and - Eyke H{\"{u}}llermeier}, - title = {Learning Choice Functions: Concepts and Architectures }, - journal = {CoRR}, - volume = {abs/1901.10860}, - year = {2019} - } - - @article{csrank2018, - author = {Karlson Pfannschmidt and - Pritha Gupta and - Eyke H{\"{u}}llermeier}, - title = {Deep architectures for learning context-dependent ranking functions}, - journal = {CoRR}, - volume = {abs/1803.05796}, - year = {2018} - } - -License --------- -`Apache License, Version 2.0 `_ - -.. |Binder| image:: https://mybinder.org/badge.svg - :target: https://mybinder.org/v2/gh/kiudee/cs-ranking/master?filepath=docs%2Fnotebooks - -.. |Coverage| image:: https://codecov.io/gh/kiudee/cs-ranking/branch/master/graph/badge.svg - :target: https://codecov.io/gh/kiudee/cs-ranking - -.. |Build Status| image:: https://travis-ci.org/kiudee/cs-ranking.svg?branch=master - :target: https://travis-ci.org/kiudee/cs-ranking - -.. _interactive notebooks: https://mybinder.org/v2/gh/kiudee/cs-ranking/master?filepath=docs%2Fnotebooks -.. _arXiv papers: https://arxiv.org/search/cs?searchtype=author&query=Pfannschmidt%2C+K +.. include:: ../README.rst diff --git a/docs/notebooks/.gitignore b/docs/notebooks/.gitignore deleted file mode 100644 index 1928a6fe..00000000 --- a/docs/notebooks/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -/new_outputs/ -/gr_vis/ -/fate.ipynb -/othermodels.ipynb -/temporary/ diff --git a/docs/notebooks/FATE-Net-DC.ipynb b/docs/notebooks/FATE-Net-DC.ipynb deleted file mode 100644 index 7dad5bff..00000000 --- a/docs/notebooks/FATE-Net-DC.ipynb +++ /dev/null @@ -1,285 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Discrete Choice Problem" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n", - "/home/pritha/anaconda3/envs/linenv/lib/python3.7/site-packages/sklearn/externals/joblib/__init__.py:15: DeprecationWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. 
If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.\n", - " warnings.warn(msg, category=DeprecationWarning)\n" - ] - } - ], - "source": [ - "%%capture --no-stderr\n", - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from csrank import FATEDiscreteChoiceFunction" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The Medoid DC Problem" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the medoid problem the goal of the discrete choice algorithms for the medoid problem is to find the most central object for the given set.\n", - "This problem is inspired by solving the task of finding a good representation of the given data using the most central point of the data points\n", - "\n", - "We will generate a random dataset where each instance contains 5 objects and 2 features for easy plotting." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from csrank import DiscreteChoiceDatasetGenerator\n", - "seed = 123\n", - "n_train = 10000\n", - "n_test = 10000\n", - "n_features = 2\n", - "n_objects = 5\n", - "gen = DiscreteChoiceDatasetGenerator(dataset_type='medoid', random_state=seed,\n", - " n_train_instances=n_train,\n", - " n_test_instances=n_test,\n", - " n_objects=n_objects,\n", - " n_features=n_features)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "X_train, Y_train, X_test, Y_test = gen.get_single_train_test_split()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let us plot a random instance. The medoid is marked as M." 
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 4,
-    "metadata": {},
-    "outputs": [
-     {
-      "data": {
-       "image/png": "[base64-encoded PNG omitted: scatter plot of one training instance with the medoid labeled M]",
-       "text/plain": [
-        "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "def get_name(d):\n", - " if d ==0:\n", - " return \"\"\n", - " else:\n", - " return \"M\"\n", - "fig, ax = plt.subplots(figsize=(5,5))\n", - "inst = np.random.choice(n_train)\n", - "ax.scatter(X_train[inst][:, 0], X_train[inst][:, 1])\n", - "for i in range(n_objects):\n", - " ax.text(X_train[inst, i, 0]+0.01,\n", - " X_train[inst, i, 1]+0.01,\n", - " s=get_name(int(Y_train[inst, i])))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The FATE network\n", - "The first-aggregate-then-evaluate approach learns an embedding of each object and then aggregates that into a _context_:\n", - "\\begin{equation}\n", - "\t\\mu_{C(\\vec{x})} = \\frac{1}{|C(\\vec{x})|} \\sum_{\\vec{y} \\in C(\\vec{x})} \\phi(\\vec{y})\n", - "\\end{equation}\n", - "and then scores each object $\\vec{x}$ using a generalized utility function $U (\\vec{x}, \\mu_{C(\\vec{x})})$." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from keras.optimizers import SGD\n", - "from csrank import FATEObjectRanker\n", - "from csrank.losses import smooth_rank_loss\n", - "fate = FATEObjectRanker(\n", - " loss_function=smooth_rank_loss,\n", - " optimizer=SGD,\n", - " optimizer__lr=1e-4,\n", - " optimizer__nesterov=True,\n", - " optimizer__momentum=0.9)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will run the training for only 10 epochs to get an idea of the convergence:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train on 9000 samples, validate on 1000 samples\n", - "Epoch 1/10\n", - "9000/9000 [==============================] - 5s 597us/step - loss: 11.3359 - zero_one_rank_loss_for_scores_ties: 2.2760 - val_loss: 11.1825 - val_zero_one_rank_loss_for_scores_ties: 1.7870\n", - "Epoch 2/10\n", - "9000/9000 [==============================] - 0s 55us/step - loss: 11.1318 - zero_one_rank_loss_for_scores_ties: 1.6821 - val_loss: 11.0589 - val_zero_one_rank_loss_for_scores_ties: 1.5470\n", - "Epoch 3/10\n", - "9000/9000 [==============================] - 1s 56us/step - loss: 11.0235 - zero_one_rank_loss_for_scores_ties: 1.5189 - val_loss: 10.9597 - val_zero_one_rank_loss_for_scores_ties: 1.4140\n", - "Epoch 4/10\n", - "9000/9000 [==============================] - 0s 53us/step - loss: 10.9261 - zero_one_rank_loss_for_scores_ties: 1.3969 - val_loss: 10.8706 - val_zero_one_rank_loss_for_scores_ties: 1.3390\n", - "Epoch 5/10\n", - "9000/9000 [==============================] - 1s 56us/step - loss: 10.8475 - zero_one_rank_loss_for_scores_ties: 1.3219 - val_loss: 10.8015 - val_zero_one_rank_loss_for_scores_ties: 1.2610\n", - "Epoch 6/10\n", - "9000/9000 [==============================] - 0s 54us/step - loss: 10.7881 - zero_one_rank_loss_for_scores_ties: 1.2678 - val_loss: 10.7448 - val_zero_one_rank_loss_for_scores_ties: 1.1940\n", - "Epoch 7/10\n", - "9000/9000 [==============================] - 1s 56us/step - loss: 10.7408 - zero_one_rank_loss_for_scores_ties: 1.2316 - val_loss: 10.6945 - val_zero_one_rank_loss_for_scores_ties: 1.1680\n", - "Epoch 8/10\n", - "9000/9000 [==============================] - 0s 55us/step - loss: 10.6958 - zero_one_rank_loss_for_scores_ties: 1.1852 - val_loss: 10.6414 - val_zero_one_rank_loss_for_scores_ties: 1.1430\n", - "Epoch 9/10\n", - "9000/9000 
[==============================] - 1s 59us/step - loss: 10.6492 - zero_one_rank_loss_for_scores_ties: 1.1494 - val_loss: 10.5997 - val_zero_one_rank_loss_for_scores_ties: 1.0930\n", - "Epoch 10/10\n", - "9000/9000 [==============================] - 1s 56us/step - loss: 10.6093 - zero_one_rank_loss_for_scores_ties: 1.1200 - val_loss: 10.5566 - val_zero_one_rank_loss_for_scores_ties: 1.0710\n" - ] - } - ], - "source": [ - "fate.fit(X_train, Y_train, verbose=True, epochs=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "scores = fate.predict_scores(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.0035" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from csrank.metrics_np import categorical_accuracy_np\n", - "from keras import backend as K\n", - "categorical_accuracy_np(Y_test, scores)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Not converged yet, but let us visualize the scores it assigns to test instances:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUgAAAEvCAYAAAAn5VjRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAVsElEQVR4nO3df4yW5Z3v8fdXYOKouKMiyMwDq2RkokgrdmRRd93untYfnM1C0ZIhTWhPaygWm91uYlZDcnLsJhWP54/9UdsesnZbmwV60lWGrPxQm7oek1WEIlWwiEdpmWdcLbZYm04rTq/zx/PADsNczMAzz48Z3q/kydz3dV/3fX29hQ/3r+eeSCkhSTrRWfUuQJIalQEpSRkGpCRlGJCSlGFASlKGASlJGRPrXcDJTJkyJV166aX1LkPSOLNz585DKaWLh+vX0AF56aWXsmPHjnqXIWmciYifjKSfp9iSlGFASlKGASlJGQZkA9m6dSsdHR20t7ezZs2aepcjnfEMyAbR39/PqlWr2LJlC3v37mX9+vXs3bu33mVJZzQDskFs376d9vZ2Zs2aRVNTE11dXXR3d9e7LOmMZkA2iGKxyIwZM47NFwoFisViHSuSZEA2iKHeyxkRdahE0lEGZIMoFAocPHjw2HxPTw+tra11rEhSQ3+TZrzbuKvIg9v20Xu4j+mTm9j17OeYMqWfadOCiRM3sG7duiHXe+EFWLAAvvtduP32GhctnUE8gqyTjbuK3PvoSxQP95GA3vfeZ/KCXxOTFrN//2ssXbqUOXPmnLBefz/89V/DzTfXvmbpTGNA1smD2/bRd6T/uLZzrr2YmcvvZfbs2axevXrI9f7hH+C222Dq1FpUKZ3ZDMg66T3cN2T7W7/8TXadYhEeewxWrqxWVZIGMiDrpLWlecj2aeefnV3nL/8SHngAJkyoVlWSBjIg6+TumztonnR80jVPmsDnb5yVXWfHDujqgksvhe99D77wBdi4scqFSmcwA7JOFs9r4/4lc2lraSaAtpZm7l8yl5vmXJJd54034MCB0uf22+FrX4PFi2tVsXRyn/3sZ5k6dSpXXXVVvUsZNaPymE9E3AL8HTAB+MeU0ppByz8KdANvlJseTSl9eTTGHssWz2tj8by2Y/PLlsGdT8OhQ1AowH33wZEjpWVed1Sj+8xnPsNdd93F8uXL613KqKk4ICNiAvAQ8HGgB3ghIjallAa/aeH/ppT+rNLxxrP160fe91vfqloZ0mm58cYbOXDgQL3LGFWjcYo9H3gtpfR6Sul9YAOwaBS2K0l1NRoB2QYcHDDfU24b7LqI2B0RWyLixCegJanBjMY1yKHeqDD4zQs/BH4/pfSriFgIbAQuH3JjESuAFQAzZ84chfIk6fSMRkD2ADMGzBeA3oEdUkq/HDC9OSK+FhFTUkqHBm8spbQWWAvQ2dl54ituJDWEge8SaG1p5u6bO7j6gnpXNbpG4xT7BeDyiLgsIpqALmDTwA4RcUmU390VEfPL474zCmNLqoPB7xIoHu7jk3+6m1mzzmPPng0UCgUefvjhE9Z7+mm4+mqYMwf++I9rXvYpq/gIMqX0QUTcBWyj9JjPN1NKeyJiZXn5N4DbgTsj4gOgD+hKQ70AUdKYMNS7BC5aeAHTLnyd/h/M5+WXe05Y5/Dh0pcbtm6FmTPh7bdrVe3pG5XnIFNKm4HNg9q+MWD6q8BXR2MsSfU31LsEzp7xc955t5mWzDrr1sGSJaVwhLHxwhW/SSPplJ3OuwRefRV+8Qv46EfhIx+BRx6pUnGjyICUdMpO510CH3wAO3fC44/Dtm3wN39TCs1G5hvFJZ2yo1+RPfEu9iX8fWadQgGmTIFzzy19brwRdu+G2bNrV/epMiAlnZbB7xKA0otUchYtgrvuKh1Jvv8+PP88fOlL1a2xUgakpFGxbFnpMZ7cy1auuAJuuQU+9CE46yy44w5o9Bf/RCM/bdPZ2Zl27NhR7zIkjTMRsTOl1DlcP2/SSFKGASlJGQakJGUYkJKUYUBKUoYBKUkZBqQkZRiQkpRhQEpShgEpSRkGpCRlGJCSlGFASlKGASlJGQakJGUYkJKUYUBKUoYBKUkZBqQkZRiQkpRhQEpShgEpSRkGpCRlGJCSlGFASlKGASlJGQakJGUYkJLGvK1bt9LR0UF7eztr1qwZte0akJLGtP7+flatWsWWLVvYu3cv69evZ+/evaOybQNS0pi2fft22tvbmTVr
[remainder of base64-encoded PNG omitted: scatter plot of a test instance annotated with true ranks and predicted scores]",
-       "text/plain": [
-        "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax = plt.subplots(figsize=(5,5))\n", - "inst = np.random.choice(n_test)\n", - "ax.scatter(X_test[inst][:, 0], X_test[inst][:, 1])\n", - "for i in range(n_objects):\n", - " ax.text(X_test[inst, i, 0]+0.01,\n", - " X_test[inst, i, 1]+0.01,\n", - " s=int(Y_test[inst, i]))\n", - " ax.text(X_test[inst, i, 0]+0.01,\n", - " X_test[inst, i, 1]-0.05,\n", - " s='{:.1f}'.format(scores[inst][i]),\n", - " color='b')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/notebooks/FATE-Net-Ranking.ipynb b/docs/notebooks/FATE-Net-Ranking.ipynb deleted file mode 100644 index f2674972..00000000 --- a/docs/notebooks/FATE-Net-Ranking.ipynb +++ /dev/null @@ -1,277 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Object Ranking" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from csrank import FATEObjectRanker" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The Medoid Ranking Problem" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the medoid problem the goal of the learner is to sort a set of randomly generated points based on their distance to the _medoid_.\n", - "This problem is inspired by the setting of ranking similarity learning, where the goal is to learn a similarity function from triplets of objects.\n", - "\n", - "We will generate a random dataset where each instance contains 5 objects and 2 features for easy plotting." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from csrank import ObjectRankingDatasetGenerator\n", - "seed = 123\n", - "n_train = 10000\n", - "n_test = 10000\n", - "n_features = 2\n", - "n_objects = 5\n", - "gen = ObjectRankingDatasetGenerator(dataset_type='medoid', random_state=seed,\n", - " n_train_instances=n_train,\n", - " n_test_instances=n_test,\n", - " n_objects=n_objects,\n", - " n_features=n_features)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "X_train, Y_train, X_test, Y_test = gen.get_single_train_test_split()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let us plot a random instance. The points are ranked by distance to the medoid. The medoid itself gets always gets the index 0." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "[... base64-encoded PNG data omitted ...]\n", - "text/plain": [ - "<Figure size 360x360 with 1 Axes>
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax = plt.subplots(figsize=(5,5))\n", - "inst = np.random.choice(n_train)\n", - "ax.scatter(X_train[inst][:, 0], X_train[inst][:, 1])\n", - "for i in range(n_objects):\n", - " ax.text(X_train[inst, i, 0]+0.01,\n", - " X_train[inst, i, 1]+0.01,\n", - " s=int(Y_train[inst, i]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The FATE network\n", - "The first-aggregate-then-evaluate approach learns an embedding of each object and then aggregates that into a _context_:\n", - "\\begin{equation}\n", - "\t\\mu_{C(\\vec{x})} = \\frac{1}{|C(\\vec{x})|} \\sum_{\\vec{y} \\in C(\\vec{x})} \\phi(\\vec{y})\n", - "\\end{equation}\n", - "and then scores each object $\\vec{x}$ using a generalized utility function $U (\\vec{x}, \\mu_{C(\\vec{x})})$" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from keras.optimizers import SGD\n", - "from csrank import FATEObjectRanker\n", - "from csrank.losses import smooth_rank_loss\n", - "fate = FATEObjectRanker(\n", - " loss_function=smooth_rank_loss,\n", - " optimizer=SGD,\n", - " optimizer__lr=1e-4,\n", - " optimizer__nesterov=True,\n", - " optimizer__momentum=0.9)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will run the training for only 10 epochs to get an idea of the convergence:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train on 9000 samples, validate on 1000 samples\n", - "Epoch 1/10\n", - "9000/9000 [==============================] - 8s 848us/step - loss: 11.3143 - zero_one_rank_loss_for_scores_ties: 0.4318 - val_loss: 11.2180 - val_zero_one_rank_loss_for_scores_ties: 0.3547\n", - "Epoch 2/10\n", - "9000/9000 [==============================] - 1s 99us/step - loss: 11.2005 - zero_one_rank_loss_for_scores_ties: 0.3448 - val_loss: 11.1587 - val_zero_one_rank_loss_for_scores_ties: 0.3164\n", - "Epoch 3/10\n", - "9000/9000 [==============================] - 1s 112us/step - loss: 11.1526 - zero_one_rank_loss_for_scores_ties: 0.3150 - val_loss: 11.1150 - val_zero_one_rank_loss_for_scores_ties: 0.2932\n", - "Epoch 4/10\n", - "9000/9000 [==============================] - 1s 88us/step - loss: 11.1175 - zero_one_rank_loss_for_scores_ties: 0.2964 - val_loss: 11.0819 - val_zero_one_rank_loss_for_scores_ties: 0.2779\n", - "Epoch 5/10\n", - "9000/9000 [==============================] - 1s 160us/step - loss: 11.0899 - zero_one_rank_loss_for_scores_ties: 0.2850 - val_loss: 11.0541 - val_zero_one_rank_loss_for_scores_ties: 0.2657\n", - "Epoch 6/10\n", - "9000/9000 [==============================] - 1s 128us/step - loss: 11.0661 - zero_one_rank_loss_for_scores_ties: 0.2773 - val_loss: 11.0332 - val_zero_one_rank_loss_for_scores_ties: 0.2572\n", - "Epoch 7/10\n", - "9000/9000 [==============================] - 1s 101us/step - loss: 11.0433 - zero_one_rank_loss_for_scores_ties: 0.2704 - val_loss: 11.0121 - val_zero_one_rank_loss_for_scores_ties: 0.2524\n", - "Epoch 8/10\n", - "9000/9000 [==============================] - 1s 117us/step - loss: 11.0227 - zero_one_rank_loss_for_scores_ties: 0.2654 - val_loss: 10.9913 - val_zero_one_rank_loss_for_scores_ties: 0.2477\n", - "Epoch 9/10\n", - "9000/9000 [==============================] - 1s 117us/step - loss: 11.0028 - zero_one_rank_loss_for_scores_ties: 0.2619 - val_loss: 
10.9738 - val_zero_one_rank_loss_for_scores_ties: 0.2453\n", - "Epoch 10/10\n", - "9000/9000 [==============================] - 1s 116us/step - loss: 10.9843 - zero_one_rank_loss_for_scores_ties: 0.2590 - val_loss: 10.9541 - val_zero_one_rank_loss_for_scores_ties: 0.2416\n" - ] - } - ], - "source": [ - "fate.fit(X_train, Y_train, verbose=True, epochs=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "scores = fate.predict_scores(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.26382047" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from csrank.metrics import zero_one_rank_loss_for_scores\n", - "from keras import backend as K\n", - "K.eval(zero_one_rank_loss_for_scores(Y_test, scores))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Not converged yet, but let us visualize the scores it assigns to test instances:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "[... base64-encoded PNG data omitted ...]\n", - "text/plain": [ - "<Figure size 360x360 with 1 Axes>
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax = plt.subplots(figsize=(5,5))\n", - "inst = np.random.choice(n_test)\n", - "ax.scatter(X_test[inst][:, 0], X_test[inst][:, 1])\n", - "for i in range(n_objects):\n", - " ax.text(X_test[inst, i, 0]+0.01,\n", - " X_test[inst, i, 1]+0.01,\n", - " s=int(Y_test[inst, i]))\n", - " ax.text(X_test[inst, i, 0]+0.01,\n", - " X_test[inst, i, 1]-0.025,\n", - " s='{:.1f}'.format(scores[inst][i]),\n", - " color='b')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/notebooks/GeneralizationExperiments.ipynb b/docs/notebooks/GeneralizationExperiments.ipynb deleted file mode 100644 index 1a495e5d..00000000 --- a/docs/notebooks/GeneralizationExperiments.ipynb +++ /dev/null @@ -1,1423 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Generalization Behaviour" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "import inspect\n", - "import os\n", - "import logging\n", - "import numpy as np\n", - "import pandas as pd\n", - "from docopt import docopt\n", - "from csrank import FATEObjectRanker, FETAObjectRanker, ObjectRankingDatasetGenerator\n", - "from csrank.callbacks import DebugOutput\n", - "from csrank.metrics import zero_one_rank_loss_for_scores\n", - "from csrank.util import rename_file_if_exist, configure_logging_numpy_keras, get_tensor_value\n", - "from keras.callbacks import History" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Defining the Constants" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "MODEL = \"aModel\"\n", - "ERROR_OUTPUT_STRING = 'Out of sample error {} : {} for n_objects {}'\n", - "his = History()\n", - "his.__name__ = \"History\"\n", - "objects = \"Objects\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The Medoid Problem\n", - "In the medoid problem the goal of the learner is to sort a set of randomly generated points based on their distance to the _medoid_.\n", - "This problem is inspired by the setting of ranking similarity learning, where the goal is to learn a similarity function from triplets of objects.\n", - "\n", - "We will generate a random dataset where each instance contains provided number objects and 2 features. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def generate_dataset(n_objects=5, random_state=42):\n", - " parameters = {\"n_features\": 2, \"n_objects\": n_objects, \n", - " \"n_train_instances\": 10000, \"n_test_instances\": 100000,\n", - " \"dataset_type\": \"medoid\",\n", - " \"random_state\":random_state}\n", - " generator = ObjectRankingDatasetGenerator(**parameters)\n", - " return generator.get_single_train_test_split()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Evaluation on different size rankings\n", - "Fit the given ranker and predict on rankings with different sizes and check the zero one rank loss for them to see the generalizing behaviour of the rankers." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def get_evaluation_result(gor, X_train, Y_train, epochs):\n", - " gor.fit(X_train, Y_train, callbacks=[DebugOutput(delta=10), his], verbose=False, epochs=epochs)\n", - " eval_results = {}\n", - " for n_objects in np.arange(3, 20):\n", - " _, _, X_test, Y_test = generate_dataset(n_objects=n_objects, random_state=seed + n_objects * 5)\n", - " y_pred_scores = gor.predict_scores(X_test, batch_size=X_test.shape[0])\n", - " metric_loss = get_tensor_value(zero_one_rank_loss_for_scores(Y_test, y_pred_scores))\n", - " logger.info(ERROR_OUTPUT_STRING.format(\"zero_one_rank_loss\", str(np.mean(metric_loss)), n_objects))\n", - " eval_results[n_objects] = metric_loss\n", - " return eval_results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Initialize the log file path and the dataframe path." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "n_objects = 5\n", - "dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n", - "log_path = os.path.join(dirname, \"logs\", \"generalizing_mean_{}.log\".format(n_objects))\n", - "df_path = os.path.join(dirname, \"logs\", \"generalizing_mean_{}.csv\".format(n_objects))\n", - "random_state = np.random.RandomState(seed=42)\n", - "seed = random_state.randint(2 ** 32)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Initialize tensorflow and keras with the seed and initialize the log file path" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "rows_list = []\n", - "configure_logging_numpy_keras(seed=seed, log_path=log_path)\n", - "logger = logging.getLogger(\"Experiment\")\n", - "X_train, Y_train, _, _ = generate_dataset(n_objects=n_objects, random_state=seed)\n", - "n_instances, n_objects, n_features = X_train.shape\n", - "\n", - "epochs = 500\n", - "params = {\"n_objects\": n_objects, \n", - " \"n_features\": n_features, \n", - " \"n_object_features\": n_features, \n", - " \"use_early_stopping\": True, \n", - " \"metrics\":[zero_one_rank_loss_for_scores]}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Evaluate the FETANetwork with best parameters and check the generalization" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "logger.info(\"############################# With Best Parameters FETA ##############################\")\n", - "gor = FETAObjectRanker(**params)\n", - "result = 
get_evaluation_result(gor, X_train, Y_train, epochs)\n", - "result[MODEL] = \"FETARankerDefault\"\n", - "rows_list.append(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Evaluate the FATEObjectRanker with the best parameters and check its generalization" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from csrank.losses import smooth_rank_loss\n", - "logger.info(\"############################# With Best Parameters FATE ##############################\")\n", - "gor = FATEObjectRanker(**params)\n", - "result = get_evaluation_result(gor, X_train, Y_train, epochs)\n", - "result[MODEL] = \"FATERankerDefault\"\n", - "rows_list.append(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save the results into a dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
aModelFATERankerDefault
30.218561
40.947871
50.692061
60.669744
70.932732
80.804914
90.941135
100.964858
110.970023
120.090049
130.503294
140.228598
150.287196
160.000157
170.060977
180.381702
190.617856
\n", - "
" - ], - "text/plain": [ - "aModel FATERankerDefault\n", - "3 0.218561\n", - "4 0.947871\n", - "5 0.692061\n", - "6 0.669744\n", - "7 0.932732\n", - "8 0.804914\n", - "9 0.941135\n", - "10 0.964858\n", - "11 0.970023\n", - "12 0.090049\n", - "13 0.503294\n", - "14 0.228598\n", - "15 0.287196\n", - "16 0.000157\n", - "17 0.060977\n", - "18 0.381702\n", - "19 0.617856" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(rows_list)\n", - "df = df.set_index(MODEL).T\n", - "cols = list(df.columns.values)\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If the there is an existing csv file saved then load it and add the new columns containing the results to it" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ObjectsFETARankerFATERanker
030.1949510.065285
140.2284020.120779
250.2241310.103804
360.2279140.109337
470.2289700.110699
580.2304980.112166
690.2305200.113533
7100.2306970.115446
8110.2302960.116400
9120.2302990.116445
10130.2296090.117337
11140.2294950.117598
12150.2279230.117623
13160.2275560.117890
14170.2269430.118051
15180.2262510.117521
16190.2253300.118120
\n", - "
" - ], - "text/plain": [ - " Objects FETARanker FATERanker\n", - "0 3 0.194951 0.065285\n", - "1 4 0.228402 0.120779\n", - "2 5 0.224131 0.103804\n", - "3 6 0.227914 0.109337\n", - "4 7 0.228970 0.110699\n", - "5 8 0.230498 0.112166\n", - "6 9 0.230520 0.113533\n", - "7 10 0.230697 0.115446\n", - "8 11 0.230296 0.116400\n", - "9 12 0.230299 0.116445\n", - "10 13 0.229609 0.117337\n", - "11 14 0.229495 0.117598\n", - "12 15 0.227923 0.117623\n", - "13 16 0.227556 0.117890\n", - "14 17 0.226943 0.118051\n", - "15 18 0.226251 0.117521\n", - "16 19 0.225330 0.118120" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "if not os.path.isfile(df_path):\n", - " dataFrame = df\n", - "else:\n", - " dataFrame = pd.read_csv(df_path, index_col=0)\n", - " dataFrame = dataFrame.append(df, ignore_index=True)\n", - "dataFrame" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save the dataframe to given file path" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "dataFrame.to_csv(df_path, index=objects)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Plot the results of the zero one rank accuracy.\n", - "It takes too much time to run this experiment. \n", - "The stored results are used to plot the generalization behaviour of the rankers." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": [ - "/* Put everything inside the global mpl namespace */\n", - "window.mpl = {};\n", - "\n", - "\n", - "mpl.get_websocket_type = function() {\n", - " if (typeof(WebSocket) !== 'undefined') {\n", - " return WebSocket;\n", - " } else if (typeof(MozWebSocket) !== 'undefined') {\n", - " return MozWebSocket;\n", - " } else {\n", - " alert('Your browser does not have WebSocket support.' +\n", - " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", - " 'Firefox 4 and 5 are also supported but you ' +\n", - " 'have to enable WebSockets in about:config.');\n", - " };\n", - "}\n", - "\n", - "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", - " this.id = figure_id;\n", - "\n", - " this.ws = websocket;\n", - "\n", - " this.supports_binary = (this.ws.binaryType != undefined);\n", - "\n", - " if (!this.supports_binary) {\n", - " var warnings = document.getElementById(\"mpl-warnings\");\n", - " if (warnings) {\n", - " warnings.style.display = 'block';\n", - " warnings.textContent = (\n", - " \"This browser does not support binary websocket messages. \" +\n", - " \"Performance may be slow.\");\n", - " }\n", - " }\n", - "\n", - " this.imageObj = new Image();\n", - "\n", - " this.context = undefined;\n", - " this.message = undefined;\n", - " this.canvas = undefined;\n", - " this.rubberband_canvas = undefined;\n", - " this.rubberband_context = undefined;\n", - " this.format_dropdown = undefined;\n", - "\n", - " this.image_mode = 'full';\n", - "\n", - " this.root = $('
');\n", - " this._root_extra_style(this.root)\n", - " this.root.attr('style', 'display: inline-block');\n", - "\n", - " $(parent_element).append(this.root);\n", - "\n", - " this._init_header(this);\n", - " this._init_canvas(this);\n", - " this._init_toolbar(this);\n", - "\n", - " var fig = this;\n", - "\n", - " this.waiting = false;\n", - "\n", - " this.ws.onopen = function () {\n", - " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", - " fig.send_message(\"send_image_mode\", {});\n", - " if (mpl.ratio != 1) {\n", - " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", - " }\n", - " fig.send_message(\"refresh\", {});\n", - " }\n", - "\n", - " this.imageObj.onload = function() {\n", - " if (fig.image_mode == 'full') {\n", - " // Full images could contain transparency (where diff images\n", - " // almost always do), so we need to clear the canvas so that\n", - " // there is no ghosting.\n", - " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", - " }\n", - " fig.context.drawImage(fig.imageObj, 0, 0);\n", - " };\n", - "\n", - " this.imageObj.onunload = function() {\n", - " fig.ws.close();\n", - " }\n", - "\n", - " this.ws.onmessage = this._make_on_message_function(this);\n", - "\n", - " this.ondownload = ondownload;\n", - "}\n", - "\n", - "mpl.figure.prototype._init_header = function() {\n", - " var titlebar = $(\n", - " '
');\n", - " var titletext = $(\n", - " '
');\n", - " titlebar.append(titletext)\n", - " this.root.append(titlebar);\n", - " this.header = titletext[0];\n", - "}\n", - "\n", - "\n", - "\n", - "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", - "\n", - "}\n", - "\n", - "\n", - "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", - "\n", - "}\n", - "\n", - "mpl.figure.prototype._init_canvas = function() {\n", - " var fig = this;\n", - "\n", - " var canvas_div = $('
');\n", - "\n", - " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", - "\n", - " function canvas_keyboard_event(event) {\n", - " return fig.key_event(event, event['data']);\n", - " }\n", - "\n", - " canvas_div.keydown('key_press', canvas_keyboard_event);\n", - " canvas_div.keyup('key_release', canvas_keyboard_event);\n", - " this.canvas_div = canvas_div\n", - " this._canvas_extra_style(canvas_div)\n", - " this.root.append(canvas_div);\n", - "\n", - " var canvas = $('');\n", - " canvas.addClass('mpl-canvas');\n", - " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", - "\n", - " this.canvas = canvas[0];\n", - " this.context = canvas[0].getContext(\"2d\");\n", - "\n", - " var backingStore = this.context.backingStorePixelRatio ||\n", - "\tthis.context.webkitBackingStorePixelRatio ||\n", - "\tthis.context.mozBackingStorePixelRatio ||\n", - "\tthis.context.msBackingStorePixelRatio ||\n", - "\tthis.context.oBackingStorePixelRatio ||\n", - "\tthis.context.backingStorePixelRatio || 1;\n", - "\n", - " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", - "\n", - " var rubberband = $('');\n", - " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", - "\n", - " var pass_mouse_events = true;\n", - "\n", - " canvas_div.resizable({\n", - " start: function(event, ui) {\n", - " pass_mouse_events = false;\n", - " },\n", - " resize: function(event, ui) {\n", - " fig.request_resize(ui.size.width, ui.size.height);\n", - " },\n", - " stop: function(event, ui) {\n", - " pass_mouse_events = true;\n", - " fig.request_resize(ui.size.width, ui.size.height);\n", - " },\n", - " });\n", - "\n", - " function mouse_event_fn(event) {\n", - " if (pass_mouse_events)\n", - " return fig.mouse_event(event, event['data']);\n", - " }\n", - "\n", - " rubberband.mousedown('button_press', mouse_event_fn);\n", - " rubberband.mouseup('button_release', mouse_event_fn);\n", - " // Throttle sequential mouse events to 1 every 20ms.\n", - " rubberband.mousemove('motion_notify', mouse_event_fn);\n", - "\n", - " rubberband.mouseenter('figure_enter', mouse_event_fn);\n", - " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", - "\n", - " canvas_div.on(\"wheel\", function (event) {\n", - " event = event.originalEvent;\n", - " event['data'] = 'scroll'\n", - " if (event.deltaY < 0) {\n", - " event.step = 1;\n", - " } else {\n", - " event.step = -1;\n", - " }\n", - " mouse_event_fn(event);\n", - " });\n", - "\n", - " canvas_div.append(canvas);\n", - " canvas_div.append(rubberband);\n", - "\n", - " this.rubberband = rubberband;\n", - " this.rubberband_canvas = rubberband[0];\n", - " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", - " this.rubberband_context.strokeStyle = \"#000000\";\n", - "\n", - " this._resize_canvas = function(width, height) {\n", - " // Keep the size of the canvas, canvas container, and rubber band\n", - " // canvas in synch.\n", - " canvas_div.css('width', width)\n", - " canvas_div.css('height', height)\n", - "\n", - " canvas.attr('width', width * mpl.ratio);\n", - " canvas.attr('height', height * mpl.ratio);\n", - " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", - "\n", - " rubberband.attr('width', width);\n", - " rubberband.attr('height', height);\n", - " }\n", - "\n", - " // Set the figure to an initial 600x600px, this will subsequently be updated\n", - " // upon first draw.\n", - " this._resize_canvas(600, 600);\n", - "\n", - " // Disable right mouse context menu.\n", - " 
$(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", - " return false;\n", - " });\n", - "\n", - " function set_focus () {\n", - " canvas.focus();\n", - " canvas_div.focus();\n", - " }\n", - "\n", - " window.setTimeout(set_focus, 100);\n", - "}\n", - "\n", - "mpl.figure.prototype._init_toolbar = function() {\n", - " var fig = this;\n", - "\n", - " var nav_element = $('
')\n", - " nav_element.attr('style', 'width: 100%');\n", - " this.root.append(nav_element);\n", - "\n", - " // Define a callback function for later on.\n", - " function toolbar_event(event) {\n", - " return fig.toolbar_button_onclick(event['data']);\n", - " }\n", - " function toolbar_mouse_event(event) {\n", - " return fig.toolbar_button_onmouseover(event['data']);\n", - " }\n", - "\n", - " for(var toolbar_ind in mpl.toolbar_items) {\n", - " var name = mpl.toolbar_items[toolbar_ind][0];\n", - " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", - " var image = mpl.toolbar_items[toolbar_ind][2];\n", - " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", - "\n", - " if (!name) {\n", - " // put a spacer in here.\n", - " continue;\n", - " }\n", - " var button = $('