From 47074f8f701116a3e7cb7f0572425abfadb76da7 Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sat, 22 May 2021 15:39:21 +0200
Subject: [PATCH 01/11] Add FedGDO class.

This class is equivalent to the previous FedGDO_reset_local and
FedGDO_persistent. The reset of the local optimizers between global
minibatches can now be set via an mpl parameter.

Signed-off-by: arthurPignet
---
 mplc/multi_partner_learning/fast_mpl.py | 89 +++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/mplc/multi_partner_learning/fast_mpl.py b/mplc/multi_partner_learning/fast_mpl.py
index edeaf8cf..16cbb81b 100644
--- a/mplc/multi_partner_learning/fast_mpl.py
+++ b/mplc/multi_partner_learning/fast_mpl.py
@@ -575,3 +575,92 @@ def fit_epoch(model, train_dataset, partners_grads, smodel_list, global_grad, ag
                 break
 
         self.log_end_training()
+
+
+class FastFedGDO(FastFedAvg):
+    """
+    This method is inspired by Federated gradient (FedGrad), but modifies how the local gradient is
+    computed. In this version we use a local, partner-specific optimizer to perform several
+    minimization steps of the local loss during a minibatch. The sum of these weight updates is used
+    as the gradient sent to the global optimizer. The global optimizer aggregates the gradient-like
+    updates sent by the partners and performs an optimization step with the aggregated gradient.
+    """
+    name = 'FastFedGDO'
+
+    def __init__(self, scenario, reset_local_optims=False, **kwargs):
+        super(FastFedGDO, self).__init__(scenario, **kwargs)
+        self.reset_local_optims = reset_local_optims
+
+    def init_specific_tf_variable(self):
+        # generate tf Variables in which we will store the model weights
+        self.model_stateholder = [tf.Variable(initial_value=w.read_value()) for w in self.model.trainable_weights]
+        self.partners_grads = [[tf.Variable(initial_value=w.read_value()) for w in self.model.trainable_weights]
+                               for _ in self.partners_list]
+        self.global_grad = [tf.Variable(initial_value=w.read_value()) for w in self.model.trainable_weights]
+        self.partners_optimizers = [self.model.optimizer.from_config(self.model.optimizer.get_config()) for _ in
+                                    self.partners_list]
+
+    def fit(self):
+        # TF function definition
+        @tf.function
+        def fit_minibatch(model, model_stateholder, partners_minibatches, partners_optimizers, partners_grads,
+                          global_grad, aggregation_weights):
+            for model_w, old_w in zip(model.trainable_weights, model_stateholder):  # store model weights
+                old_w.assign(model_w.read_value())
+
+            for p_id, minibatch in enumerate(partners_minibatches):  # minibatch == (x,y)
+                # minibatch[0] is a tensor of shape=(number of batches, batch size, img).
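The computation above is easier to see stripped of the TF plumbing: each partner's "gradient" is really the total weight displacement produced by its local optimizer over one minibatch. A minimal NumPy sketch of that double-optimizer idea, with plain SGD standing in for both optimizers (all names here are illustrative, not part of mplc):

```python
import numpy as np

def local_pseudo_gradient(theta, batches, grad_fn, local_lr=0.01):
    """Run several local optimizer steps, return theta_before - theta_after."""
    theta_local = theta.copy()
    for x, y in batches:                      # the partner's batches within one minibatch
        theta_local -= local_lr * grad_fn(theta_local, x, y)
    return theta - theta_local                # the "gradient" sent to the global optimizer

def global_step(theta, partners_batches, grad_fn, agg_weights, global_lr=0.1):
    """Aggregate the partners' pseudo-gradients and apply one global update."""
    grads = [local_pseudo_gradient(theta, b, grad_fn) for b in partners_batches]
    agg = sum(w * g for w, g in zip(agg_weights, grads))  # tf.tensordot in the TF version
    return theta - global_lr * agg            # plain-SGD stand-in for the global optimizer
```

With a plain-SGD local optimizer this reduces to an ordinary summed gradient; the interest of the scheme is that a stateful local optimizer (momentum, Adam, ...) shapes the update before aggregation.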
From 4b06777355eaea67fcbd97e283c7d7cb3a851b49 Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sat, 22 May 2021 15:56:07 +0200
Subject: [PATCH 02/11] Add fast mpl methods to documentation

Add fedgdo to the mpl __init__.

Signed-off-by: arthurPignet
---
 mplc/doc/documentation.md               | 14 ++++++++++++++
 mplc/multi_partner_learning/__init__.py |  3 ++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/mplc/doc/documentation.md b/mplc/doc/documentation.md
index 535fdedb..ccad33be 100644
--- a/mplc/doc/documentation.md
+++ b/mplc/doc/documentation.md
@@ -314,6 +314,20 @@ There are several parameters influencing how the collaborative and distributed l
   - `'seqavg'`: stands for sequential averaging
 
     ![Schema seqavg](../../img/collaborative_rounds_seqavg.png)
+
+  The previous methods are implemented to be agnostic to the model used. However, some of these methods are also
+  implemented within the TensorFlow interface, at a lower level. These implementations are usually faster, especially
+  if you are using a GPU. Unfortunately, those methods are only compatible with tensorflow.keras-based models, so the
+  mplc-native dataset `Titanic` cannot be used.
+
+  Available methods:
+  - `'fast-fedavg'`: equivalent to FedAvg.
+  - `'fast-fedgrads'`: equivalent to FedGrad.
+  - `'fast-fedavg-smodel'`: equivalent to FedAvg, with smodel.
+  - `'fast-fedgrad-smodel'`: equivalent to FedGrad, with smodel.
+  - `'fast-fedgdo'`: stands for Federated averaging with double optimizers. This method is inspired by FedGrad, but
+    modifies how the local gradients are computed. A partner-specific local optimizer performs several minimization
+    steps (local minibatches) of the local loss during a global minibatch. The sum of these weight updates is used
+    as the gradient sent to the global optimizer, which aggregates the gradients sent by the partners and performs
+    an optimization step with the aggregated gradient.
 
   Example: `multi_partner_learning_approach='seqavg'`
diff --git a/mplc/multi_partner_learning/__init__.py b/mplc/multi_partner_learning/__init__.py
index 74a02708..d46f15b4 100644
--- a/mplc/multi_partner_learning/__init__.py
+++ b/mplc/multi_partner_learning/__init__.py
@@ -14,7 +14,8 @@
     'fast-fedavg': fast_mpl.FastFedAvg,
     'fast-fedgrads': fast_mpl.FastFedGrad,
     'fast-fedavg-smodel': fast_mpl.FastFedAvgSmodel,
-    'fast-fedgrad-smodel': fast_mpl.FastGradSmodel
+    'fast-fedgrad-smodel': fast_mpl.FastGradSmodel,
+    'fast-fedgdo': fast_mpl.FastFedGDO
 }
 
 MULTI_PARTNER_LEARNING_APPROACHES = BASIC_MPL_APPROACHES.copy()
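From user code, selecting the new approach is then a one-word change. A sketch against the mplc Scenario API (the exact keyword names, e.g. `dataset`, `epoch_count`, `minibatch_count`, may differ between mplc versions; the dataset must be tf.keras-compatible, so not Titanic):

```python
from mplc.scenario import Scenario

# A sketch, assuming keyword names from the mplc Scenario API at the time.
scenario = Scenario(partners_count=3,
                    amounts_per_partner=[0.4, 0.3, 0.3],
                    dataset='mnist',
                    multi_partner_learning_approach='fast-fedgdo',
                    epoch_count=10,
                    minibatch_count=3)
scenario.run()
```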
From 029821badac57d4d6bfed221968147eeef224837 Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sat, 22 May 2021 16:04:42 +0200
Subject: [PATCH 03/11] Add the possibility to change the global optimizer

Signed-off-by: arthurPignet
---
 mplc/multi_partner_learning/fast_mpl.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mplc/multi_partner_learning/fast_mpl.py b/mplc/multi_partner_learning/fast_mpl.py
index 16cbb81b..0d7d4dcf 100644
--- a/mplc/multi_partner_learning/fast_mpl.py
+++ b/mplc/multi_partner_learning/fast_mpl.py
@@ -587,8 +587,9 @@ class FastFedGDO(FastFedAvg):
     """
     name = 'FastFedGDO'
 
-    def __init__(self, scenario, reset_local_optims=False, **kwargs):
+    def __init__(self, scenario, reset_local_optims=False, global_optimiser=None, **kwargs):
         super(FastFedGDO, self).__init__(scenario, **kwargs)
+        self.global_optimiser = global_optimiser
         self.reset_local_optims = reset_local_optims
 
     def init_specific_tf_variable(self):
@@ -599,6 +600,8 @@ def init_specific_tf_variable(self):
         self.global_grad = [tf.Variable(initial_value=w.read_value()) for w in self.model.trainable_weights]
         self.partners_optimizers = [self.model.optimizer.from_config(self.model.optimizer.get_config()) for _ in
                                     self.partners_list]
+        if self.global_optimiser:
+            self.model.compile(optimizer=self.global_optimiser)
 
     def fit(self):
         # TF function definition
From 40379dc6ed75eb1dba9af1b0b53901d3dae16266 Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sat, 22 May 2021 16:47:05 +0200
Subject: [PATCH 04/11] Fix init order

Signed-off-by: arthurPignet
---
 mplc/multi_partner_learning/fast_mpl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mplc/multi_partner_learning/fast_mpl.py b/mplc/multi_partner_learning/fast_mpl.py
index 0d7d4dcf..ccf7d902 100644
--- a/mplc/multi_partner_learning/fast_mpl.py
+++ b/mplc/multi_partner_learning/fast_mpl.py
@@ -588,9 +588,9 @@ class FastFedGDO(FastFedAvg):
     name = 'FastFedGDO'
 
     def __init__(self, scenario, reset_local_optims=False, global_optimiser=None, **kwargs):
-        super(FastFedGDO, self).__init__(scenario, **kwargs)
         self.global_optimiser = global_optimiser
         self.reset_local_optims = reset_local_optims
+        super(FastFedGDO, self).__init__(scenario, **kwargs)
 
     def init_specific_tf_variable(self):
         # generate tf Variables in which we will store the model weights
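With the attributes set before the base-class `__init__` (which ends up calling `init_specific_tf_variable`) runs, both new options can be exercised. A usage sketch, instantiating the class directly; `scenario` is assumed to be an already-built mplc Scenario with a compiled tf.keras model, and SGD with momentum is an illustrative choice only:

```python
import tensorflow as tf
from mplc.multi_partner_learning.fast_mpl import FastFedGDO

# The local optimizers stay clones of the model's own optimizer;
# the global one replaces it through model.compile().
mpl = FastFedGDO(scenario,
                 reset_local_optims=True,  # re-create local optimizers at each global minibatch
                 global_optimiser=tf.keras.optimizers.SGD(learning_rate=1.0, momentum=0.9))
mpl.fit()
```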
From b042b878fc1bcc238876e425179df8db74229198 Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sat, 22 May 2021 16:37:36 +0200
Subject: [PATCH 05/11] wip

Signed-off-by: arthurPignet
---
 mplc/contributivity.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mplc/contributivity.py b/mplc/contributivity.py
index 848d1553..44a4a5b4 100644
--- a/mplc/contributivity.py
+++ b/mplc/contributivity.py
@@ -1113,7 +1113,7 @@ def compute_relative_perf_matrix(self):
 
         return relative_perf_matrix
 
-    def s_model(self):  # TOD refacto
+    def s_model(self):
         start = timer()
         mpl = basic_mpl.FedAvgSmodel(self.scenario)
         mpl.fit()
From 1ddcca064f599aebd05caf135ddcb276520a597e Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sat, 22 May 2021 19:34:42 +0200
Subject: [PATCH 06/11] Add statistical distances computation

Signed-off-by: arthurPignet
---
 mplc/contributivity.py | 43 ++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/mplc/contributivity.py b/mplc/contributivity.py
index 44a4a5b4..b7ea0e4e 100644
--- a/mplc/contributivity.py
+++ b/mplc/contributivity.py
@@ -12,12 +12,13 @@
 from timeit import default_timer as timer
 
 import numpy as np
+import tensorflow as tf
 from loguru import logger
 from scipy.stats import norm
 from sklearn.linear_model import LinearRegression
 
 from . import constants
-from .multi_partner_learning import basic_mpl
+from .multi_partner_learning import basic_mpl, fast_mpl
 
 
 class KrigingModel:
@@ -1113,23 +1114,37 @@ def compute_relative_perf_matrix(self):
 
         return relative_perf_matrix
 
-    def s_model(self):
+    def statistcal_distances_via_smodel(self):
+
         start = timer()
-        mpl = basic_mpl.FedAvgSmodel(self.scenario)
+        mpl = fast_mpl.FastFedAvgSmodel(self.scenario, self.scenario.mpl.pretrain_epochs)
         mpl.fit()
-        theta_estimated = np.zeros((mpl.partners_count,
-                                    mpl.dataset.num_classes,
-                                    mpl.dataset.num_classes))
+        cross_entropy = tf.keras.metrics.CategoricalCrossentropy()
+        self.contributivity_scores = {'Kullbakc divergence': [0 for _ in mpl.partners_list],
+                                      'ma': [0 for _ in mpl.partners_list], 'Hennigen': [0 for _ in mpl.partners_list]}
         for i, partnerMpl in enumerate(mpl.partners_list):
-            theta_estimated[i] = (np.exp(partnerMpl.noise_layer_weights) / np.sum(
-                np.exp(partnerMpl.noise_layer_weights), axis=2))
-        self.contributivity_scores = np.exp(- np.array([np.linalg.norm(
-            theta_estimated[i] - np.identity(mpl.dataset.num_classes)
-        ) for i in range(len(self.scenario.partners_list))]))
-
-        self.name = "S-Model"
+            y_global = mpl.model.predict(partnerMpl.x_train)
+            y_local = mpl.smodel_list[i].predict(y_global)
+            cross_entropy.update_state(y_global, y_local)
+            cs = cross_entropy.result().numpy()
+            cross_entropy.reset_state()
+            cross_entropy.update_state(y_global, y_global)
+            e = cross_entropy.result().numpy()
+            cross_entropy.reset_state()
+            self.contributivity_scores['Kullbakc divergence'][i] = cs - e
+            BC = 0
+            for y_g, y_l in zip(y_global, y_local):
+                BC += np.sum(np.sqrt(y_g * y_l))
+            BC /= len(y_global)
+            self.contributivity_scores['Kullback Leiber divergence'][i] = cs - e
+            self.contributivity_scores['Bhattacharyya distance'][i] = - np.log(BC)
+            self.contributivity_scores['Hellinger metric'][i] = np.sqrt(1 - BC)
+
+        self.name = "Statistic metric via S-model"
         self.scores_std = np.zeros(mpl.partners_count)
-        self.normalized_scores = self.contributivity_scores / np.sum(self.contributivity_scores)
+        self.normalized_scores = {}
+        for key, value in self.contributivity_scores.items():
+            self.normalized_scores[key] = value / np.sum(value)
         end = timer()
         self.computation_time_sec = end - start
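For reference, the quantities this commit estimates, writing $p$ for the global model's predicted distribution and $q$ for the S-model's locally corrupted version of it (`cs` and `e` in the code are the cross-entropy and the entropy, both averaged over the partner's samples):

```latex
D_{\mathrm{KL}}(p \parallel q) = H(p, q) - H(p)        % cs - e in the code
\mathrm{BC}(p, q) = \sum_k \sqrt{p_k \, q_k}           % Bhattacharyya coefficient
D_B(p, q) = -\ln \mathrm{BC}(p, q)                     % Bhattacharyya distance
D_H(p, q) = \sqrt{1 - \mathrm{BC}(p, q)}               % Hellinger metric
```

The less a partner's S-model distorts the global predictions, the closer all three distances stay to zero.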
From 28db75b67af5740a51433e202e92a7f1bba1c152 Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sat, 22 May 2021 21:29:37 +0200
Subject: [PATCH 07/11] Change name of contributivity method

Signed-off-by: arthurPignet
---
 mplc/contributivity.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/mplc/contributivity.py b/mplc/contributivity.py
index b7ea0e4e..1bf20cef 100644
--- a/mplc/contributivity.py
+++ b/mplc/contributivity.py
@@ -1117,7 +1117,12 @@ def compute_relative_perf_matrix(self):
     def statistcal_distances_via_smodel(self):
 
         start = timer()
-        mpl = fast_mpl.FastFedAvgSmodel(self.scenario, self.scenario.mpl.pretrain_epochs)
+        try:
+            mpl_pretrain = self.scenario.mpl.pretrain_epochs
+        except AttributeError as e:
+            mpl_pretrain = 2
+
+        mpl = fast_mpl.FastFedAvgSmodel(self.scenario, mpl_pretrain)
         mpl.fit()
         cross_entropy = tf.keras.metrics.CategoricalCrossentropy()
         self.contributivity_scores = {'Kullbakc divergence': [0 for _ in mpl.partners_list],
@@ -1210,7 +1215,7 @@ def compute_contributivity(
             # Contributivity 10: Partner valuation by reinforcement learning
             self.PVRL(learning_rate=0.2)
         elif method_to_compute == "S-Model":
-            self.s_model()
+            self.statistcal_distances_via_smodel()
         else:
             logger.warning("Unrecognized name of method, statement ignored!")
From 4bedc3fa2b46ebbb268850814402fd592bb9cf4d Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sun, 23 May 2021 10:49:19 +0200
Subject: [PATCH 08/11] Fix tf metric use

Fix log.
Flake it.

Signed-off-by: arthurPignet
---
 mplc/contributivity.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/mplc/contributivity.py b/mplc/contributivity.py
index 1bf20cef..505fbb86 100644
--- a/mplc/contributivity.py
+++ b/mplc/contributivity.py
@@ -85,9 +85,16 @@ def __str__(self):
             + str(self.first_charac_fct_calls_count)
             + "\n"
         )
-        output += f"Contributivity scores: {np.round(self.contributivity_scores, 3)}\n"
-        output += f"Std of the contributivity scores: {np.round(self.scores_std, 3)}\n"
-        output += f"Normalized contributivity scores: {np.round(self.normalized_scores, 3)}\n"
+        if isinstance(self.contributivity_scores, dict):
+            for key, value in self.contributivity_scores.items():
+                output += f'Metric: {key}\n'
+                output += f"Contributivity scores : {np.round(value, 3)}\n"
+                output += f"Std of the contributivity scores: {np.round(self.scores_std[key], 3)}\n"
+                output += f"Normalized contributivity scores: {np.round(self.normalized_scores[key], 3)}\n"
+        else:
+            output += f"Contributivity scores : {np.round(self.contributivity_scores, 3)}\n"
+            output += f"Std of the contributivity scores: {np.round(self.scores_std, 3)}\n"
+            output += f"Normalized contributivity scores: {np.round(self.normalized_scores, 3)}\n"
 
         return output
 
@@ -1119,24 +1126,29 @@ def statistcal_distances_via_smodel(self):
 
         start = timer()
         try:
             mpl_pretrain = self.scenario.mpl.pretrain_epochs
-        except AttributeError as e:
+        except AttributeError:
             mpl_pretrain = 2
 
         mpl = fast_mpl.FastFedAvgSmodel(self.scenario, mpl_pretrain)
         mpl.fit()
         cross_entropy = tf.keras.metrics.CategoricalCrossentropy()
-        self.contributivity_scores = {'Kullbakc divergence': [0 for _ in mpl.partners_list],
-                                      'ma': [0 for _ in mpl.partners_list], 'Hennigen': [0 for _ in mpl.partners_list]}
+        self.contributivity_scores = {'Kullback Leiber divergence': [0 for _ in mpl.partners_list],
+                                      'Bhattacharyya distance': [0 for _ in mpl.partners_list],
+                                      'Hellinger metric': [0 for _ in mpl.partners_list]}
+        self.scores_std = {'Kullback Leiber divergence': [0 for _ in mpl.partners_list],
+                           'Bhattacharyya distance': [0 for _ in mpl.partners_list],
+                           'Hellinger metric': [0 for _ in mpl.partners_list]}
+        # TODO: the variance of this estimation still needs to be estimated.
+
         for i, partnerMpl in enumerate(mpl.partners_list):
             y_global = mpl.model.predict(partnerMpl.x_train)
             y_local = mpl.smodel_list[i].predict(y_global)
             cross_entropy.update_state(y_global, y_local)
             cs = cross_entropy.result().numpy()
-            cross_entropy.reset_state()
+            cross_entropy.reset_states()
             cross_entropy.update_state(y_global, y_global)
             e = cross_entropy.result().numpy()
-            cross_entropy.reset_state()
-            self.contributivity_scores['Kullbakc divergence'][i] = cs - e
+            cross_entropy.reset_states()
             BC = 0
             for y_g, y_l in zip(y_global, y_local):
                 BC += np.sum(np.sqrt(y_g * y_l))
             BC /= len(y_global)
@@ -1146,7 +1158,6 @@ def statistcal_distances_via_smodel(self):
             self.contributivity_scores['Hellinger metric'][i] = np.sqrt(1 - BC)
 
         self.name = "Statistic metric via S-model"
-        self.scores_std = np.zeros(mpl.partners_count)
         self.normalized_scores = {}
         for key, value in self.contributivity_scores.items():
             self.normalized_scores[key] = value / np.sum(value)
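The metric fix above matters because tf.keras metrics are stateful accumulators: `update_state()` adds to a running aggregate, `result()` reads it, and the reset method clears it (spelled `reset_states()` in the TF versions this code targets, renamed `reset_state()` in later releases). A standalone sketch of the pattern used to obtain the two cross-entropies:

```python
import numpy as np
import tensorflow as tf

ce = tf.keras.metrics.CategoricalCrossentropy()

p = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])  # "global" predicted distributions
q = np.array([[0.6, 0.3, 0.1], [0.2, 0.7, 0.1]])  # "local" (S-model) distributions

ce.update_state(p, q)
cross = ce.result().numpy()    # H(p, q), averaged over samples
ce.reset_states()              # reset before reusing the same metric object

ce.update_state(p, p)
entropy = ce.result().numpy()  # H(p) = H(p, p)
ce.reset_states()

kl = cross - entropy           # the Kullback-Leibler estimate used as a score
```

Without the resets, the second `result()` call would mix both accumulations and silently bias the score.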
From 2b476d52b937526aa4cab209ead9cdc099a841a1 Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sun, 23 May 2021 14:37:19 +0200
Subject: [PATCH 09/11] Fix sc.to_dataframe method to handle contributivity scores as dict

Signed-off-by: arthurPignet
---
 mplc/scenario.py | 47 ++++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/mplc/scenario.py b/mplc/scenario.py
index ace252ed..9661832c 100644
--- a/mplc/scenario.py
+++ b/mplc/scenario.py
@@ -537,22 +537,39 @@ def to_dataframe(self):
             df = df.append(dict_results, ignore_index=True)
 
         for contrib in self.contributivity_list:
-
             # Contributivity data
-            dict_results["contributivity_method"] = contrib.name
-            dict_results["contributivity_scores"] = contrib.contributivity_scores
-            dict_results["contributivity_stds"] = contrib.scores_std
-            dict_results["computation_time_sec"] = contrib.computation_time_sec
-            dict_results["first_characteristic_calls_count"] = contrib.first_charac_fct_calls_count
-
-            for i in range(self.partners_count):
-                # Partner-specific data
-                dict_results["partner_id"] = i
-                dict_results["dataset_fraction_of_partner"] = self.amounts_per_partner[i]
-                dict_results["contributivity_score"] = contrib.contributivity_scores[i]
-                dict_results["contributivity_std"] = contrib.scores_std[i]
-
-                df = df.append(dict_results, ignore_index=True)
+            if isinstance(contrib.contributivity_scores, dict):
+                for key, value in contrib.contributivity_scores.items():
+                    dict_results["contributivity_method"] = f'{contrib.name} - {key}'
+                    dict_results["contributivity_scores"] = value
+                    dict_results["contributivity_stds"] = contrib.scores_std[key]
+                    dict_results["computation_time_sec"] = contrib.computation_time_sec
+                    dict_results["first_characteristic_calls_count"] = contrib.first_charac_fct_calls_count
+
+                    for i in range(self.partners_count):
+                        # Partner-specific data
+                        dict_results["partner_id"] = i
+                        dict_results["dataset_fraction_of_partner"] = self.amounts_per_partner[i]
+                        dict_results["contributivity_score"] = value[i]
+                        dict_results["contributivity_std"] = contrib.scores_std[key][i]
+
+                        df = df.append(dict_results, ignore_index=True)
+
+            else:
+                dict_results["contributivity_method"] = contrib.name
+                dict_results["contributivity_scores"] = contrib.contributivity_scores
+                dict_results["contributivity_stds"] = contrib.scores_std
+                dict_results["computation_time_sec"] = contrib.computation_time_sec
+                dict_results["first_characteristic_calls_count"] = contrib.first_charac_fct_calls_count
+
+                for i in range(self.partners_count):
+                    # Partner-specific data
+                    dict_results["partner_id"] = i
+                    dict_results["dataset_fraction_of_partner"] = self.amounts_per_partner[i]
+                    dict_results["contributivity_score"] = contrib.contributivity_scores[i]
+                    dict_results["contributivity_std"] = contrib.scores_std[i]
+
+                    df = df.append(dict_results, ignore_index=True)
 
         return df
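To make the row accounting concrete: every method still yields one dataframe row per partner, while a dict-valued method now yields one row per partner per metric. A hypothetical check, which also explains the `+ 2` appearing in the test update of the last patch:

```python
# Hypothetical numbers: 2 partners, 10 methods, one of which (S-Model)
# is dict-valued with 3 statistical distances.
partners_count = 2
all_methods = 10
smodel_metrics = 3

rows = partners_count * (all_methods - 1)           # classic methods: 1 row per partner
rows += partners_count * smodel_metrics             # S-Model: 1 row per partner per metric
assert rows == partners_count * (all_methods + 2)   # the form used in the updated test
```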
From dcd0ed90adec00a2d2cebd168079a389ac0bc8bb Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sun, 23 May 2021 16:06:31 +0200
Subject: [PATCH 10/11] Fix smodel initialization

Fix kwargs use for mpl in contributivity.

The S-model initialization could fail if the confusion matrix did not have
the right shape, which can be the case when some labels are not included in
the dataset of a partner. By the way, I noticed that smodel can only work
with datasets with 10 labels, i.e. only CIFAR and MNIST. I opened an issue
about that.

Signed-off-by: arthurPignet
---
 .coverage                                | Bin 53248 -> 53248 bytes
 mplc/contributivity.py                   |   7 +------
 mplc/multi_partner_learning/basic_mpl.py |   3 ++-
 mplc/multi_partner_learning/fast_mpl.py  |   4 ++--
 4 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/.coverage b/.coverage
index 0b5c8725561825bddafd260cc330e23e8c175e41..eee57be0e4fe4c778a14d8815e230b8d562d2bd2 100644
GIT binary patch
delta 3477
[binary data omitted]

delta 444
[binary data omitted]

diff --git a/mplc/contributivity.py b/mplc/contributivity.py
index 505fbb86..cd3f77a4 100644
--- a/mplc/contributivity.py
+++ b/mplc/contributivity.py
@@ -1124,12 +1124,7 @@ def compute_relative_perf_matrix(self):
     def statistcal_distances_via_smodel(self):
 
         start = timer()
-        try:
-            mpl_pretrain = self.scenario.mpl.pretrain_epochs
-        except AttributeError:
-            mpl_pretrain = 2
-
-        mpl = fast_mpl.FastFedAvgSmodel(self.scenario, mpl_pretrain)
+        mpl = fast_mpl.FastFedAvgSmodel(self.scenario, **self.scenario.mpl_kwargs)
         mpl.fit()
         cross_entropy = tf.keras.metrics.CategoricalCrossentropy()
         self.contributivity_scores = {'Kullback Leiber divergence': [0 for _ in mpl.partners_list],
diff --git a/mplc/multi_partner_learning/basic_mpl.py b/mplc/multi_partner_learning/basic_mpl.py
index bed2e384..8c618e9d 100644
--- a/mplc/multi_partner_learning/basic_mpl.py
+++ b/mplc/multi_partner_learning/basic_mpl.py
@@ -544,7 +544,8 @@ def fit(self):
             for p in self.partners_list:
                 confusion = confusion_matrix(np.argmax(p.y_train, axis=1),
                                              np.argmax(pretrain_model.predict(p.x_train), axis=1),
-                                             normalize='pred')
+                                             normalize='pred',
+                                             labels=list(range(10)))
                 p.noise_layer_weights = [np.log(confusion.T + 1e-8)]
             self.model_weights[:-1] = self.pretrain_mpl.model_weights[:-1]
         else:
diff --git a/mplc/multi_partner_learning/fast_mpl.py b/mplc/multi_partner_learning/fast_mpl.py
index ccf7d902..5471470b 100644
--- a/mplc/multi_partner_learning/fast_mpl.py
+++ b/mplc/multi_partner_learning/fast_mpl.py
@@ -376,7 +376,7 @@ def fit_minibatch(model, partners_minibatches, partners_optimizers, partners_wei
             for p in self.partners_list:
                 confusion = confusion_matrix(np.argmax(p.y_train, axis=1),
                                              np.argmax(self.model.predict(p.x_train), axis=1),
-                                             normalize='pred')
+                                             normalize='pred', labels=list(range(10)))
                 p.noise_layer_weights = [np.log(confusion.T + 1e-8)]
         else:
             for p in self.partners_list:
@@ -549,7 +549,7 @@ def fit_epoch(model, train_dataset, partners_grads, smodel_list, global_grad, ag
             for p in self.partners_list:
                 confusion = confusion_matrix(np.argmax(p.y_train, axis=1),
                                              np.argmax(self.model.predict(p.x_train), axis=1),
-                                             normalize='pred')
+                                             normalize='pred', labels=list(range(10)))
                 p.noise_layer_weights = [np.log(confusion.T + 1e-8)]
         else:
             for p in self.partners_list:
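The `labels` argument is what guarantees the fixed 10x10 shape: without it, scikit-learn infers the label set from the data it actually sees, so a partner missing some classes gets a smaller matrix than the noise layer expects. A standalone sketch with synthetic data:

```python
import numpy as np
from sklearn.metrics import confusion_matrix

# Synthetic partner data in which classes 7, 8 and 9 never appear.
y_true = np.array([0, 1, 2, 3, 4, 5, 6, 0, 1, 2])
y_pred = np.array([0, 1, 2, 3, 4, 5, 6, 1, 1, 2])

inferred = confusion_matrix(y_true, y_pred, normalize='pred')
fixed = confusion_matrix(y_true, y_pred, normalize='pred', labels=list(range(10)))

print(inferred.shape)  # (7, 7)  -> wrong shape for a 10-class noise layer
print(fixed.shape)     # (10, 10) -> absent labels appear as all-zero rows/columns
```

The downstream `np.log(confusion.T + 1e-8)` then keeps those zero entries finite.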
From 078cbca98c48ce0cf81a2121b53fa32b01472141 Mon Sep 17 00:00:00 2001
From: arthurPignet
Date: Sun, 23 May 2021 16:34:16 +0200
Subject: [PATCH 11/11] Update contrib test

Signed-off-by: arthurPignet
---
 tests/contrib_end_to_end_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/contrib_end_to_end_test.py b/tests/contrib_end_to_end_test.py
index 83b46f29..7781331e 100644
--- a/tests/contrib_end_to_end_test.py
+++ b/tests/contrib_end_to_end_test.py
@@ -71,7 +71,7 @@ def test_all_contrib_methods(self):
         exp.run()
         df = exp.result
 
-        assert len(df) == 2 * len(all_methods)
+        assert len(df) == 2 * (len(all_methods) + 2)  # the S-Model contributivity generates 3 rows per partner, not 1
 
     def test_IS_reg_S_contrib(self):
         """