From f67ca958add3e8ff8b07643a5923bae04ddfbdd7 Mon Sep 17 00:00:00 2001 From: qbarthelemy Date: Sat, 27 Sep 2025 02:22:56 +0200 Subject: [PATCH 1/2] improve code for compute_pvals_perm --- moabb/analysis/meta_analysis.py | 34 ++++++++++++++++----------------- moabb/tests/test_analysis.py | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/moabb/analysis/meta_analysis.py b/moabb/analysis/meta_analysis.py index 5bb813af3..022a70a89 100644 --- a/moabb/analysis/meta_analysis.py +++ b/moabb/analysis/meta_analysis.py @@ -74,21 +74,21 @@ def compute_pvals_wilcoxon(df, order=None): return out -def _pairedttest_exhaustive(data): - """Exhaustive paired t-test for permutation tests. +def _pairedttest_exact(data): + """Exact paired t-test. - Returns p-values for exhaustive ttest that runs through all possible + Returns p-values for exact t-test that runs through all possible permutations of the first dimension. Very bad idea for size greater than 12 Parameters ---------- data: ndarray of shape (n_subj, n_pipelines, n_pipelines) - Array of differences between scores for each pair of algorithms per subject + Differences between scores for each pair of pipelines per subject Returns ------- pvals: ndarray of shape (n_pipelines, n_pipelines) - array of pvalues + pvalues """ out = np.zeros(data.shape[1:], dtype=np.int32) true = data.sum(axis=0) @@ -103,7 +103,7 @@ def _pairedttest_exhaustive(data): # Correct for p-values equal to 1 # as they are invalid p-values for Stouffer's method. - # Note: as this is an exhaustive permutation test, + # Note: as this is an exact test, # one of the t-test is computed with the original statistic # So in practice out cannot contain zeros. @@ -113,27 +113,25 @@ def _pairedttest_exhaustive(data): def _pairedttest_random(data, nperms, seed=None): - """Return p-values based on nperms permutations of a paired ttest. + """Randomized paired t-test. - data is a (subj, alg, alg) matrix of differences between scores for each - pair of algorithms per subject + Returns p-values based on nperms permutations of a paired t-test. Parameters ---------- data: ndarray of shape (n_subj, n_pipelines, n_pipelines) - Array of differences between scores for each pair of algorithms per subject + Differences between scores for each pair of pipelines per subject Returns ------- pvals: ndarray of shape (n_pipelines, n_pipelines) - array of pvalues + pvalues """ rng = check_random_state(seed) out = np.ones(data.shape[1:], dtype=np.int32) true = data.sum(axis=0) for _ in range(nperms): - perm = rng.randint(2, size=(data.shape[0],)) - perm[perm == 0] = -1 + perm = rng.choice([1, -1], size=(data.shape[0],), replace=True) # multiply permutation by subject dimension and sum over subjects randperm = (data * perm[:, None, None]).sum(axis=0) # compare to true difference (numpy autocasts bool to 0/1) @@ -151,7 +149,7 @@ def _pairedttest_random(data, nperms, seed=None): def compute_pvals_perm(df, order=None, seed=None): """Compute permutation test on aggregated results. - Returns kxk matrix of p-values computed via permutation test, + Returns a square matrix of p-values computed via permutation test, order defines the order of rows and columns Parameters @@ -159,15 +157,15 @@ def compute_pvals_perm(df, order=None, seed=None): df: DataFrame Aggregated results, samples are index, columns are pipelines, and values are scores - order: list - list of length (num algorithms) with names corresponding to df columns + order: list of length (n_pipelines) + Names corresponding to df columns seed: int | None random seed for reproducibility Returns ------- pvals: ndarray of shape (n_pipelines, n_pipelines) - array of pvalues + pvalues """ if order is None: order = df.columns @@ -185,7 +183,7 @@ def compute_pvals_perm(df, order=None, seed=None): if data.shape[0] > 13: p = _pairedttest_random(data, 10000, seed=seed) else: - p = _pairedttest_exhaustive(data) + p = _pairedttest_exact(data) return p diff --git a/moabb/tests/test_analysis.py b/moabb/tests/test_analysis.py index b397d02a1..355d931b2 100644 --- a/moabb/tests/test_analysis.py +++ b/moabb/tests/test_analysis.py @@ -165,7 +165,7 @@ def test_compute_pvals_random_cannot_be_zero(self): n_perms = 10000 # hardcoded in _pairedttest_random pvals = ma.compute_pvals_perm(df, seed=rng) p1vsp2 = pvals[0, 1] - assert p1vsp2 >= 1 / n_perms, "P-values cannot be zero " + assert p1vsp2 >= 1 / n_perms, f"P-values cannot be zero {pvals}" class TestResults: From c7d064fa6b37d7dbee570652b66225cebaa5c5fd Mon Sep 17 00:00:00 2001 From: qbarthelemy Date: Sat, 27 Sep 2025 02:31:59 +0200 Subject: [PATCH 2/2] complete whatsnew --- docs/source/whats_new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 23cd88dd1..7be58e54e 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -22,6 +22,7 @@ Enhancements - Adding :class:`moabb.datasets.Kojima2024A` (:gh:`807` by `Simon Kojima`_) - Adding :class:`moabb.datasets.Kojima2024B` (:gh:`806` by `Simon Kojima`_) - Add new dataset :class:`moabb.datasets.BNCI2003_IVa` dataset (:gh:`811` by `Griffin Keeler`_) +- Improve compute_pvals_perm function (:gh:`818` by `Quentin Barthelemy`_) Bugs ~~~~