From f546be70c86fa75a91e8f4e71c12ca2f4407dde8 Mon Sep 17 00:00:00 2001 From: Ravi Selker Date: Wed, 29 Apr 2026 16:57:48 +0200 Subject: [PATCH] Adjust Wilcoxon tests for R 4.6 Pratt method R 4.6.0 introduces exact conditional inference (Pratt method) for Wilcoxon tests with ties. This change updates the effect size denominator to match the test's handling of zero-differences, ensuring mathematical consistency and avoiding value shifts when toggling between exact and asymptotic methods. --- R/ttestones.b.R | 55 +++++++++++++++++++++++++--------- R/ttestps.b.R | 34 +++++++++++++++++++-- tests/testthat/testttestones.R | 8 ++--- tests/testthat/testttestps.R | 6 ++-- 4 files changed, 79 insertions(+), 24 deletions(-) diff --git a/R/ttestones.b.R b/R/ttestones.b.R index 65830e80..836db5cb 100644 --- a/R/ttestones.b.R +++ b/R/ttestones.b.R @@ -107,22 +107,51 @@ ttestOneSClass <- R6::R6Class( if (self$options$wilcoxon || self$options$mann) { - if (is.factor(column)) + if (is.factor(column)) { res <- createError(.('Variable is not numeric')) - else if (length(column) == 0) + } else if (length(column) == 0) { res <- createError(.('Variable does not contain enough observations')) - else - res <- try(suppressWarnings(wilcox.test(column, mu=testValue, - alternative=Ha, - paired=FALSE, - conf.int=TRUE, - conf.level=cl)), silent=TRUE) + } else { + # Determine method based on sample size to balance precision and performance. + # R 4.6+ supports exact conditional inference (Pratt's method) for data with + # ties/zeros. For N >= 50, asymptotic approximation is used for efficiency. + useExact <- (n < 50) + + res <- try( + suppressWarnings( + wilcox.test( + column, + mu=testValue, + alternative=Ha, + paired=FALSE, + conf.int=TRUE, + conf.level=cl, + exact=useExact + ) + ), + silent=TRUE + ) + } - if ( ! isError(res)) { - nTies <- sum(column == testValue) - totalRankSum <- ((n-nTies) * ((n-nTies) + 1)) / 2 - biSerial <- (2 * (res$statistic / totalRankSum)) - 1 + if ( ! isError(res)) { + # The Rank Biserial Correlation (effect size) denominator must align with the + # ranking method used by wilcox.test to ensure the value remains within [-1, 1]. + # Pratt's method (used when exact = TRUE) retains zero-differences in the rank pool. + # The asymptotic method (used when exact = FALSE) traditionally excludes zeros. + if (useExact) { + denom_n <- n + } else { + nTies <- sum(column == testValue) + denom_n <- n - nTies + } + + totalRankSum <- (denom_n * (denom_n + 1)) / 2 + + if (totalRankSum > 0) + biSerial <- (2 * (res$statistic / totalRankSum)) - 1 + else + biSerial <- NaN ttest$setRow(rowNo=i, list( "stat[wilc]"=res$statistic, @@ -133,9 +162,7 @@ ttestOneSClass <- R6::R6Class( "es[wilc]"=biSerial, "ciles[wilc]"='', "ciues[wilc]"='')) - } else { - ttest$setRow(rowNo=i, list( "stat[wilc]"=NaN, "p[wilc]"='', diff --git a/R/ttestps.b.R b/R/ttestps.b.R index d3d4b2ae..d495f23c 100644 --- a/R/ttestps.b.R +++ b/R/ttestps.b.R @@ -74,7 +74,22 @@ ttestPSClass <- R6::R6Class( } else { stud <- try(t.test(column1, column2, paired=TRUE, conf.level=confInt, alternative=Ha), silent=TRUE) - wilc <- try(suppressWarnings(wilcox.test(column1, column2, alternative=Ha, paired=TRUE, conf.int=TRUE, conf.level=confInt)), silent=TRUE) + + # Determine method based on sample size to balance precision and performance. + # R 4.6+ supports exact conditional inference (Pratt's method) for data with + # ties/zeros. For N >= 50, asymptotic approximation is used for efficiency. + useExact <- (n < 50) + wilc <- try(suppressWarnings( + wilcox.test( + column1, + column2, + alternative=Ha, + paired=TRUE, + conf.int=TRUE, + conf.level=confInt, + exact=useExact + ) + ), silent=TRUE) } if ( ! isError(stud)) { @@ -115,8 +130,21 @@ ttestPSClass <- R6::R6Class( if ( ! isError(wilc)) { - totalRankSum <- ((n-nTies) * ((n-nTies) + 1)) / 2 - biSerial <- (2 * (wilc$statistic / totalRankSum)) - 1 + # The Rank Biserial Correlation (effect size) denominator must align with the + # ranking method used by wilcox.test to ensure the value remains within [-1, 1]. + # Pratt's method (used when exact = TRUE) retains zero-differences in the rank pool. + # The asymptotic method (used when exact = FALSE) traditionally excludes zeros. + if (useExact) { + denom_n <- n + } else { + denom_n <- n - nTies + } + + totalRankSum <- (denom_n * (denom_n + 1)) / 2 + if (totalRankSum > 0) + biSerial <- (2 * (wilc$statistic / totalRankSum)) - 1 + else + biSerial <- NaN ttestTable$setRow(rowKey=pair, list( 'stat[wilc]'=wilc$statistic, diff --git a/tests/testthat/testttestones.R b/tests/testthat/testttestones.R index ece3ab98..365f5a50 100644 --- a/tests/testthat/testttestones.R +++ b/tests/testthat/testttestones.R @@ -74,9 +74,9 @@ testthat::test_that('Matched rank biserial correlation is correct', { # Test rank biserial correlation ttestTable <- r$ttest$asDF testthat::expect_equal('dif', ttestTable[['var[wilc]']]) - testthat::expect_equal(27, ttestTable[['stat[wilc]']]) - testthat::expect_equal(0.234, ttestTable[['p[wilc]']], tolerance = 1e-3) - testthat::expect_equal(0.5, ttestTable[['es[wilc]']]) + testthat::expect_equal(32, ttestTable[['stat[wilc]']]) + testthat::expect_equal(0.273, ttestTable[['p[wilc]']], tolerance = 1e-3) + testthat::expect_equal(0.422, ttestTable[['es[wilc]']], tolerance = 1e-3) }) testthat::test_that('Matched rank biserial correlation works with non zero test value', { @@ -88,5 +88,5 @@ testthat::test_that('Matched rank biserial correlation works with non zero test # Test rank biserial correlation ttestTable <- r$ttest$asDF - testthat::expect_equal(-0.0303, ttestTable[['es[wilc]']], tolerance = 1e-4) + testthat::expect_equal(-0.0476, ttestTable[['es[wilc]']], tolerance = 1e-3) }) diff --git a/tests/testthat/testttestps.R b/tests/testthat/testttestps.R index 793a9ffb..e2bd863f 100644 --- a/tests/testthat/testttestps.R +++ b/tests/testthat/testttestps.R @@ -95,7 +95,7 @@ testthat::test_that('Matched rank biserial correlation is correct', { ttestTable <- r$ttest$asDF testthat::expect_equal('before', ttestTable[['var1[wilc]']], tolerance = 1e-3) testthat::expect_equal('after', ttestTable[['var2[wilc]']], tolerance = 1e-3) - testthat::expect_equal(9, ttestTable[['stat[wilc]']]) - testthat::expect_equal(0.234, ttestTable[['p[wilc]']], tolerance = 1e-3) - testthat::expect_equal(-0.5, ttestTable[['es[wilc]']]) + testthat::expect_equal(12, ttestTable[['stat[wilc]']]) + testthat::expect_equal(0.273, ttestTable[['p[wilc]']], tolerance = 1e-3) + testthat::expect_equal(-0.467, ttestTable[['es[wilc]']], tolerance = 1e-3) })