Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion phackR/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ Suggests:
testthat,
knitr,
rmarkdown
RoxygenNote: 7.2.1
RoxygenNote: 7.3.3
VignetteBuilder: knitr
5 changes: 1 addition & 4 deletions phackR/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@ importFrom(R.devices,suppressGraphics)
importFrom(WRS2,yuen)
importFrom(aplpack,stem.leaf)
importFrom(car,Anova)
importFrom(dplyr,"%>%")
importFrom(dplyr,all_of)
importFrom(dplyr,do)
importFrom(dplyr,group_by_at)
importFrom(dplyr,mutate)
importFrom(ggplot2,aes)
importFrom(ggplot2,annotation_custom)
Expand All @@ -46,6 +43,7 @@ importFrom(grid,gpar)
importFrom(grid,grobTree)
importFrom(grid,textGrob)
importFrom(magrittr,"%$%")
importFrom(magrittr,"%>%")
importFrom(mice,complete)
importFrom(mvoutlier,uni.plot)
importFrom(pbapply,pblapply)
Expand Down Expand Up @@ -73,4 +71,3 @@ importFrom(stats,sd)
importFrom(stats,t.test)
importFrom(stats,wilcox.test)
importFrom(utils,capture.output)
importFrom(utils,tail)
84 changes: 50 additions & 34 deletions phackR/R/combinedStrategies_ttest.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,18 @@
.combined.t.hack <- function(df, roundinglevel = 0.051, alternative = "two.sided", strategy = "firstsig", alpha = 0.05){

####################### (1) Original p-value ###################

control.orig <- df[df$group == 1, "DV1"]
treatment.orig <- df[df$group == 2, "DV1"]

# Original p-value and effect sizes
test.orig <- stats::t.test(DV1 ~ group,
data = df,
var.equal = TRUE,
alternative = alternative)
p.orig <- test.orig$p.value
r2.orig <- .compR2t(df[df$group == 1, "DV1"], df[df$group == 2, "DV1"])
d.orig <- .compCohensD(unname(test.orig$statistic), nrow(df)/2)
report.orig <- .report.twogroup(control = control.orig,
treatment = treatment.orig,
method = "t.equal",
alternative = alternative)
p.orig <- report.orig[["p"]]
r2.orig <- .compR2t(control.orig, treatment.orig)
d.orig <- report.orig[["effect"]]

# If original p-value is significant stop and return original p-value
if(p.orig <= alpha) return(list(p.final = p.orig,
Expand All @@ -80,22 +83,26 @@
########### (2) Exploit statistical analysis options #################

# Welch test
p.welch <- stats::t.test(DV1 ~ group,
data = df,
var.equal = FALSE,
alternative = alternative)$p.value
p.welch <- .report.twogroup(control = control.orig,
treatment = treatment.orig,
method = "t.welch",
alternative = alternative)[["p"]]

# Mann-Whitney / Wilcoxon test
p.wilcox <- stats::wilcox.test(DV1 ~ group,
alternative = alternative,
data = df)$p.value
p.wilcox <- .report.twogroup(control = control.orig,
treatment = treatment.orig,
method = "wilcox",
alternative = alternative)[["p"]]

# Yuen test with different levels of trimming
p.yuen <- rep(NA, 4)
trim <- c(0.1, 0.15, 0.2, 0.25)
for(i in 1:4) {
p.yuen[i] <- WRS2::yuen(DV1 ~ group, tr = trim[i],
data = df)$p.value
p.yuen[i] <- .report.twogroup(control = control.orig,
treatment = treatment.orig,
method = "yuen",
trim = trim[i],
alternative = alternative)[["p"]]
}

ps <- c(p.orig, p.welch, p.wilcox, p.yuen)
Expand All @@ -120,19 +127,20 @@
r2s <- NULL

for(i in 1:sum(grepl("DV", colnames(df)))){

mod[[i]] <- stats::t.test(df[,paste0("DV", i)] ~ df$group,
var.equal = TRUE,
alternative = alternative)
control.current <- df[df$group == 1, paste0("DV", i)]
treatment.current <- df[df$group == 2, paste0("DV", i)]
mod[[i]] <- .report.twogroup(control = control.current,
treatment = treatment.current,
method = "t.equal",
alternative = alternative)

r2s[i] <- .compR2t(df[df$group == 1, paste0("DV", i)],
df[df$group == 2, paste0("DV", i)])
r2s[i] <- .compR2t(control.current, treatment.current)

}

ps <- unlist(simplify2array(mod)["p.value", ])
ps <- vapply(mod, function(x) x[["p"]], numeric(1))

ds <- .compCohensD(unlist(simplify2array(mod)["statistic", ]), nrow(df)/2)
ds <- vapply(mod, function(x) x[["effect"]], numeric(1))

# Select final p-hacked p-value based on strategy
p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = roundinglevel)
Expand Down Expand Up @@ -208,16 +216,24 @@

# Compute t-test for each subgroup of the subgroup variables
for(i in 1:length(whichsub)){

tmp <- dplyr::group_by_at(df, whichsub[i]) %>%
dplyr::do(as.data.frame(stats::t.test(.data$DV1 ~ .data$group, var.equal = TRUE, alternative = alternative)[c("p.value", "statistic")]))
tmp2 <- dplyr::group_by_at(df, whichsub[i]) %>%
dplyr::do(as.data.frame(table(.data$group)))
tmp3 <- dplyr::group_by_at(df, whichsub[i]) %>% do(as.data.frame(.compR2t(.data$DV1[.data$group == unique(.data$group)[1]], .data$DV1[.data$group == unique(.data$group)[2]])))

ps[[i]] <- tmp[[2]]
ds[[i]] <- c(tmp[[3]][1]*sqrt(sum(1/tmp2[[3]][1:2])), tmp[[3]][2]*sqrt(sum(1/tmp2[[3]][3:4])))
r2s[[i]] <- tmp3[[2]]
subgroup.values <- sort(unique(df[, whichsub[i]]))
reports <- vector("list", length(subgroup.values))
r2.current <- rep(NA_real_, length(subgroup.values))

for(j in 1:length(subgroup.values)){
subset.df <- df[df[, whichsub[i]] == subgroup.values[j], , drop = FALSE]
control.sub <- subset.df[subset.df$group == 1, "DV1"]
treatment.sub <- subset.df[subset.df$group == 2, "DV1"]
reports[[j]] <- .report.twogroup(control = control.sub,
treatment = treatment.sub,
method = "t.equal",
alternative = alternative)
r2.current[j] <- .compR2t(control.sub, treatment.sub)
}

ps[[i]] <- vapply(reports, function(x) x[["p"]], numeric(1))
ds[[i]] <- vapply(reports, function(x) x[["effect"]], numeric(1))
r2s[[i]] <- r2.current

}

Expand Down
89 changes: 38 additions & 51 deletions phackR/R/compositeScores.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@
#' @param nobs Integer giving number of observations
#' @param ncompv Integer giving number of variables to build the composite score
#' @param rcomp Correlation between the composite score variables
#' @param effect Mean effect size across studies on the Fisher-z scale
#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale

.sim.compscore <- function(nobs, ncompv, rcomp){

dv <- rnorm(nobs, 0, 1)
.sim.compscore <- function(nobs, ncompv, rcomp, effect = 0, heterogeneity = 0){

iv <- .sim.multcor(nobs = nobs, nvar = ncompv, r = rcomp)
theta <- .draw.study.effect(effect = effect, heterogeneity = heterogeneity)
rho <- .fisherz_to_r(theta)
compscore <- scale(rowMeans(iv))[,1]
dv <- rho*compscore + sqrt(1-rho^2)*rnorm(nobs, 0, 1)

res <- cbind(dv, iv)

Expand All @@ -34,13 +38,12 @@
stopifnot(length(compv)-ndelete >= 2)

# Compute original p-value and R^2 with full scale
modres <- summary(lm(df[, dv] ~ rowMeans(df[, compv])))
p.orig <- modres$coefficients[2, 4]
r2.orig <- modres$r.squared
fullscale <- rowMeans(df[, compv, drop = FALSE])
report.orig <- .report.association(x = fullscale, y = df[, dv], method = "scale.full")
analyses <- list(list(report = report.orig,
r2 = tanh(report.orig[["effect"]])^2))

# Prepare and initialize variables for p-hacking
ps <- list()
r2s <- list()
compscale <- df[, compv]
changescale <- df[, compv]
out <- NULL
Expand All @@ -55,62 +58,59 @@
out[i] <- which(colnames(compscale) %in% colnames(changescale)[which.max(performance::item_reliability(changescale)[,2])])

# Compute p-value for the new composite score
newscore <- rowMeans(compscale[, -out])
newmodres <- summary(lm(df[, dv] ~ newscore))
pval[1] <- newmodres$coefficients[2, 4]
r2val[1] <- newmodres$r.squared
newscore <- rowMeans(compscale[, -out, drop = FALSE])
report.new <- .report.association(x = newscore, y = df[, dv],
method = paste0("scale.delete.", paste(out, collapse = "-")))
analyses[[length(analyses)+1]] <- list(report = report.new,
r2 = tanh(report.new[["effect"]])^2)

# Compute p-value for the item deleted from the score
itemscore <- compscale[, out[i]]
newmodres2 <- summary(lm(df[, dv] ~ itemscore))
pval[2] <- newmodres2$coefficients[2, 4]
r2val[2] <- newmodres2$r.squared

# Compute p-value for a scale of all items deleted so far
#nonscore <- rowMeans(cbind(compscale[, out]))
#newmodres3 <- summary(lm(df[, dv] ~ nonscore))
#pval[3] <- newmodres3$coefficients[2, 4]
#r2val[3] <- newmodres3$r.squared

changescale <- compscale[, -out]
ps[[i]] <- pval
r2s[[i]] <- r2val
report.item <- .report.association(x = itemscore, y = df[, dv],
method = paste0("item.", out[i]))
analyses[[length(analyses)+1]] <- list(report = report.item,
r2 = tanh(report.item[["effect"]])^2)

changescale <- compscale[, -out, drop = FALSE]
}

ps <- c(p.orig, unique(unlist(ps)))
r2s <- c(r2.orig, unique(unlist(r2s)))
ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1))

# Select final p-hacked p-value based on strategy
p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
r2.final <- unique(r2s[ps == p.final])
final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha)

return(list(p.final = p.final,
ps = ps,
r2.final = r2.final,
r2s = r2s))
return(list(ps.hack = analyses[[final.index]][["report"]][["p"]],
ps.orig = analyses[[1]][["report"]][["p"]],
r2s.hack = analyses[[final.index]][["r2"]],
r2s.orig = analyses[[1]][["r2"]],
report.initial = analyses[[1]][["report"]],
report.final = analyses[[final.index]][["report"]]))

}

#' Simulate p-hacking with composite scores
#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations
#' @param nobs Integer giving number of observations
#' @param ncompv Integer giving number of variables to build the composite score
#' @param rcomp Correlation between the composite score variables
#' @param ndelete How many items should be deleted from the scale at maximum?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param effect Mean effect size across studies on the Fisher-z scale
#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @importFrom pbapply pblapply
#' @importFrom shiny withProgress incProgress
#' @export

sim.compscoreHack <- function(nobs, ncompv, rcomp, ndelete, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv=FALSE){
sim.compscoreHack <- function(nobs, ncompv, rcomp, ndelete, strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv=FALSE){

# Simulate as many datasets as desired iterations
dat <- list()
for(i in 1:iter){
dat[[i]] <- .sim.compscore(nobs = nobs, ncompv = ncompv, rcomp = rcomp)
dat[[i]] <- .sim.compscore(nobs = nobs, ncompv = ncompv, rcomp = rcomp,
effect = effect, heterogeneity = heterogeneity)
}

# Apply p-hacking procedure to each dataset (with progress bar within or outside Shiny)
Expand All @@ -135,20 +135,7 @@ sim.compscoreHack <- function(nobs, ncompv, rcomp, ndelete, strategy = "firstsig
})
}

ps.hack <- NULL
ps.orig <- NULL
r2s.orig <- NULL
r2s.hack <- NULL

for(i in 1:iter){
ps.hack[i] <- res[[i]][["p.final"]]
ps.orig[i] <- res[[i]][["ps"]][1]
r2s.hack[i] <- res[[i]][["r2.final"]]
r2s.orig[i] <- res[[i]][["r2s"]][1]
}

res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)

return(res)
.combine.phase1.results(res = res,
legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig"))

}
Loading