diff --git a/phackR/DESCRIPTION b/phackR/DESCRIPTION index 4a10d8d..71447fa 100644 --- a/phackR/DESCRIPTION +++ b/phackR/DESCRIPTION @@ -12,5 +12,5 @@ Suggests: testthat, knitr, rmarkdown -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.3 VignetteBuilder: knitr diff --git a/phackR/NAMESPACE b/phackR/NAMESPACE index 30abd96..59e4b43 100644 --- a/phackR/NAMESPACE +++ b/phackR/NAMESPACE @@ -17,10 +17,7 @@ importFrom(R.devices,suppressGraphics) importFrom(WRS2,yuen) importFrom(aplpack,stem.leaf) importFrom(car,Anova) -importFrom(dplyr,"%>%") importFrom(dplyr,all_of) -importFrom(dplyr,do) -importFrom(dplyr,group_by_at) importFrom(dplyr,mutate) importFrom(ggplot2,aes) importFrom(ggplot2,annotation_custom) @@ -46,6 +43,7 @@ importFrom(grid,gpar) importFrom(grid,grobTree) importFrom(grid,textGrob) importFrom(magrittr,"%$%") +importFrom(magrittr,"%>%") importFrom(mice,complete) importFrom(mvoutlier,uni.plot) importFrom(pbapply,pblapply) @@ -73,4 +71,3 @@ importFrom(stats,sd) importFrom(stats,t.test) importFrom(stats,wilcox.test) importFrom(utils,capture.output) -importFrom(utils,tail) diff --git a/phackR/R/combinedStrategies_ttest.R b/phackR/R/combinedStrategies_ttest.R index a4add34..6111522 100644 --- a/phackR/R/combinedStrategies_ttest.R +++ b/phackR/R/combinedStrategies_ttest.R @@ -48,15 +48,18 @@ .combined.t.hack <- function(df, roundinglevel = 0.051, alternative = "two.sided", strategy = "firstsig", alpha = 0.05){ ####################### (1) Original p-value ################### + + control.orig <- df[df$group == 1, "DV1"] + treatment.orig <- df[df$group == 2, "DV1"] # Original p-value and effect sizes - test.orig <- stats::t.test(DV1 ~ group, - data = df, - var.equal = TRUE, - alternative = alternative) - p.orig <- test.orig$p.value - r2.orig <- .compR2t(df[df$group == 1, "DV1"], df[df$group == 2, "DV1"]) - d.orig <- .compCohensD(unname(test.orig$statistic), nrow(df)/2) + report.orig <- .report.twogroup(control = control.orig, + treatment = treatment.orig, + method = "t.equal", 
+ alternative = alternative) + p.orig <- report.orig[["p"]] + r2.orig <- .compR2t(control.orig, treatment.orig) + d.orig <- report.orig[["effect"]] # If original p-value is significant stop and return original p-value if(p.orig <= alpha) return(list(p.final = p.orig, @@ -80,22 +83,26 @@ ########### (2) Exploit statistical analysis options ################# # Welch test - p.welch <- stats::t.test(DV1 ~ group, - data = df, - var.equal = FALSE, - alternative = alternative)$p.value + p.welch <- .report.twogroup(control = control.orig, + treatment = treatment.orig, + method = "t.welch", + alternative = alternative)[["p"]] # Mann-Whitney / Wilcoxon test - p.wilcox <- stats::wilcox.test(DV1 ~ group, - alternative = alternative, - data = df)$p.value + p.wilcox <- .report.twogroup(control = control.orig, + treatment = treatment.orig, + method = "wilcox", + alternative = alternative)[["p"]] # Yuen test with different levels of trimming p.yuen <- rep(NA, 4) trim <- c(0.1, 0.15, 0.2, 0.25) for(i in 1:4) { - p.yuen[i] <- WRS2::yuen(DV1 ~ group, tr = trim[i], - data = df)$p.value + p.yuen[i] <- .report.twogroup(control = control.orig, + treatment = treatment.orig, + method = "yuen", + trim = trim[i], + alternative = alternative)[["p"]] } ps <- c(p.orig, p.welch, p.wilcox, p.yuen) @@ -120,19 +127,20 @@ r2s <- NULL for(i in 1:sum(grepl("DV", colnames(df)))){ - - mod[[i]] <- stats::t.test(df[,paste0("DV", i)] ~ df$group, - var.equal = TRUE, - alternative = alternative) + control.current <- df[df$group == 1, paste0("DV", i)] + treatment.current <- df[df$group == 2, paste0("DV", i)] + mod[[i]] <- .report.twogroup(control = control.current, + treatment = treatment.current, + method = "t.equal", + alternative = alternative) - r2s[i] <- .compR2t(df[df$group == 1, paste0("DV", i)], - df[df$group == 2, paste0("DV", i)]) + r2s[i] <- .compR2t(control.current, treatment.current) } - ps <- unlist(simplify2array(mod)["p.value", ]) + ps <- vapply(mod, function(x) x[["p"]], numeric(1)) - ds <- 
.compCohensD(unlist(simplify2array(mod)["statistic", ]), nrow(df)/2) + ds <- vapply(mod, function(x) x[["effect"]], numeric(1)) # Select final p-hacked p-value based on strategy p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = roundinglevel) @@ -208,16 +216,24 @@ # Compute t-test for each subgroup of the subgroup variables for(i in 1:length(whichsub)){ - - tmp <- dplyr::group_by_at(df, whichsub[i]) %>% - dplyr::do(as.data.frame(stats::t.test(.data$DV1 ~ .data$group, var.equal = TRUE, alternative = alternative)[c("p.value", "statistic")])) - tmp2 <- dplyr::group_by_at(df, whichsub[i]) %>% - dplyr::do(as.data.frame(table(.data$group))) - tmp3 <- dplyr::group_by_at(df, whichsub[i]) %>% do(as.data.frame(.compR2t(.data$DV1[.data$group == unique(.data$group)[1]], .data$DV1[.data$group == unique(.data$group)[2]]))) - - ps[[i]] <- tmp[[2]] - ds[[i]] <- c(tmp[[3]][1]*sqrt(sum(1/tmp2[[3]][1:2])), tmp[[3]][2]*sqrt(sum(1/tmp2[[3]][3:4]))) - r2s[[i]] <- tmp3[[2]] + subgroup.values <- sort(unique(df[, whichsub[i]])) + reports <- vector("list", length(subgroup.values)) + r2.current <- rep(NA_real_, length(subgroup.values)) + + for(j in 1:length(subgroup.values)){ + subset.df <- df[df[, whichsub[i]] == subgroup.values[j], , drop = FALSE] + control.sub <- subset.df[subset.df$group == 1, "DV1"] + treatment.sub <- subset.df[subset.df$group == 2, "DV1"] + reports[[j]] <- .report.twogroup(control = control.sub, + treatment = treatment.sub, + method = "t.equal", + alternative = alternative) + r2.current[j] <- .compR2t(control.sub, treatment.sub) + } + + ps[[i]] <- vapply(reports, function(x) x[["p"]], numeric(1)) + ds[[i]] <- vapply(reports, function(x) x[["effect"]], numeric(1)) + r2s[[i]] <- r2.current } diff --git a/phackR/R/compositeScores.R b/phackR/R/compositeScores.R index 7b767e8..5468cfe 100644 --- a/phackR/R/compositeScores.R +++ b/phackR/R/compositeScores.R @@ -6,12 +6,16 @@ #' @param nobs Integer giving number of observations #' @param ncompv Integer giving 
number of variables to build the composite score #' @param rcomp Correlation between the composite score variables +#' @param effect Mean effect size across studies on the Fisher-z scale +#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale -.sim.compscore <- function(nobs, ncompv, rcomp){ - - dv <- rnorm(nobs, 0, 1) +.sim.compscore <- function(nobs, ncompv, rcomp, effect = 0, heterogeneity = 0){ iv <- .sim.multcor(nobs = nobs, nvar = ncompv, r = rcomp) + theta <- .draw.study.effect(effect = effect, heterogeneity = heterogeneity) + rho <- .fisherz_to_r(theta) + compscore <- scale(rowMeans(iv))[,1] + dv <- rho*compscore + sqrt(1-rho^2)*rnorm(nobs, 0, 1) res <- cbind(dv, iv) @@ -34,13 +38,12 @@ stopifnot(length(compv)-ndelete >= 2) # Compute original p-value and R^2 with full scale - modres <- summary(lm(df[, dv] ~ rowMeans(df[, compv]))) - p.orig <- modres$coefficients[2, 4] - r2.orig <- modres$r.squared + fullscale <- rowMeans(df[, compv, drop = FALSE]) + report.orig <- .report.association(x = fullscale, y = df[, dv], method = "scale.full") + analyses <- list(list(report = report.orig, + r2 = tanh(report.orig[["effect"]])^2)) # Prepare and initialize variables for p-hacking - ps <- list() - r2s <- list() compscale <- df[, compv] changescale <- df[, compv] out <- NULL @@ -55,49 +58,45 @@ out[i] <- which(colnames(compscale) %in% colnames(changescale)[which.max(performance::item_reliability(changescale)[,2])]) # Compute p-value for the new composite score - newscore <- rowMeans(compscale[, -out]) - newmodres <- summary(lm(df[, dv] ~ newscore)) - pval[1] <- newmodres$coefficients[2, 4] - r2val[1] <- newmodres$r.squared + newscore <- rowMeans(compscale[, -out, drop = FALSE]) + report.new <- .report.association(x = newscore, y = df[, dv], + method = paste0("scale.delete.", paste(out, collapse = "-"))) + analyses[[length(analyses)+1]] <- list(report = report.new, + r2 = tanh(report.new[["effect"]])^2) # Compute p-value for the item deleted from the 
score itemscore <- compscale[, out[i]] - newmodres2 <- summary(lm(df[, dv] ~ itemscore)) - pval[2] <- newmodres2$coefficients[2, 4] - r2val[2] <- newmodres2$r.squared - - # Compute p-value for a scale of all items deleted so far - #nonscore <- rowMeans(cbind(compscale[, out])) - #newmodres3 <- summary(lm(df[, dv] ~ nonscore)) - #pval[3] <- newmodres3$coefficients[2, 4] - #r2val[3] <- newmodres3$r.squared - - changescale <- compscale[, -out] - ps[[i]] <- pval - r2s[[i]] <- r2val + report.item <- .report.association(x = itemscore, y = df[, dv], + method = paste0("item.", out[i])) + analyses[[length(analyses)+1]] <- list(report = report.item, + r2 = tanh(report.item[["effect"]])^2) + + changescale <- compscale[, -out, drop = FALSE] } - ps <- c(p.orig, unique(unlist(ps))) - r2s <- c(r2.orig, unique(unlist(r2s))) + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- unique(r2s[ps == p.final]) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-hacking with composite scores -#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs Integer giving number of observations #' @param ncompv Integer giving number of variables to build the composite 
score #' @param rcomp Correlation between the composite score variables #' @param ndelete How many items should be deleted from the scale at maximum? #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param effect Mean effect size across studies on the Fisher-z scale +#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale #' @param alpha Significance level of the t-test (default: 0.05) #' @param iter Number of simulation iterations #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE @@ -105,12 +104,13 @@ #' @importFrom shiny withProgress incProgress #' @export -sim.compscoreHack <- function(nobs, ncompv, rcomp, ndelete, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv=FALSE){ +sim.compscoreHack <- function(nobs, ncompv, rcomp, ndelete, strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv=FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.compscore(nobs = nobs, ncompv = ncompv, rcomp = rcomp) + dat[[i]] <- .sim.compscore(nobs = nobs, ncompv = ncompv, rcomp = rcomp, + effect = effect, heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset (with progress bar within or outside Shiny) @@ -135,20 +135,7 @@ sim.compscoreHack <- function(nobs, ncompv, rcomp, ndelete, strategy = "firstsig }) } - ps.hack <- NULL - ps.orig <- NULL - r2s.orig <- NULL - r2s.hack <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig")) } diff --git a/phackR/R/exploitCovariates.R b/phackR/R/exploitCovariates.R index 87c33a8..86ab7e6 100644 --- a/phackR/R/exploitCovariates.R +++ 
b/phackR/R/exploitCovariates.R @@ -8,12 +8,14 @@ #' @param ncov Number of continuous covariates in the simulated data frame #' @param rcov Correlation between the covariates #' @param rcovdv Correlation between covariates and dependent variable +#' @param effect Mean effect size across studies on the standardized mean-difference scale +#' @param heterogeneity Between-study heterogeneity on the standardized mean-difference scale #' @param mu Mean of the random data #' @param sd Standard deviation of the random data #' @param missing Proportion of missing values per variable (e.g., 0.2 = 20 percent) #' @importFrom stats rnorm -.sim.covariates <- function(nobs.group, ncov, rcov, rcovdv, mu = 0, sd = 1, missing = 0){ +.sim.covariates <- function(nobs.group, ncov, rcov, rcovdv, effect = 0, heterogeneity = 0, mu = 0, sd = 1, missing = 0, theta = NULL){ # Observations per group and total observations if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2) @@ -37,6 +39,8 @@ # create raw data from matrix multiplication of U and random noise X <- as.data.frame(t(U %*% random.normal)) + if(is.null(theta)) theta <- .draw.study.effect(effect = effect, heterogeneity = heterogeneity) + X[group == 2, 1] <- X[group == 2, 1] + theta # create final simulated data matrix Xfull <- cbind(group, X) @@ -73,8 +77,7 @@ colnames(df)[covs] <- paste0("CV", 1:length(covs)) df <- df[, c(dv, group, covs)] - ps <- NULL - eta2s <- NULL # partial eta^2 + analyses <- list() # Compute correlations between covariates and dependent variable and order covariates accordingly dvcors <- apply(X = df[,-group], MARGIN = 2, FUN = function(x) stats::cor(x, df$dv))[-1] @@ -105,43 +108,54 @@ res <- stats::aov(stats::as.formula(models[i]), data = df) resanc <- car::Anova(res, type = 2) - ps[i] <- resanc["group", "Pr(>F)"] - eta2s[i] <- resanc["group", "Sum Sq"]/(resanc["group", "Sum Sq"] + resanc["Residuals", "Sum Sq"]) + report <- .report.group_lm(formula = stats::as.formula(models[i]), + data = df, + 
groupvar = "group", + method = paste0("ancova.", gsub(" ", "", models[i]))) + analyses[[i]] <- list(report = report, + eta2 = resanc["group", "Sum Sq"]/(resanc["group", "Sum Sq"] + resanc["Residuals", "Sum Sq"])) } + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) + # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - eta2.final <- unique(eta2s[ps == p.final]) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - return(list(p.final = p.final, - ps = ps, - eta2.final = eta2.final, - eta2s = eta2s)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + eta2s.hack = analyses[[final.index]][["eta2"]], + eta2s.orig = analyses[[1]][["eta2"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-Hacking with multiple covariates -#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs.group Vector with number of observations per group #' @param ncov Number of continuous covariates in the simulated data frame #' @param rcov Correlation between the covariates #' @param rcovdv Correlation between covariates and dependent variable #' @param interactions Should interaction terms be added to the ANCOVA models? 
TRUE/FALSE #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param effect Mean effect size across studies on the standardized mean-difference scale +#' @param heterogeneity Between-study heterogeneity on the standardized mean-difference scale #' @param alpha Significance level of the t-test #' @param iter Number of simulation iterations #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE #' @export -sim.covhack <- function(nobs.group, ncov, rcov, rcovdv, interactions = FALSE, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ +sim.covhack <- function(nobs.group, ncov, rcov, rcovdv, interactions = FALSE, strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.covariates(nobs.group = nobs.group, ncov = ncov, rcov = rcov, rcovdv = rcovdv) + dat[[i]] <- .sim.covariates(nobs.group = nobs.group, ncov = ncov, rcov = rcov, + rcovdv = rcovdv, effect = effect, + heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset @@ -164,21 +178,8 @@ sim.covhack <- function(nobs.group, ncov, rcov, rcovdv, interactions = FALSE, st }) } - ps.hack <- NULL - ps.orig <- NULL - eta2s.hack <- NULL - eta2s.orig <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - eta2s.hack[i] <- res[[i]][["eta2.final"]] - eta2s.orig[i] <- res[[i]][["eta2s"]][1] - } - - res <- cbind(ps.hack, ps.orig, eta2s.hack, eta2s.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "eta2s.hack", "eta2s.orig")) } diff --git a/phackR/R/exploitCutoffs.R b/phackR/R/exploitCutoffs.R index f639e13..29199db 100644 --- a/phackR/R/exploitCutoffs.R +++ b/phackR/R/exploitCutoffs.R @@ -16,59 +16,66 @@ iv <- df[, iv] dv <- df[, dv] + analyses <- list() - mod.orig <- summary(stats::lm(dv ~ iv)) - p.orig 
<- mod.orig$coefficients[2, 4] - r2.orig <- mod.orig$r.squared + report.orig <- .report.association(x = iv, y = dv, method = "lm.continuous") + analyses[[1]] <- list(report = report.orig, + r2 = tanh(report.orig[["effect"]])^2) # Do the mediansplit mediansplitvar <- as.numeric(iv > stats::median(iv)) + 1 - p.mediansplit <- stats::t.test(dv[mediansplitvar == 1], dv[mediansplitvar == 2], - var.equal = TRUE, alternative = "two.sided")$p.value - r2.mediansplit <- .compR2t(dv[mediansplitvar == 1], dv[mediansplitvar == 2]) + report.mediansplit <- .report.association(x = mediansplitvar, y = dv, + method = "lm.mediansplit") + analyses[[2]] <- list(report = report.mediansplit, + r2 = .compR2t(dv[mediansplitvar == 1], dv[mediansplitvar == 2])) # Cut the middle tertiles <- as.numeric(stats::quantile(iv, probs = c(1/3, 2/3))) threecut <- cut(iv, breaks = c(-Inf, tertiles, Inf), labels = c(1,0,2)) dv2 <- dv[threecut %in% c(1,2)] threecut2 <- threecut[threecut %in% c(1, 2)] - p.cutmiddle <- stats::t.test(dv2[threecut2 == 2], dv2[threecut2 == 1], - var.equal = TRUE, alternative = "two.sided")$p.value - r2.cutmiddle <- .compR2t(dv2[threecut2 == 2], dv2[threecut2 == 1]) + report.cutmiddle <- .report.association(x = as.numeric(as.character(threecut2)), + y = dv2, + method = "lm.cutmiddle") + analyses[[3]] <- list(report = report.cutmiddle, + r2 = .compR2t(dv2[threecut2 == 2], dv2[threecut2 == 1])) # 3 Categories: Omnibus test - mod.threecat <- summary(stats::aov(dv ~ threecut)) - p.threecat <- mod.threecat[[1]][[5]][1] - r2.threecat <- mod.threecat[[1]][1,2]/sum(mod.threecat[[1]][,2]) + report.threecat <- .report.multicat(group = threecut, y = dv, + method = "anova.threecut") + analyses[[4]] <- list(report = report.threecat, + r2 = tanh(report.threecat[["effect"]])^2) - ps <- c(p.orig, p.mediansplit, p.cutmiddle, p.threecat) - r2s <- c(r2.orig, r2.mediansplit, r2.cutmiddle, r2.threecat) + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) # Select final p-hacked 
p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- unique(r2s[ps == p.final]) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-Hacking for exploiting cutoff values -#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs Number of observations #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param effect Mean effect size across studies on the Fisher-z scale +#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale #' @param alpha Significance level of the t-test #' @param iter Number of simulation iterations #' @param shinyEnv Is the function run in a Shiny session? 
TRUE/FALSE #' @export -sim.cutoffHack <- function(nobs, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ +sim.cutoffHack <- function(nobs, strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE){ dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.multcor(nobs = nobs, nvar = 2, r = 0) + dat[[i]] <- .sim.association(nobs = nobs, effect = effect, heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset @@ -92,22 +99,7 @@ sim.cutoffHack <- function(nobs, strategy = "firstsig", alpha = 0.05, iter = 100 }) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig) - - return(res) - - + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig")) } diff --git a/phackR/R/favorableImputation.R b/phackR/R/favorableImputation.R index 50d7ae6..663293a 100644 --- a/phackR/R/favorableImputation.R +++ b/phackR/R/favorableImputation.R @@ -54,124 +54,130 @@ # Stop if imputation methods are not defined stopifnot(any(c(1:10) %in% which)) - # Initialize result vector - ps <- rep(NA, 10) - r2s <- rep(NA, 10) + analyses <- list() # p-value when missing values are deleted if(1 %in% which){ - mod1 <- summary(stats::lm(y ~ x, na.action = "na.omit")) - ps[1] <- mod1$coefficients[2, 4] - r2s[1] <- mod1$r.squared + report <- .report.association(x = x, y = y, method = "delete.missing") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Mean imputation if(2 %in% which){ newx <- .easyimpute(x, mean, na.rm = T) newy <- .easyimpute(y, mean, na.rm = T) - mod2 <- summary(stats::lm(newy ~ newx)) - ps[2] <- mod2$coefficients[2, 4] - r2s[2] <- mod2$r.squared + report <- 
.report.association(x = newx, y = newy, method = "impute.mean") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Median imputation if(3 %in% which){ - newx <- .easyimpute(x, mean, na.rm = T) - newy <- .easyimpute(y, mean, na.rm = T) - mod3 <- summary(stats::lm(newy ~ newx)) - ps[3] <- mod3$coefficients[2, 4] - r2s[3] <- mod3$r.squared + newx <- .easyimpute(x, median, na.rm = T) + newy <- .easyimpute(y, median, na.rm = T) + report <- .report.association(x = newx, y = newy, method = "impute.median") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Mode imputation if(4 %in% which){ newx <- .easyimpute(x, .estimate_mode) newy <- .easyimpute(y, .estimate_mode) - mod4 <- summary(stats::lm(newy ~ newx)) - ps[4] <- mod4$coefficients[2, 4] - r2s[4] <- mod4$r.squared + report <- .report.association(x = newx, y = newy, method = "impute.mode") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Multivariate imputations by chained equations ("mice" package): predictive mean matchihng dfnew <- as.data.frame(cbind(x, y)) if(5 %in% which){ imp <- mice::mice(dfnew, m = 1, method = "pmm", silent = TRUE, print = FALSE) - mod5 <- summary(stats::lm(y ~ x, data = mice::complete(imp, 1))) - ps[5] <- mod5$coefficients[2, 4] - r2s[5] <- mod5$r.squared + dat5 <- mice::complete(imp, 1) + report <- .report.association(x = dat5$x, y = dat5$y, method = "mice.pmm") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Multivariate imputations by chained equations ("mice" package): Weighted predictive mean matching if(6 %in% which){ imp <- mice::mice(dfnew, m = 1, method = "midastouch", silent = TRUE, print = FALSE) - mod6 <- summary(stats::lm(y ~ x, data = mice::complete(imp, 1))) - ps[6] <- mod6$coefficients[2, 4] - r2s[6] <- mod6$r.squared + dat6 <- mice::complete(imp, 1) + report <- .report.association(x = 
dat6$x, y = dat6$y, method = "mice.midastouch") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Multivariate imputations by chained equations ("mice" package): Sample from observed values if(7 %in% which){ imp <- mice::mice(dfnew, m = 1, method = "sample", silent = TRUE, print = FALSE) - mod7 <- summary(stats::lm(y ~ x, data = mice::complete(imp, 1))) - ps[7] <- mod7$coefficients[2, 4] - r2s[7] <- mod7$r.squared + dat7 <- mice::complete(imp, 1) + report <- .report.association(x = dat7$x, y = dat7$y, method = "mice.sample") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Multivariate imputations by chained equations ("mice" package): Bayesian linear regression if(8 %in% which){ imp <- mice::mice(dfnew, m = 1, method = "norm", silent = TRUE, print = FALSE) - mod8 <- summary(stats::lm(y ~ x, data = mice::complete(imp, 1))) - ps[8] <- mod8$coefficients[2, 4] - r2s[8] <- mod8$r.squared + dat8 <- mice::complete(imp, 1) + report <- .report.association(x = dat8$x, y = dat8$y, method = "mice.norm") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Multivariate imputations by chained equations ("mice" package): Linear regression ignoring model error if(9 %in% which){ imp <- mice::mice(dfnew, m = 1, method = "norm.nob", silent = TRUE, print = FALSE) - mod9 <- summary(stats::lm(y ~ x, data = mice::complete(imp, 1))) - ps[9] <- mod9$coefficients[2, 4] - r2s[9] <- mod9$r.squared + dat9 <- mice::complete(imp, 1) + report <- .report.association(x = dat9$x, y = dat9$y, method = "mice.norm.nob") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } # Multivariate imputations by chained equations ("mice" package): Linear regression predicted values if(10 %in% which){ imp <- mice::mice(dfnew, m = 1, method = "norm.predict", silent = TRUE, print = FALSE) - mod10 <- summary(stats::lm(y ~ x, data = 
mice::complete(imp, 1))) - ps[10] <- mod10$coefficients[2, 4] - r2s[10] <- mod10$r.squared + dat10 <- mice::complete(imp, 1) + report <- .report.association(x = dat10$x, y = dat10$y, method = "mice.norm.predict") + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } - ps <- ps[!is.na(ps)] - r2s <- r2s[!is.na(r2s)] + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- unique(r2s[ps == p.final]) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-Hacking with different sorts of outlier definition missing value imputation -#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs Integer giving number of observations #' @param missing Percentage of missing values (e.g., 0.1 for 10 percent) #' @param which Which imputation methods? 
Either 5 random methods are chosen ("random") or a numeric vector containing the chosen methods (1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values) #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param effect Mean effect size across studies on the Fisher-z scale +#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale #' @param alpha Significance level of the t-test (default: 0.05) #' @param iter Number of simulation iterations #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE #' @export -sim.impHack <- function(nobs, missing, which = c(1:10), strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ +sim.impHack <- function(nobs, missing, which = c(1:10), strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.multcor(nobs = nobs, nvar = 2, r = 0, missing = missing) + dat[[i]] <- .sim.association(nobs = nobs, effect = effect, heterogeneity = heterogeneity, missing = missing) } if(any(which == "random")) which <- sample(c(1:10), 5) @@ -199,21 +205,8 @@ sim.impHack <- function(nobs, missing, which = c(1:10), strategy = "firstsig", a }) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - ps.all <- list() - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig")) } diff --git 
a/phackR/R/helpers.R b/phackR/R/helpers.R index 84bb9d9..733c63a 100644 --- a/phackR/R/helpers.R +++ b/phackR/R/helpers.R @@ -43,16 +43,38 @@ } +#' Draw one study-level effect size +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity + +.draw.study.effect <- function(effect = 0, heterogeneity = 0){ + + stopifnot(length(effect) == 1) + stopifnot(length(heterogeneity) == 1) + stopifnot(heterogeneity >= 0) + + if(heterogeneity == 0){ + return(effect) + } + + stats::rnorm(1, mean = effect, sd = heterogeneity) + +} + #' Generic sampling function #' @description Outputs a data frame with two columns #' @param nobs.group Number of observations per group. Either a scalar or a vector with two elements. +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity +#' @param theta Study-specific true effect size #' @importFrom stats rnorm -.sim.data <- function(nobs.group){ +.sim.data <- function(nobs.group, effect = 0, heterogeneity = 0, theta = NULL){ if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2) + if(is.null(theta)) theta <- .draw.study.effect(effect = effect, heterogeneity = heterogeneity) V1 <- stats::rnorm(nobs.group[1], 0, 1) - V2 <- stats::rnorm(nobs.group[2], 0, 1) + V2 <- stats::rnorm(nobs.group[2], theta, 1) group <- c(rep(1, nobs.group[1]), rep(2, nobs.group[2])) res <- cbind(group, c(V1, V2)) @@ -108,6 +130,40 @@ } +#' Select a p-hacked analysis from a vector of p-values +#' @description Takes a vector of p-values and returns the index of the selected analysis. 
+#' @param ps Vector of p values +#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param alpha Significance level (default: 0.05) + +.selectanalysis <- function(ps, strategy, alpha){ + + ps.clean <- ps + ps.clean[!is.finite(ps.clean)] <- Inf + + selected <- 1 + + if(strategy == "smallest.sig"){ + sig <- which(ps.clean < alpha) + if(length(sig) > 0){ + selected <- sig[which.min(ps.clean[sig])] + } + + } else if(strategy == "firstsig") { + + sig <- which(ps.clean < alpha) + if(length(sig) > 0){ + selected <- sig[1] + } + + } else if(strategy == "smallest") { + selected <- which.min(ps.clean) + } + + return(selected) + +} + #' Select a p-value from a vector of p-hacked p-values #' @description Takes a vector of p-values and selects the smallest, first significant, or smallest significant p-value. #' @param ps Vector of p values @@ -115,34 +171,83 @@ #' @param alpha Significance level (default: 0.05) .selectpvalue <- function(ps, strategy, alpha){ + ps[.selectanalysis(ps = ps, strategy = strategy, alpha = alpha)] - p.final <- NA - p.orig <- ps[1] +} - # Select smallest significant p-value - if(strategy == "smallest.sig"){ +#' Convert a Fisher-z value to a correlation +#' @param effect Fisher-z effect size - if(min(ps) < alpha){ - p.final <- min(ps) - } else { - p.final <- p.orig - } +.fisherz_to_r <- function(effect){ + pmax(pmin(tanh(effect), 0.999999), -0.999999) +} - # Select first significant p-value - } else if (strategy == "firstsig") { +#' Convert a correlation to Fisher-z +#' @param r Correlation - if(min(ps) < alpha){ - p.final <- ps[which(ps < alpha)[1]] - } else { - p.final <- p.orig - } +.r_to_fisherz <- function(r){ + atanh(pmax(pmin(r, 0.999999), -0.999999)) +} + +#' Compute the standard error of Fisher-z +#' @param n Sample size + +.compFisherZSE <- function(n){ + + if(n <= 3){ + return(NA_real_) + } + + 1/sqrt(n-3) + +} + +#' Simulate two continuous variables with a study-level association +#' @param nobs Number of 
observations +#' @param effect Mean Fisher-z effect size across studies +#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale +#' @param theta Study-specific true effect size +#' @param missing Proportion of missing values per variable + +.sim.association <- function(nobs, effect = 0, heterogeneity = 0, theta = NULL, missing = 0){ + + if(is.null(theta)) theta <- .draw.study.effect(effect = effect, heterogeneity = heterogeneity) + + .sim.multcor(nobs = nobs, nvar = 2, r = .fisherz_to_r(theta), missing = missing) + +} + +#' Simulate one criterion and multiple predictors with shared study-level association +#' @param nobs Number of observations +#' @param nvar Number of predictor variables +#' @param r Correlation between predictor variables +#' @param effect Mean Fisher-z effect size across studies +#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale +#' @param theta Study-specific true effect size +#' @importFrom stats rnorm - # Select smallest p-value - } else if (strategy == "smallest") { - p.final <- min(ps) +.sim.multregression <- function(nobs, nvar, r, effect = 0, heterogeneity = 0, theta = NULL){ + + if(length(nobs) > 1) nobs <- nobs[1] + if(is.null(theta)) theta <- .draw.study.effect(effect = effect, heterogeneity = heterogeneity) + + rho <- .fisherz_to_r(theta) + R <- matrix(rep(r, (nvar+1)^2), nrow = nvar+1) + diag(R) <- rep(1, nvar+1) + R[1, -1] <- rep(rho, nvar) + R[-1, 1] <- R[1, -1] + + cholR <- tryCatch(t(chol(R)), error = function(e) NULL) + if(is.null(cholR)){ + R <- as.matrix(Matrix::nearPD(R, corr = TRUE)$mat) + cholR <- t(chol(R)) } - return(p.final) + random.normal <- matrix(stats::rnorm((nvar+1)*nobs), nrow = nvar+1, ncol = nobs) + X <- as.data.frame(t(cholR %*% random.normal)) + colnames(X)[1] <- "criterion" + + return(X) } @@ -151,6 +256,8 @@ #' @param y values of group 2 .compR2t <- function(x, y){ + x <- x[!is.na(x)] + y <- y[!is.na(y)] grandmean <- mean(c(x, y)) sst <- sum((c(x,y)-grandmean)^2) 
sse <- sum((x-mean(x))^2)+sum((y-mean(y))^2) @@ -165,3 +272,308 @@ .compCohensD <- function(t, n){ t*sqrt(2/n) } + +#' Compute Cohen's d from a test statistic and group sizes +#' @param statistic Test statistic +#' @param n1 Sample size in group 1 +#' @param n2 Sample size in group 2 + +.compCohensDStat <- function(statistic, n1, n2){ + statistic*sqrt((1/n1) + (1/n2)) +} + +#' Compute Cohen's d from the observed data +#' @param control values of the control group +#' @param treatment values of the treatment group + +.compCohensDData <- function(control, treatment){ + + control <- control[!is.na(control)] + treatment <- treatment[!is.na(treatment)] + + n1 <- length(control) + n2 <- length(treatment) + + if(n1 < 2 || n2 < 2){ + return(NA_real_) + } + + sp <- sqrt((((n1-1)*stats::var(control)) + ((n2-1)*stats::var(treatment)))/(n1+n2-2)) + + if(!is.finite(sp) || sp == 0){ + return(NA_real_) + } + + (mean(treatment)-mean(control))/sp + +} + +#' Compute the standard error of Cohen's d +#' @param d Cohen's d +#' @param n1 Sample size in group 1 +#' @param n2 Sample size in group 2 + +.compCohensDSE <- function(d, n1, n2){ + + if(!is.finite(d) || n1 < 2 || n2 < 2){ + return(NA_real_) + } + + sqrt(((n1+n2)/(n1*n2)) + ((d^2)/(2*(n1+n2-2)))) + +} + +#' Convert a two-sided p-value to a one-sided p-value +#' @param p.twosided Two-sided p-value +#' @param stat Test statistic +#' @param alternative Direction of the test + +.onesided_from_twosided <- function(p.twosided, stat, alternative = "two.sided"){ + + if(!is.finite(p.twosided) || !is.finite(stat)){ + return(NA_real_) + } + + if(alternative == "two.sided"){ + return(p.twosided) + } + + halfp <- p.twosided/2 + + if(alternative == "greater"){ + if(stat >= 0){ + return(halfp) + } + + return(1-halfp) + } + + if(alternative == "less"){ + if(stat <= 0){ + return(halfp) + } + + return(1-halfp) + } + + stop("Unsupported alternative.") + +} + +#' Build one normalized reporting entry for a two-group analysis +#' @param control values 
of the control group +#' @param treatment values of the treatment group +#' @param method Analysis method +#' @param alternative Direction of the test. For group-comparison analyses, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. +#' @param trim Trimming level for Yuen's test +#' @param p.override Optional p-value override + +.report.twogroup <- function(control, treatment, method = "t.equal", alternative = "two.sided", trim = NULL, p.override = NULL){ + + control <- control[!is.na(control)] + treatment <- treatment[!is.na(treatment)] + + n1 <- length(control) + n2 <- length(treatment) + effect <- .compCohensDData(control = control, treatment = treatment) + se <- .compCohensDSE(d = effect, n1 = n1, n2 = n2) + stat <- NA_real_ + pval <- NA_real_ + method.label <- method + + if(method == "t.equal"){ + mod <- stats::t.test(treatment, control, var.equal = TRUE, alternative = alternative) + stat <- unname(mod$statistic) + pval <- mod$p.value + } else if(method == "t.welch"){ + mod <- stats::t.test(treatment, control, var.equal = FALSE, alternative = alternative) + stat <- unname(mod$statistic) + pval <- mod$p.value + } else if(method == "wilcox"){ + mod <- stats::wilcox.test(treatment, control, alternative = alternative) + stat <- unname(mod$statistic) + pval <- mod$p.value + } else if(method == "yuen"){ + dftest <- data.frame(group = c(rep(1, n1), rep(2, n2)), + dv = c(control, treatment)) + mod <- WRS2::yuen(dv ~ group, tr = trim, data = dftest) + stat <- unname(mod$test) + pval <- .onesided_from_twosided(p.twosided = mod$p.value, + stat = stat, + alternative = alternative) + method.label <- paste0("yuen.", trim) + } else { + stop("Unsupported method.") + } + + if(!is.null(p.override)){ + pval <- p.override + } + + return(list(effect = effect, + se = se, + n = n1+n2, + stat = stat, + p = pval, + method = method.label)) + +} + +#' Build one normalized reporting entry for a simple association analysis +#' @param x 
Predictor values +#' @param y Criterion values +#' @param method Analysis method +#' @param alternative Direction of the test. For association analyses, \code{"greater"} tests a positive association. +#' @param p.override Optional p-value override +#' @param stat.override Optional statistic override + +.report.association <- function(x, y, method = "lm", alternative = "two.sided", p.override = NULL, stat.override = NULL){ + + dat <- data.frame(x = x, y = y) + dat <- dat[is.finite(dat$x) & is.finite(dat$y), , drop = FALSE] + n <- nrow(dat) + + if(n < 4 || length(unique(dat$x)) < 2 || length(unique(dat$y)) < 2){ + return(list(effect = NA_real_, + se = NA_real_, + n = n, + stat = NA_real_, + p = NA_real_, + method = method)) + } + + fit <- summary(stats::lm(y ~ x, data = dat)) + effect <- .r_to_fisherz(stats::cor(dat$x, dat$y)) + stat <- unname(fit$coefficients[2, 3]) + pval <- .onesided_from_twosided(p.twosided = fit$coefficients[2, 4], + stat = stat, + alternative = alternative) + + if(!is.null(stat.override)) stat <- stat.override + if(!is.null(p.override)) pval <- p.override + + return(list(effect = effect, + se = .compFisherZSE(n = n), + n = n, + stat = stat, + p = pval, + method = method)) + +} + +#' Build one normalized reporting entry for a multi-category analysis +#' @param group Grouping variable +#' @param y Criterion values +#' @param method Analysis method + +.report.multicat <- function(group, y, method = "anova"){ + + dat <- data.frame(group = group, y = y) + dat <- dat[!is.na(dat$group) & is.finite(dat$y), , drop = FALSE] + dat$group <- factor(dat$group) + n <- nrow(dat) + + if(n < 4 || length(unique(dat$group)) < 2){ + return(list(effect = NA_real_, + se = NA_real_, + n = n, + stat = NA_real_, + p = NA_real_, + method = method)) + } + + fit <- summary(stats::lm(y ~ group, data = dat)) + if(length(fit$fstatistic) == 0){ + return(list(effect = NA_real_, + se = NA_real_, + n = n, + stat = NA_real_, + p = NA_real_, + method = method)) + } + + fstat <- 
unname(fit$fstatistic["value"]) + pval <- stats::pf(fstat, + df1 = unname(fit$fstatistic["numdf"]), + df2 = unname(fit$fstatistic["dendf"]), + lower.tail = FALSE) + rsign <- sign(stats::cor(as.numeric(dat$group), dat$y)) + if(!is.finite(rsign) || rsign == 0) rsign <- 1 + + return(list(effect = .r_to_fisherz(rsign*sqrt(fit$r.squared)), + se = .compFisherZSE(n = n), + n = n, + stat = fstat, + p = pval, + method = method)) + +} + +#' Build one normalized reporting entry for a group effect in a linear model +#' @param formula Model formula +#' @param data Analysis data +#' @param groupvar Group variable name +#' @param method Analysis method + +.report.group_lm <- function(formula, data, groupvar = "group", method = "ancova"){ + + fit <- stats::lm(formula, data = data) + summary.fit <- summary(fit) + model.df <- stats::model.frame(fit) + group <- model.df[[groupvar]] + group <- group[!is.na(group)] + group.levels <- sort(unique(group)) + n1 <- sum(group == group.levels[1]) + n2 <- sum(group == group.levels[2]) + coef.row <- rownames(summary.fit$coefficients) + row.id <- grep(paste0("^", groupvar), coef.row)[1] + + if(length(group.levels) < 2 || is.na(row.id)){ + return(list(effect = NA_real_, + se = NA_real_, + n = nrow(model.df), + stat = NA_real_, + p = NA_real_, + method = method)) + } + + stat <- unname(summary.fit$coefficients[row.id, 3]) + effect <- .compCohensDStat(statistic = stat, n1 = n1, n2 = n2) + + return(list(effect = effect, + se = .compCohensDSE(d = effect, n1 = n1, n2 = n2), + n = nrow(model.df), + stat = stat, + p = summary.fit$coefficients[row.id, 4], + method = method)) + +} + +#' Combine legacy simulation output with the normalized reporting block +#' @param res List of iteration results +#' @param legacy.fields Character vector defining the legacy output columns + +.combine.phase1.results <- function(res, legacy.fields){ + + output <- list() + + for(i in 1:length(legacy.fields)){ + output[[legacy.fields[i]]] <- vapply(res, function(x) 
x[[legacy.fields[i]]], numeric(1)) + } + + output[["effect.initial"]] <- vapply(res, function(x) x[["report.initial"]][["effect"]], numeric(1)) + output[["effect.final"]] <- vapply(res, function(x) x[["report.final"]][["effect"]], numeric(1)) + output[["se.initial"]] <- vapply(res, function(x) x[["report.initial"]][["se"]], numeric(1)) + output[["se.final"]] <- vapply(res, function(x) x[["report.final"]][["se"]], numeric(1)) + output[["n.initial"]] <- vapply(res, function(x) x[["report.initial"]][["n"]], numeric(1)) + output[["n.final"]] <- vapply(res, function(x) x[["report.final"]][["n"]], numeric(1)) + output[["stat.initial"]] <- vapply(res, function(x) x[["report.initial"]][["stat"]], numeric(1)) + output[["stat.final"]] <- vapply(res, function(x) x[["report.final"]][["stat"]], numeric(1)) + output[["p.initial"]] <- vapply(res, function(x) x[["report.initial"]][["p"]], numeric(1)) + output[["p.final"]] <- vapply(res, function(x) x[["report.final"]][["p"]], numeric(1)) + output[["method.initial"]] <- vapply(res, function(x) x[["report.initial"]][["method"]], character(1)) + output[["method.final"]] <- vapply(res, function(x) x[["report.final"]][["method"]], character(1)) + + return(as.data.frame(output, stringsAsFactors = FALSE)) + +} diff --git a/phackR/R/incorrectRounding.R b/phackR/R/incorrectRounding.R index c40a720..716927d 100644 --- a/phackR/R/incorrectRounding.R +++ b/phackR/R/incorrectRounding.R @@ -10,48 +10,56 @@ #' @param group Scalar defining location of the group vector in the data frame #' @param dv Scalar defining location of dependent variable in the data frame #' @param roundinglevel Highest p-value that is rounded down to 0.05 -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. 
#' @param alpha Significance level of the t-test (default: 0.05) #' @importFrom stats t.test .roundhack <- function(df, group, dv, roundinglevel, alternative = "two.sided", alpha = 0.05){ - # Compute t-test - pval <- stats::t.test(df[,dv] ~ df[,group], - var.equal = TRUE, alternative = alternative)$p.value - r2val <- .compR2t(df[,dv][(df[,group] == unique(df[,group])[1])], - df[,dv][(df[,group] == unique(df[,group])[2])]) + control <- df[,dv][df[,group] == unique(df[,group])[1]] + treatment <- df[,dv][df[,group] == unique(df[,group])[2]] + report.initial <- .report.twogroup(control = control, + treatment = treatment, + method = "t.equal", + alternative = alternative) + r2val <- .compR2t(control, treatment) # P-hack p-value - if(pval > alpha && pval < roundinglevel){ + if(report.initial[["p"]] > alpha && report.initial[["p"]] < roundinglevel){ p.final <- alpha } else { - p.final <- pval + p.final <- report.initial[["p"]] } - ps <- c(pval, p.final) + report.final <- report.initial + report.final[["p"]] <- p.final - return(list(p.final = p.final, - ps = ps, - r2.final = r2val, - r2s = rep(r2val, 2))) + return(list(ps.hack = p.final, + ps.orig = report.initial[["p"]], + r2s.hack = r2val, + r2s.orig = r2val, + report.initial = report.initial, + report.final = report.final)) } #' Simulate p-hacking with incorrect rounding +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param roundinglevel Highest p-value that is rounded down to alpha +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity #' @param iter Number of iterations -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. 
#' @param alpha Significance level of the t-test (default: 0.05) #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE #' @export -sim.roundhack <- function(roundinglevel, iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){ +sim.roundhack <- function(roundinglevel, effect = 0, heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.data(nobs.group = 30) + dat[[i]] <- .sim.data(nobs.group = 30, effect = effect, heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset @@ -71,20 +79,7 @@ sim.roundhack <- function(roundinglevel, iter = 1000, alternative = "two.sided", }) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig")) } diff --git a/phackR/R/optionalStopping.R b/phackR/R/optionalStopping.R index 59cfbe2..1889531 100644 --- a/phackR/R/optionalStopping.R +++ b/phackR/R/optionalStopping.R @@ -13,10 +13,9 @@ #' @param n.max Maximum sample size #' @param step Step size of the optional stopping (default is 1) #' @param peek Determines how often one peeks at the data. Overrides step argument if not NULL. -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. 
#' @param alpha Significance level of the t-test (default: 0.05) #' @importFrom stats t.test -#' @importFrom utils tail .optstop <- function(df, group, dv, n.min, n.max, step = 1, peek = NULL, alternative = "two.sided", alpha = 0.05){ @@ -36,49 +35,61 @@ } # Compute t-tests - mod <- sapply(peeks, FUN = function(x) {stats::t.test(g1[1:x], g2[1:x], var.equal = TRUE, alternative = alternative)}) - ps <- simplify2array(mod["p.value",]) - r2s <- sapply(peeks, FUN = function(x) {.compR2t(g1[1:x], g2[1:x])}) - ds <- .compCohensD(simplify2array(mod["statistic",]), peeks) + analyses <- lapply(peeks, function(x){ + control <- g1[1:x] + treatment <- g2[1:x] + report <- .report.twogroup(control = control, + treatment = treatment, + method = "t.equal", + alternative = alternative) + + list(report = report, + r2 = .compR2t(control, treatment), + d = .compCohensDStat(statistic = report[["stat"]], n1 = length(control), n2 = length(treatment))) + }) + + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) # Do the p-hacking if(any(ps < alpha) == FALSE){ - p.final <- utils::tail(ps, 1) - r2.final <- utils::tail(r2s, 1) - d.final <- utils::tail(ds, 1) - } else if (any(ps < alpha) == TRUE) { - p.final <- ps[which(ps < alpha)][1] - r2.final <- unique(r2s[ps == p.final]) - d.final <- unique(ds[ps == p.final]) + final.index <- length(peeks) + } else { + final.index <- which(ps < alpha)[1] } - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s, - d.final = d.final, - ds = ds)) + initial.index <- length(peeks) + + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[initial.index]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[initial.index]][["r2"]], + ds.hack = analyses[[final.index]][["d"]], + ds.orig = analyses[[initial.index]][["d"]], + report.initial = analyses[[initial.index]][["report"]], + report.final = analyses[[final.index]][["report"]])) } -#' Simulate 
p-hacking with incorrect rounding +#' Simulate p-hacking with optional stopping +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param n.min Minimum sample size #' @param n.max Maximum sample size #' @param step Step size of the optional stopping (default is 1) #' @param peek Determines how often one peeks at the data. Overrides step argument if not NULL. -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. #' @param iter Number of iterations #' @param alpha Significance level of the t-test (default: 0.05) #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE -#' @importFrom utils tail #' @export #' -sim.optstop <- function(n.min, n.max, step = 1, peek = NULL, alternative = "two.sided", iter = 1000, alpha = 0.05, shinyEnv = FALSE){ +sim.optstop <- function(n.min, n.max, step = 1, peek = NULL, effect = 0, heterogeneity = 0, alternative = "two.sided", iter = 1000, alpha = 0.05, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.data(nobs.group = n.max) + dat[[i]] <- .sim.data(nobs.group = n.max, effect = effect, heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset @@ -101,24 +112,7 @@ sim.optstop <- function(n.min, n.max, step = 1, peek = NULL, alternative = "two. 
}) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - ds.hack <- NULL - ds.orig <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- utils::tail(res[[i]][["ps"]], 1) - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- utils::tail(res[[i]][["r2s"]], 1) - ds.hack[i] <- res[[i]][["d.final"]] - ds.orig[i] <- utils::tail(res[[i]][["ds"]], 1) - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig", "ds.hack", "ds.orig")) } diff --git a/phackR/R/outlierExclusion.R b/phackR/R/outlierExclusion.R index 3baad71..12f5210 100644 --- a/phackR/R/outlierExclusion.R +++ b/phackR/R/outlierExclusion.R @@ -390,218 +390,123 @@ x <- df[,x] y <- df[,y] - # initialize p value list (one level for each outlier method) - ps <- vector("list", 12) - r2s <- vector("list", 12) + analyses <- list() - #### Go through each outlier detection method and calculate p values #### - - # Boxplot - if(1 %in% which){ + .report.outliers <- function(dat, label){ - dat <- .out.boxplot(x, y) - ps[[1]] <- rep(NA, length(dat)) - r2s[[1]] <- rep(NA, length(dat)) + if(is.matrix(dat)) dat <- list(dat) + out <- vector("list", length(dat)) for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[1]][i] <- mod$coefficients[2,4] - r2s[[1]][i] <- mod$r.squared + report <- .report.association(x = dat[[i]][,1], y = dat[[i]][,2], + method = paste0(label, ".", i)) + out[[i]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } + + out + } - # Stem & Leaf - if(2 %in% which){ + #### Go through each outlier detection method and calculate p values #### - dat <- .out.stemleaf(x, y) - ps[[2]] <- rep(NA, length(dat)) - r2s[[2]] <- rep(NA, length(dat)) + report.orig <- .report.association(x = x, y = y, method = "lm.original") + analyses[[1]] <- list(report = report.orig, + r2 = 
tanh(report.orig[["effect"]])^2) - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[2]][i] <- mod$coefficients[2,4] - r2s[[2]][i] <- mod$r.squared - } + # Boxplot + if(1 %in% which){ + analyses <- c(analyses, .report.outliers(.out.boxplot(x, y), "out.boxplot")) + } + + # Stem & Leaf + if(2 %in% which){ + analyses <- c(analyses, .report.outliers(.out.stemleaf(x, y), "out.stemleaf")) } # Standard deviation if(3 %in% which){ - - dat <- .out.sdrule(x, y) - ps[[3]] <- rep(NA, length(dat)) - r2s[[3]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[3]][i] <- mod$coefficients[2,4] - r2s[[3]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.sdrule(x, y), "out.sdrule")) } # Percentile if(4 %in% which){ - - dat <- .out.percentrule(x, y) - ps[[4]] <- rep(NA, length(dat)) - r2s[[4]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[4]][i] <- mod$coefficients[2,4] - r2s[[4]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.percentrule(x, y), "out.percentrule")) } # Studentized residuals if(5 %in% which){ - - dat <- .out.residual(x, y, type = "stud") - ps[[5]] <- rep(NA, length(dat)) - r2s[[5]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[5]][i] <- mod$coefficients[2,4] - r2s[[5]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.residual(x, y, type = "stud"), "out.residual.stud")) } # Standardized residuals if(6 %in% which){ - - dat <- .out.residual(x, y, type = "stan") - ps[[6]] <- rep(NA, length(dat)) - r2s[[6]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[6]][i] <- mod$coefficients[2,4] - r2s[[6]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.residual(x, y, type = "stan"), 
"out.residual.stan")) } # DFBETA if(7 %in% which){ - - dat <- .out.dfbeta(x, y) - ps[[7]] <- rep(NA, length(dat)) - r2s[[7]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[7]][i] <- mod$coefficients[2,4] - r2s[[7]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.dfbeta(x, y), "out.dfbeta")) } # DFFITS if(8 %in% which){ - - dat <- .out.dffits(x, y) - ps[[8]] <- rep(NA, length(dat)) - r2s[[8]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[8]][i] <- mod$coefficients[2,4] - r2s[[8]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.dffits(x, y), "out.dffits")) } # Cook's distance if(9 %in% which){ - - dat <- .out.cook(x, y) - ps[[9]] <- rep(NA, length(dat)) - r2s[[9]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[9]][i] <- mod$coefficients[2,4] - r2s[[9]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.cook(x, y), "out.cook")) } # Mahalanobis distance if(10 %in% which){ - - dat <- .out.mahalanobis(x, y) - ps[[10]] <- rep(NA, length(dat)) - r2s[[10]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[10]][i] <- mod$coefficients[2,4] - r2s[[10]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.mahalanobis(x, y), "out.mahalanobis")) } # Leverage levels if(11 %in% which){ - - dat <- .out.leverage(x, y) - ps[[11]] <- rep(NA, length(dat)) - r2s[[11]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[11]][i] <- mod$coefficients[2,4] - r2s[[11]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.leverage(x, y), "out.leverage")) } # Covariance ratio if(12 %in% which){ - - dat <- .out.covratio(x, y) - ps[[12]] <- rep(NA, length(dat)) 
- r2s[[12]] <- rep(NA, length(dat)) - - for(i in 1:length(dat)){ - mod <- summary(stats::lm(dat[[i]][,2] ~ dat[[i]][,1])) - ps[[12]][i] <- mod$coefficients[2,4] - r2s[[12]][i] <- mod$r.squared - } + analyses <- c(analyses, .report.outliers(.out.covratio(x, y), "out.covratio")) } - # Compute original p value - mod <- summary(stats::lm(y ~ x)) - p.orig <- mod$coefficients[2,4] - r2.orig <- mod$r.squared - # Combine all p values and remove NAs - ps <- c(p.orig, unlist(ps)) - ps <- ps[!is.na(ps)] - r2s <- c(r2.orig, unlist(r2s)) - r2s <- r2s[!is.na(ps)] + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- unique(r2s[ps == p.final]) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-Hacking with different sorts of outlier definition -#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs Integer giving number of observations #' @param which Which outlier detection methods? 
Either 5 random methods are chosen ("random") or a numeric vector containing the chosen methods (1: boxplot, 2: stem&leaf, 3: standard deviation, 4: percentile, 5: studentized residuals, 6: standardized residuals, 7: DFBETA, 8: DFFITS, 9: Cook's D, 10: Mahalanobis distance, 11: Leverage values, 12: Covariance ratio) #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param effect Mean effect size across studies on the Fisher-z scale +#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale #' @param alpha Significance level of the t-test (default: 0.05) #' @param iter Number of simulation iterations #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE #' @export -sim.outHack <- function(nobs, which = c(1:12), strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ +sim.outHack <- function(nobs, which = c(1:12), strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.multcor(nobs = nobs, nvar = 2, r = 0) + dat[[i]] <- .sim.association(nobs = nobs, effect = effect, heterogeneity = heterogeneity) } # If which = "random @@ -624,21 +529,6 @@ sim.outHack <- function(nobs, which = c(1:12), strategy = "firstsig", alpha = 0. 
}) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - ps.all <- list() - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - #ps.all[[i]] <- res[[i]][["ps"]] - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig")) } diff --git a/phackR/R/plotsShiny.R b/phackR/R/plotsShiny.R index adc0fe7..429ccd5 100644 --- a/phackR/R/plotsShiny.R +++ b/phackR/R/plotsShiny.R @@ -8,7 +8,7 @@ #' @importFrom ggplot2 ggplot geom_histogram aes theme_light xlab ggtitle theme element_text geom_vline scale_fill_manual layer_scales ylab geom_segment geom_col scale_x_continuous scale_y_continuous waiver #' @importFrom rlang .data #' @importFrom dplyr all_of mutate -#' @importFrom magrittr "%$%" +#' @importFrom magrittr "%$%" "%>%" pplots <- function(simdat, alpha){ diff --git a/phackR/R/selectiveReportingDV.R b/phackR/R/selectiveReportingDV.R index 4d79494..0b5c5e2 100644 --- a/phackR/R/selectiveReportingDV.R +++ b/phackR/R/selectiveReportingDV.R @@ -7,8 +7,10 @@ #' @param nobs.group Vector giving number of observations per group #' @param nvar Number of dependent variables in the data frame #' @param r Desired correlation between the dependent variables (scalar) +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity -.sim.multDV <- function(nobs.group, nvar, r){ +.sim.multDV <- function(nobs.group, nvar, r, effect = 0, heterogeneity = 0){ # Observations per group if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2) @@ -18,6 +20,8 @@ # Generate dependent variables dvs <- .sim.multcor(nobs = sum(nobs.group), nvar = nvar, r = r) + theta <- .draw.study.effect(effect = effect, heterogeneity = heterogeneity) + dvs[(nobs.group[1]+1):sum(nobs.group), ] <- 
dvs[(nobs.group[1]+1):sum(nobs.group), ] + theta # Generate data frame res <- cbind(group, dvs) @@ -31,7 +35,7 @@ #' @param dvs Vector defining the DV columns (will be checked in given order) #' @param group Scalar defining grouping column #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. #' @param alpha Significance level of the t-test #' @importFrom stats t.test @@ -40,53 +44,58 @@ # Prepare data frame dvs <- as.matrix(df[, dvs], ncol = length(dvs)) group <- df[, group] - mod <- list() - r2s <- NULL + analyses <- list() # Compute t-tests for(i in 1:ncol(dvs)){ - - mod[[i]] <- stats::t.test(dvs[, i] ~ group, - var.equal = TRUE, alternative = alternative) - r2s[i] <- .compR2t(dvs[group == unique(group)[1], i], - dvs[group == unique(group)[2], i]) + control <- dvs[group == unique(group)[1], i] + treatment <- dvs[group == unique(group)[2], i] + report <- .report.twogroup(control = control, + treatment = treatment, + method = "t.equal", + alternative = alternative) + analyses[[i]] <- list(report = report, + r2 = .compR2t(control, treatment), + d = .compCohensDStat(statistic = report[["stat"]], + n1 = length(control), + n2 = length(treatment))) } - ps <- unlist(simplify2array(mod)["p.value", ]) - ds <- .compCohensD(unlist(simplify2array(mod)["statistic", ]), length(df[, group])/2) - - # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- unique(r2s[ps == p.final]) - d.final <- unique(ds[ps == p.final]) + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - return(list(p.final = p.final, - ps = ps, - 
r2.final = r2.final, - r2s = r2s, - d.final = d.final, - ds = ds)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + ds.hack = analyses[[final.index]][["d"]], + ds.orig = analyses[[1]][["d"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-Hacking with multiple dependent variables -#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs.group Vector giving number of observations per group #' @param nvar Number of dependent variables (columns) in the data frame #' @param r Desired correlation between the dependent variables (scalar) #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity #' @param iter Number of simulation iterations -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. #' @param alpha Significance level of the t-test (default: 0.05) #' @param shinyEnv Is the function run in a Shiny session? 
TRUE/FALSE #' @export -sim.multDVhack <- function(nobs.group, nvar, r, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){ +sim.multDVhack <- function(nobs.group, nvar, r, strategy = "firstsig", effect = 0, heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.multDV(nobs.group = nobs.group, nvar = nvar, r = r) + dat[[i]] <- .sim.multDV(nobs.group = nobs.group, nvar = nvar, r = r, + effect = effect, heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset @@ -108,24 +117,7 @@ sim.multDVhack <- function(nobs.group, nvar, r, strategy = "firstsig", iter = 10 }) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - ds.hack <- NULL - ds.orig <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - ds.hack[i] <- res[[i]][["d.final"]] - ds.orig[i] <- res[[i]][["ds"]][1] - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig", "ds.hack", "ds.orig")) } diff --git a/phackR/R/selectiveReportingIV.R b/phackR/R/selectiveReportingIV.R index 863ceec..c845611 100644 --- a/phackR/R/selectiveReportingIV.R +++ b/phackR/R/selectiveReportingIV.R @@ -8,22 +8,33 @@ #' @param nvar Number of independent variables in the data frame #' @param r Desired correlation between the independent variables (scalar) #' @param regression Should the simulation be conducted for a regression analysis (TRUE) or a t-test? 
(FALSE) +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity -.sim.multIV <- function(nobs.group, nvar, r, regression = FALSE){ +.sim.multIV <- function(nobs.group, nvar, r, regression = FALSE, effect = 0, heterogeneity = 0){ + + if(regression){ + return(.sim.multregression(nobs = nobs.group, nvar = nvar, r = r, + effect = effect, heterogeneity = heterogeneity)) + } # Observations per group if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2) - # Simulate control group / criterion variable + # Simulate control group control <- rnorm(nobs.group[1]) - if(regression) criterion <- control # Simulate multiple experimental groups / predictor variables ivs <- .sim.multcor(nobs = nobs.group[2], nvar = nvar, r = r) + theta <- .draw.study.effect(effect = effect, heterogeneity = heterogeneity) + ivs <- ivs + theta # Generate data frame - res <- cbind(control, ivs) - if(regression) colnames(res)[1] <- "criterion" + nrows <- max(length(control), nrow(ivs)) + control.pad <- c(control, rep(NA, nrows-length(control))) + ivs.pad <- matrix(NA, nrow = nrows, ncol = ncol(ivs)) + ivs.pad[1:nrow(ivs), ] <- as.matrix(ivs) + res <- cbind(control.pad, ivs.pad) return(res) @@ -35,7 +46,7 @@ #' @param ivs Location of the independent variables (treatment groups) in the (wide) data frame #' @param control Location of the control group in the (wide) data frame #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). For the t-test path, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. 
#' @param alpha Significance level of the t-test (default: 0.05) #' @importFrom stats t.test @@ -45,29 +56,32 @@ control <- df[, control] # Prepare dataset - mod <- list() - r2s <- rep(NA, length(ivs)) + analyses <- list() # Compute t-tests for(i in 1:length(ivs)){ - mod[[i]] <- stats::t.test(control, treatm[,i], var.equal = TRUE, alternative = alternative) - r2s[i] <- .compR2t(control, treatm[,i]) + report <- .report.twogroup(control = control, + treatment = treatm[,i], + method = "t.equal", + alternative = alternative) + analyses[[i]] <- list(report = report, + r2 = .compR2t(control, treatm[,i]), + d = .compCohensDStat(statistic = report[["stat"]], + n1 = length(control), + n2 = length(treatm[,i]))) } - ps <- unlist(simplify2array(mod)["p.value", ]) - ds <- .compCohensD(unlist(simplify2array(mod)["statistic", ]), length(control)) + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- r2s[ps == p.final] - d.final <- ds[ps == p.final] - - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s, - d.final = d.final, - ds = ds)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + ds.hack = analyses[[final.index]][["d"]], + ds.orig = analyses[[1]][["d"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } @@ -77,7 +91,7 @@ #' @param ivs Location of the independent variables (predictors) in the data frame #' @param control Location of the criterion in the data frame #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") 
+#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). For the regression path, \code{"greater"} tests a positive association. #' @param alpha Significance level of the t-test (default: 0.05) #' @importFrom stats t.test @@ -87,50 +101,60 @@ criterion <- df[, control] # Prepare dataset - ps <- rep(NA, length(ivs)) - r2s <- rep(NA, length(ivs)) + analyses <- list() # Compute regressions for(i in 1:length(ivs)){ - mod <- summary(stats::lm(criterion ~ predictors[,i])) - ps[i] <- mod$coefficients[2, 4] - r2s[i] <- mod$r.squared + report <- .report.association(x = predictors[,i], y = criterion, + method = paste0("lm.predictor.", i), + alternative = alternative) + analyses[[i]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } - # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- r2s[ps == p.final] - - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s)) + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) + + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-Hacking with multiple independent variables -#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs.group Vector giving number of observations per group #' @param nvar Number of independent variables (columns) in the data frame #' @param r 
Desired correlation between the dependent variables (scalar) #' @param regression Should the simulation be conducted for a regression analysis (TRUE) or a t-test? (FALSE) #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param effect Mean effect size across studies. For \code{regression = FALSE}, this is on the standardized mean-difference scale. For \code{regression = TRUE}, it is on the Fisher-z scale. +#' @param heterogeneity Between-study heterogeneity. For \code{regression = FALSE}, this is on the standardized mean-difference scale. For \code{regression = TRUE}, it is on the Fisher-z scale. #' @param iter Number of simulation iterations -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). For \code{regression = FALSE}, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. For \code{regression = TRUE}, it tests a positive association. #' @param alpha Significance level of the t-test (default: 0.05) #' @param shinyEnv Is the function run in a Shiny session? 
TRUE/FALSE #' @export -sim.multIVhack <- function(nobs.group, nvar, r, regression=FALSE, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){ +sim.multIVhack <- function(nobs.group, nvar, r, regression=FALSE, strategy = "firstsig", effect = 0, heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.multIV(nobs.group = nobs.group, nvar = nvar, r = r, regression=regression) + dat[[i]] <- .sim.multIV(nobs.group = nobs.group, nvar = nvar, r = r, + regression = regression, effect = effect, + heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset - .multIVhack <- ifelse(regression, .multIVhack_reg, .multIVhack_ttest) + if(regression){ + .multIVhack <- .multIVhack_reg + } else { + .multIVhack <- .multIVhack_ttest + } .multIVhacklist <- function(x){ .multIVhack(df = x, ivs = c(2:(nvar+1)), control = 1, @@ -153,27 +177,12 @@ sim.multIVhack <- function(nobs.group, nvar, r, regression=FALSE, strategy = "fi }) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - ds.hack <- NULL - ds.orig <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - if(!regression){ - ds.hack[i] <- res[[i]][["d.final"]] - ds.orig[i] <- res[[i]][["ds"]][1] - } + if(regression){ + return(.combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig"))) } - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig) - if(!regression) res <- cbind(res, ds.hack, ds.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig", "ds.hack", "ds.orig")) } diff --git a/phackR/R/statAnalysis.R b/phackR/R/statAnalysis.R index 7833a7d..b0b0183 100644 --- 
a/phackR/R/statAnalysis.R +++ b/phackR/R/statAnalysis.R @@ -9,62 +9,62 @@ #' @param group Location of the grouping variable in the data frame #' @param dv Location of the dependent variabl in the data frame #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. #' @param alpha Significance level of the t-test #' @importFrom stats t.test wilcox.test #' @importFrom WRS2 yuen .statAnalysisHack <- function(df, group, dv, strategy = "firstsig", alternative = "two.sided", alpha = 0.05){ - dftest <- cbind(df[, group], df[, dv]) - colnames(dftest) <- c("group", "dv") + control <- df[df[,group] == unique(df[,group])[1], dv] + treatment <- df[df[,group] == unique(df[,group])[2], dv] - # "Normal" t-test - p.orig <- stats::t.test(dv ~ group, var.equal = TRUE, alternative = alternative, - data = dftest)$p.value + analyses <- list( + .report.twogroup(control = control, treatment = treatment, + method = "t.equal", alternative = alternative), + .report.twogroup(control = control, treatment = treatment, + method = "t.welch", alternative = alternative), + .report.twogroup(control = control, treatment = treatment, + method = "wilcox", alternative = alternative) + ) - # Welch test - p.welch <- stats::t.test(dv ~ group, var.equal = FALSE, - alternative = alternative, data = dftest)$p.value - - # Mann-Whitney / Wilcoxon test - p.wilcox <- stats::wilcox.test(dv ~ group, alternative = alternative, - data = dftest)$p.value - - # Yuen test with different levels of trimming - p.yuen <- rep(NA, 4) trim <- c(0.1, 0.15, 0.2, 0.25) - for(i in 1:4) { - p.yuen[i] <- WRS2::yuen(dv ~ group, tr = trim[i], - data = as.data.frame(dftest))$p.value + for(i in 1:length(trim)){ + analyses[[length(analyses)+1]] <- 
.report.twogroup(control = control, + treatment = treatment, + method = "yuen", + trim = trim[i], + alternative = alternative) } - ps <- c(p.orig, p.welch, p.wilcox, p.yuen) + ps <- vapply(analyses, function(x) x[["p"]], numeric(1)) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - - return(list(p.final = p.final, - ps = ps)) + return(list(ps.hack = analyses[[final.index]][["p"]], + ps.orig = analyses[[1]][["p"]], + report.initial = analyses[[1]], + report.final = analyses[[final.index]])) } #' Simulate p-Hacking for exploiting different statistical analysis options -#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs.group Number of observations per group. Either a scalar or a vector with 2 elements. #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. #' @param alpha Significance level of the t-test #' @param iter Number of simulation iterations #' @param shinyEnv Is the function run in a Shiny session? 
TRUE/FALSE #' @export -sim.statAnalysisHack <- function(nobs.group, strategy = "firstsig", alternative = "two.sided", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ +sim.statAnalysisHack <- function(nobs.group, strategy = "firstsig", effect = 0, heterogeneity = 0, alternative = "two.sided", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.data(nobs.group = nobs.group) + dat[[i]] <- .sim.data(nobs.group = nobs.group, effect = effect, heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset @@ -89,16 +89,8 @@ sim.statAnalysisHack <- function(nobs.group, strategy = "firstsig", alternative }) } - ps.hack <- NULL - ps.orig <- NULL - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - } - - res <- cbind(ps.hack, ps.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig")) } diff --git a/phackR/R/subgroupAnalysis.R b/phackR/R/subgroupAnalysis.R index 45296d6..2917d4c 100644 --- a/phackR/R/subgroupAnalysis.R +++ b/phackR/R/subgroupAnalysis.R @@ -6,10 +6,12 @@ #' @description Outputs data frame with multiple binary variables from which subgroups can be extracted #' @param nobs.group Vector giving number of observations per group #' @param nsubvars Integer specifying number of variables for potential subgroups +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity -.sim.subgroup <- function(nobs.group, nsubvars){ +.sim.subgroup <- function(nobs.group, nsubvars, effect = 0, heterogeneity = 0){ - dat <- .sim.data(nobs.group) + dat <- .sim.data(nobs.group = nobs.group, effect = effect, heterogeneity = heterogeneity) # Observations per group and total observations if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2) @@ -32,85 +34,78 @@ #' @param iv Integer specifying the location of the binary independent variable in the 
data frame #' @param dv Integer specifying the location of the dependent variable in the data frame #' @param subvars Vector specifying the location of the subgroup variables in the data frame -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" #' @param alpha Significance level of the t-test -#' @importFrom dplyr group_by_at do #' @importFrom stats t.test -#' @importFrom dplyr "%>%" -#' @importFrom rlang .data .subgroupHack <- function(df, iv, dv, subvars, alternative = "two.sided", strategy = "firstsig", alpha = 0.05){ - # Prepare data frame - ttest.df <- cbind(df[,iv], df[,dv]) - subvars.df <- cbind(df[, subvars]) - dfnew <- as.data.frame(cbind(ttest.df, subvars.df)) + group.values <- unique(df[,iv]) + control <- df[df[,iv] == group.values[1], dv] + treatment <- df[df[,iv] == group.values[2], dv] - # Compute p-values, R^2, Cohen's d - - # Not p-hacked - mod.orig <- stats::t.test(ttest.df[,2] ~ ttest.df[,1], var.equal = TRUE, alternative = alternative) - p.orig <- mod.orig$p.value - r2.orig <- .compR2t(ttest.df[ttest.df[,1] == unique(ttest.df[,1])[1],2], - ttest.df[ttest.df[,1] == unique(ttest.df[,1])[2],2]) - d.orig <- .compCohensD(unname(mod.orig$statistic), nrow(ttest.df)/2) - - - # p-hacked - ps <- list() - ds <- list() - r2s <- list() + analyses <- list(list(report = .report.twogroup(control = control, + treatment = treatment, + method = "t.equal", + alternative = alternative), + r2 = .compR2t(control, treatment))) + analyses[[1]][["d"]] <- .compCohensDStat(statistic = analyses[[1]][["report"]][["stat"]], + n1 = length(control), + n2 = length(treatment)) for(i in 1:length(subvars)){ - - tmp <- dplyr::group_by_at(dfnew, subvars[i]) %>% - 
dplyr::do(as.data.frame(stats::t.test(.data$V2 ~ .data$V1, var.equal = TRUE, alternative = alternative)[c("p.value", "statistic")])) - tmp2 <- dplyr::group_by_at(dfnew, subvars[i]) %>% - dplyr::do(as.data.frame(table(.data$V1))) - tmp3 <- dplyr::group_by_at(dfnew, subvars[i]) %>% do(as.data.frame(.compR2t(.data$V2[.data$V1 == unique(.data$V1)[1]], .data$V2[.data$V1 == unique(.data$V1)[2]]))) - - ps[[i]] <- tmp[[2]] - ds[[i]] <- c(tmp[[3]][1]*sqrt(sum(1/tmp2[[3]][1:2])), tmp[[3]][2]*sqrt(sum(1/tmp2[[3]][3:4]))) - r2s[[i]] <- tmp3[[2]] - + levels.current <- sort(unique(df[,subvars[i]])) + for(j in 1:length(levels.current)){ + subset.df <- df[df[,subvars[i]] == levels.current[j], , drop = FALSE] + control.sub <- subset.df[subset.df[,iv] == group.values[1], dv] + treatment.sub <- subset.df[subset.df[,iv] == group.values[2], dv] + report <- .report.twogroup(control = control.sub, + treatment = treatment.sub, + method = "t.equal", + alternative = alternative) + analyses[[length(analyses)+1]] <- list(report = report, + r2 = .compR2t(control.sub, treatment.sub), + d = .compCohensDStat(statistic = report[["stat"]], + n1 = length(control.sub), + n2 = length(treatment.sub))) + } } - ps <- c(p.orig, unlist(ps)) - r2s <- c(r2.orig, unlist(r2s)) - ds <- c(d.orig, unlist(ds)) - - # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- unique(r2s[ps == p.final]) - d.final <- unique(ds[ps == p.final]) + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s, - d.final = d.final, - ds = ds)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + ds.hack = analyses[[final.index]][["d"]], + ds.orig 
= analyses[[1]][["d"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-hacking with multiple subgroups -#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs.group Vector giving number of observations per group #' @param nsubvars Integer specifying number of variables for potential subgroups -#' @param alternative Direction of the t-test ("two.sided", "less", "greater") +#' @param effect Mean effect size across studies +#' @param heterogeneity Between-study heterogeneity +#' @param alternative Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" #' @param alpha Significance level of the t-test #' @param iter Number of simulation iterations #' @param shinyEnv Is the function run in a Shiny session? 
TRUE/FALSE #' @export -sim.subgroupHack <- function(nobs.group, nsubvars, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ +sim.subgroupHack <- function(nobs.group, nsubvars, effect = 0, heterogeneity = 0, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.subgroup(nobs.group = nobs.group, nsubvars = nsubvars) + dat[[i]] <- .sim.subgroup(nobs.group = nobs.group, nsubvars = nsubvars, + effect = effect, heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset @@ -135,25 +130,8 @@ sim.subgroupHack <- function(nobs.group, nsubvars, alternative = "two.sided", st }) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - ds.hack <- NULL - ds.orig <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - ds.hack[i] <- res[[i]][["d.final"]] - ds.orig[i] <- res[[i]][["ds"]][1] - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig", "ds.hack", "ds.orig")) } diff --git a/phackR/R/variableTransformation.R b/phackR/R/variableTransformation.R index 26d2a41..e6a2dad 100644 --- a/phackR/R/variableTransformation.R +++ b/phackR/R/variableTransformation.R @@ -30,8 +30,10 @@ Xtrans <- matrix(NA, nrow = nrow(df)) Xtrans[,1] <- x + xlabels <- "x.orig" Ytrans <- matrix(NA, nrow = nrow(df)) Ytrans[,1] <- y + ylabels <- "y.orig" if(transvar != "y" && normality == FALSE){ Xtrans <- cbind(Xtrans, @@ -39,6 +41,7 @@ sqrt(x+abs(min(x))+1e-10), # square root transformation 1/x # inverse ) + xlabels <- c(xlabels, "x.log", "x.sqrt", "x.inv") } @@ -48,52 +51,55 @@ sqrt(y+abs(min(y))+1e-10), # square root 
transformation 1/y # inverse ) + ylabels <- c(ylabels, "y.log", "y.sqrt", "y.inv") } # Calculate p-values for all transformed variables - ps <- matrix(NA, nrow = dim(Xtrans)[2], ncol = dim(Ytrans)[2]) - r2s <- matrix(NA, nrow = dim(Xtrans)[2], ncol = dim(Ytrans)[2]) + analyses <- list() for(i in 1:ncol(Xtrans)){ for(j in 1:ncol(Ytrans)){ - mod <- summary(stats::lm(Ytrans[,j] ~ Xtrans[,i])) - ps[i,j] <- mod$coefficients[2, 4] - r2s[i,j] <- mod$r.squared + report <- .report.association(x = Xtrans[,i], y = Ytrans[,j], + method = paste(xlabels[i], ylabels[j], sep = "_")) + analyses[[length(analyses)+1]] <- list(report = report, + r2 = tanh(report[["effect"]])^2) } } - ps <- as.vector(ps) - r2s <- as.vector(r2s) + ps <- vapply(analyses, function(x) x[["report"]][["p"]], numeric(1)) # Select final p-hacked p-value based on strategy - p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) - r2.final <- unique(r2s[ps == p.final]) + final.index <- .selectanalysis(ps = ps, strategy = strategy, alpha = alpha) - return(list(p.final = p.final, - ps = ps, - r2.final = r2.final, - r2s = r2s)) + return(list(ps.hack = analyses[[final.index]][["report"]][["p"]], + ps.orig = analyses[[1]][["report"]][["p"]], + r2s.hack = analyses[[final.index]][["r2"]], + r2s.orig = analyses[[1]][["r2"]], + report.initial = analyses[[1]][["report"]], + report.final = analyses[[final.index]][["report"]])) } #' Simulate p-hacking with variable transformations -#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +#' @description Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations #' @param nobs Integer giving number of observations #' @param transvar Which variables should be transformed? 
Either "x" (for x variable), "y" (for y variable), or "xy" (for both) #' @param testnorm Should variables only be transformed after a significant test for normality of residuals? #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" +#' @param effect Mean effect size across studies on the Fisher-z scale +#' @param heterogeneity Between-study heterogeneity on the Fisher-z scale #' @param alpha Significance level of the t-test (default: 0.05) #' @param iter Number of simulation iterations #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE #' @export -sim.varTransHack <- function(nobs, transvar, testnorm = FALSE, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){ +sim.varTransHack <- function(nobs, transvar, testnorm = FALSE, strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE){ # Simulate as many datasets as desired iterations dat <- list() for(i in 1:iter){ - dat[[i]] <- .sim.multcor(nobs = nobs, nvar = 2, r = 0) + dat[[i]] <- .sim.association(nobs = nobs, effect = effect, heterogeneity = heterogeneity) } # Apply p-hacking procedure to each dataset @@ -118,20 +124,7 @@ sim.varTransHack <- function(nobs, transvar, testnorm = FALSE, strategy = "first }) } - ps.hack <- NULL - ps.orig <- NULL - r2s.hack <- NULL - r2s.orig <- NULL - - for(i in 1:iter){ - ps.hack[i] <- res[[i]][["p.final"]] - ps.orig[i] <- res[[i]][["ps"]][1] - r2s.hack[i] <- res[[i]][["r2.final"]] - r2s.orig[i] <- res[[i]][["r2s"]][1] - } - - res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig) - - return(res) + .combine.phase1.results(res = res, + legacy.fields = c("ps.hack", "ps.orig", "r2s.hack", "r2s.orig")) } diff --git a/phackR/inst/shiny-phack/ShinyPHack/data/startplots.rds b/phackR/inst/shiny-phack/ShinyPHack/data/startplots.rds index 7b98f82..beef93b 100644 Binary files a/phackR/inst/shiny-phack/ShinyPHack/data/startplots.rds and b/phackR/inst/shiny-phack/ShinyPHack/data/startplots.rds 
differ diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/01_CompScores.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/01_CompScores.md index 24f557a..810c158 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/01_CompScores.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/01_CompScores.md @@ -2,7 +2,7 @@ The *scale redefinition* strategy assumes that one of the variables in the hypothesis test in question is a composite score (e.g., the mean of items in a personality inventory), and that a researcher manipulates which items are included in the composite score to obtain a significant result. -Here, we assume that the focal hypothesis test is a univariate linear regression, and that items are excluded based on the reliability coefficient Cronbach's α in an iterative fashion. The underlying idea is to delete the item that contributes least to a reliable score, i.e., the item leading to the highest Cronbach's α when deleted. After a candidate item for deletion has been found, the regression is recomputed with (1) the reduced score as a predictor, (2) the deleted item as a predictor, and (3) the score of all deleted items as a predictor, and the p-values are recorded. +Here, we assume that the focal hypothesis test is a univariate linear regression, and that items are excluded based on the reliability coefficient Cronbach's α in an iterative fashion. The underlying idea is to delete the item that contributes least to a reliable score, i.e., the item leading to the highest Cronbach's α when deleted. After a candidate item for deletion has been found, the regression is recomputed with (1) the reduced score as a predictor, (2) the deleted item as a predictor, and (3) the score of all deleted items as a predictor, and the p-values are recorded. Because this tab is based on an association model, the effect and heterogeneity settings are specified on the Fisher-z scale. 
The simulation function in this Shiny app allows the specification of the total number of items in the score, as well as their correlation. Users can also specify the maximum number of items deleted from the score. Naturally, this number should be smaller than the total number of items. Other options users can specify are the number of observations, the p-value selection method, the significance level α, and the number of simulation iterations. diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/02_ExploitCovariates.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/02_ExploitCovariates.md index b50bdc1..f60fdfb 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/02_ExploitCovariates.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/02_ExploitCovariates.md @@ -1,5 +1,5 @@ ### Controlling for Covariates -This p-hacking strategy exploits the common practice of controlling for covariates in statistical analyses. Here, we assume that a researcher is interested in an independent samples t-test. If this test does not yield a significant result, the researcher introduces a number of continuous covariates into the analysis (which will then be computed as an ANCOVA). We assume that all covariates are first entered into the analysis individually, and if this does not yield a significant result, they are added sequentially as y ~ x + cov1, y ~ x + cov1 + cov2, ... (in decreasing order of correlation with the dependent variable). +This p-hacking strategy exploits the common practice of controlling for covariates in statistical analyses. Here, we assume that a researcher is interested in an independent samples t-test. If this test does not yield a significant result, the researcher introduces a number of continuous covariates into the analysis (which will then be computed as an ANCOVA). 
We assume that all covariates are first entered into the analysis individually, and if this does not yield a significant result, they are added sequentially as y ~ x + cov1, y ~ x + cov1 + cov2, ... (in decreasing order of correlation with the dependent variable). Although the p-hacked analyses are computed as ANCOVAs, the effect and heterogeneity settings refer to the group effect and are therefore specified on the standardized mean-difference scale. The simulation function in this Shiny app allows the specification of the number of covariates, as well as their correlation. Users can also specify whether the ANCOVA models should include interaction terms. Note that the inclusion of interaction terms will slow down the computation considerably. Other options users can specify are the number of observations per group, the p-value selection method, the significance level α, and the number of simulation iterations. diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/03_ExploitCutoffs.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/03_ExploitCutoffs.md index 28c6ea2..4ed077a 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/03_ExploitCutoffs.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/03_ExploitCutoffs.md @@ -1,6 +1,6 @@ ### Discretizing variables -This p-hacking strategy is based on splitting a continuous variable into categories with regard to two or more arbitrary cutoff values. Here, we assume that at the start a researcher plans to conduct a univariate linear regression. If this analysis does not yield a significant result, the researcher discretizes the independent variable and compares the means of the resulting groups in the dependent variable. We simulate three approaches: (1) Compare high-scorers and low-scorers based on a median split; (2) conduct a three-way split of the independent variable and compare the two extreme groups; (3) conduct a three-way split of the independent variables and compare all three groups using an ANOVA. 
+This p-hacking strategy is based on splitting a continuous variable into categories with regard to two or more arbitrary cutoff values. Here, we assume that at the start a researcher plans to conduct a univariate linear regression. If this analysis does not yield a significant result, the researcher discretizes the independent variable and compares the means of the resulting groups in the dependent variable. We simulate three approaches: (1) Compare high-scorers and low-scorers based on a median split; (2) conduct a three-way split of the independent variable and compare the two extreme groups; (3) conduct a three-way split of the independent variables and compare all three groups using an ANOVA. The effect and heterogeneity settings in this tab are specified on the Fisher-z scale because the underlying data-generating model is an association model. The simulation function in this Shiny app allows the specification of the sample size, as well as of the p-value selection method, the significance level α, and the number of iterations in the simulation. diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/04_FavorableImputation.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/04_FavorableImputation.md index 008cc8f..02863ba 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/04_FavorableImputation.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/04_FavorableImputation.md @@ -1,5 +1,5 @@ ### Favorable Imputation of Missing Values -This p-hacking strategy assumes that the original dataset a researcher is confronted with contains missing values. A researcher engaging in p-hacking can now try out different imputation methods to replace the missing values, until (possibly) a significant result is obtained. Here, we simulate this p-hacking strategy based on a univariate linear regression, because many imputation methods assume a regression context. +This p-hacking strategy assumes that the original dataset a researcher is confronted with contains missing values. 
A researcher engaging in p-hacking can now try out different imputation methods to replace the missing values, until (possibly) a significant result is obtained. Here, we simulate this p-hacking strategy based on a univariate linear regression, because many imputation methods assume a regression context. Accordingly, the effect and heterogeneity settings are specified on the Fisher-z scale. The simulation function in this Shiny app allows the specification of the total number of observations (observations with missing values are included in this number), the percentage of missing values, and the imputation methods that are used. The percentage of missing values defined is the same for the predictor and the outcome variable (e.g., if the percentage is set to 10%, there will be ten percent missing values in both the predictor and the outcome variable). Additionally, users can specify the p-value selection method, the significance level α, and the number of simulation iterations. diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/05_IncorrectRounding.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/05_IncorrectRounding.md index 21e466a..480ebbf 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/05_IncorrectRounding.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/05_IncorrectRounding.md @@ -1,7 +1,7 @@ ### Incorrect Rounding -This p-hacking strategy is not based on tinkering with the data or the analyses, but on misreporting the analysis outcome. Usually, the result of a hypothesis test is significant if p ≤ α. However, as has been shown (e.g., Hartgerink, van Aert, van Nuijten, Wicherts, & van Assen, 2016), sometimes p-values that are slightly larger than the significance level are reported as significant, that is, p-values are incorrectly rounded down to p = α. +This p-hacking strategy is not based on tinkering with the data or the analyses, but on misreporting the analysis outcome. Usually, the result of a hypothesis test is significant if p ≤ α. 
However, as has been shown (e.g., Hartgerink, van Aert, van Nuijten, Wicherts, & van Assen, 2016), sometimes p-values that are slightly larger than the significance level are reported as significant, that is, p-values are incorrectly rounded down to p = α. In this tab, effect and heterogeneity are specified on the standardized mean-difference scale, just as in the other group-comparison tabs. -In the simulation function in this Shiny app, the user can specify the margin in which p-values should be rounded down, as well as the significance level. For example, if the significance level is specified as α = 0.05, and the margin is specified as 0.001, then all p-values below 0.05+0.001=0.051 will be reported as significant and rounded down to p = 0.05. Additionally, users can specify the direction of the test, and the number of simulation iterations. +In the simulation function in this Shiny app, the user can specify the margin in which p-values should be rounded down, as well as the significance level. For example, if the significance level is specified as α = 0.05, and the margin is specified as 0.001, then all p-values below 0.05+0.001=0.051 will be reported as significant and rounded down to p = 0.05. Additionally, users can specify the direction of the test, where the one-sided option tests a positive effect (treatment > control), and the number of simulation iterations. Note that type I error rates of this p-hacking strategy can also be determined analytically. The theoretical α-level after p-hacking is equivalent to the sum of the original alpha level and the rounding margin. 
diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/06_OptionalStopping.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/06_OptionalStopping.md index 89b5b73..8957d38 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/06_OptionalStopping.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/06_OptionalStopping.md @@ -1,5 +1,5 @@ ### Optional Stopping -Researchers engaging in optional stopping repeatedly inspect the results of the statistical tests during data collection. They stop data collection as soon as a significant result has been obtained or a maximum sample size is reached. Here, we assume that the underlying statistical test is an independent-samples t-test. +Researchers engaging in optional stopping repeatedly inspect the results of the statistical tests during data collection. They stop data collection as soon as a significant result has been obtained or a maximum sample size is reached. Here, we assume that the underlying statistical test is an independent-samples t-test. Therefore, effect and heterogeneity are specified on the standardized mean-difference scale. -In the simulation function provided in this Shiny app, the user can specify the minimum sample size (per group), the maximum sample size (per group), and the number of observations that are collected at each step of the sampling process (*step size*). For example, if the minimum sample size is specified to be 10, the maximum sample size 30, and the step size 5, then interim analyses will be conducted at N = 10, N = 15, N = 20, N = 25, and N = 30. Additionally, users can define the direction of the hypothesis test, the significance level α, and the number of simulation iterations. +In the simulation function provided in this Shiny app, the user can specify the minimum sample size (per group), the maximum sample size (per group), and the number of observations that are collected at each step of the sampling process (*step size*). 
For example, if the minimum sample size is specified to be 10, the maximum sample size 30, and the step size 5, then interim analyses will be conducted at N = 10, N = 15, N = 20, N = 25, and N = 30. Additionally, users can define the direction of the hypothesis test, the significance level α, and the number of simulation iterations. The one-sided option tests a positive effect, that is, whether the treatment group exceeds the control group. diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/07_OutlierExclusion.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/07_OutlierExclusion.md index 03c4a49..b0665ac 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/07_OutlierExclusion.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/07_OutlierExclusion.md @@ -1,6 +1,6 @@ ### Outlier Exclusion -In this p-hacking strategy, a researcher applies different outlier exclusion criteria to their data with the goal of obtaining a significant result in a focal hypothesis test. Here, we assume that the hypothesis test in question is a univariate linear regression. Further, we assume that the researcher first checks for potential outliers in the predictor variable (x) and in the outcome variable (y), and then reruns the analysis (1) without the xy pairs where x is an outlier, (2) without the xy pairs where y is an outlier, (3) without the xy pairs where x *and* y are outliers. We assume that this is done for each outlier exclusion method. +In this p-hacking strategy, a researcher applies different outlier exclusion criteria to their data with the goal of obtaining a significant result in a focal hypothesis test. Here, we assume that the hypothesis test in question is a univariate linear regression. 
Further, we assume that the researcher first checks for potential outliers in the predictor variable (x) and in the outcome variable (y), and then reruns the analysis (1) without the xy pairs where x is an outlier, (2) without the xy pairs where y is an outlier, (3) without the xy pairs where x *and* y are outliers. We assume that this is done for each outlier exclusion method. Because this tab is based on an association model, the effect and heterogeneity settings are specified on the Fisher-z scale. In the simulation function provided in this Shiny app, users can define the outlier exclusion methods that are applied, as well as the sample size, the p-value selection method, the significance level α, and the number of simulation iterations. diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/09_SelectiveReportingDV.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/09_SelectiveReportingDV.md index 7ca4812..b7a919e 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/09_SelectiveReportingDV.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/09_SelectiveReportingDV.md @@ -1,5 +1,5 @@ ### Selective Reporting of the Dependent Variable -This p-hacking strategy assumes that the dataset contains multiple candidate dependent variables. For example, in a clinical trial, the treatment and control group could be compared on different outcome variables, such as mental and physical well-being. A researcher engaging in p-hacking would conduct one hypothesis test for each dependent variable, and selectively report the significant results. Here, we assume that the hypothesis test in question is an independent-samples t-test. +This p-hacking strategy assumes that the dataset contains multiple candidate dependent variables. For example, in a clinical trial, the treatment and control group could be compared on different outcome variables, such as mental and physical well-being. 
A researcher engaging in p-hacking would conduct one hypothesis test for each dependent variable, and selectively report the significant results. Here, we assume that the hypothesis test in question is an independent-samples t-test. Accordingly, effect and heterogeneity are specified on the standardized mean-difference scale. -The simulation function in this Shiny app allows the specification of the number of dependent variables as well as their correlation. Additionally, users can define the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. +The simulation function in this Shiny app allows the specification of the number of dependent variables as well as their correlation. Additionally, users can define the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. The one-sided option tests a positive effect, that is, whether the treatment group exceeds the control group. diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/10_SelectiveReportingIV.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/10_SelectiveReportingIV.md index aca56ca..bd91f8d 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/10_SelectiveReportingIV.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/10_SelectiveReportingIV.md @@ -1,5 +1,5 @@ ### Selective Reporting of the Independent Variable -This p-hacking strategy assumes that an experiment or clinical trial contains multiple experimental groups and one control group. A researcher engaging in p-hacking statistically compares all experimental groups to the control group, and only report the significant results. Here, we assume that all conducted hypothesis tests are t-tests. +This p-hacking strategy assumes that an experiment or clinical trial contains multiple experimental groups and one control group. 
A researcher engaging in p-hacking statistically compares all experimental groups to the control group, and only reports the significant results. Here, we assume that all conducted hypothesis tests are t-tests. In this group-comparison setting, effect and heterogeneity are specified on the standardized mean-difference scale.
-The simulation function in this Shiny app allows the specification of the number of experimental groups (independent variables), and their correlation. Additionally, users can set the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations.
+The simulation function in this Shiny app allows the specification of the number of experimental groups (independent variables), and their correlation. Additionally, users can set the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. The one-sided option tests a positive effect, that is, whether the treatment group exceeds the control group.
diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/11_StatAnalysis.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/11_StatAnalysis.md
index b3923e1..08f95c8 100644
--- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/11_StatAnalysis.md
+++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/11_StatAnalysis.md
@@ -1,5 +1,5 @@
 ### Exploiting Alternative Hypothesis Tests
-Often, different statistical analysis techniques can be used to answer the same research question. This p-hacking strategy assumes that a researcher tries out different statistical analysis options and decides for the one yielding a significant result. Here, we assume that the hypothesis tests in question are an independent-samples t-test, a Welch test, a Wilcoxon test, and a Yuen test (with different levels of trimming). 
+Often, different statistical analysis techniques can be used to answer the same research question. This p-hacking strategy assumes that a researcher tries out different statistical analysis options and decides for the one yielding a significant result. Here, we assume that the hypothesis tests in question are an independent-samples t-test, a Welch test, a Wilcoxon test, and a Yuen test (with different levels of trimming). Therefore, effect and heterogeneity are specified on the standardized mean-difference scale. -The simulation function in this Shiny app allows users to specify the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. +The simulation function in this Shiny app allows users to specify the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. The one-sided option tests a positive effect, that is, whether the treatment group exceeds the control group. diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/12_SubgroupAnalysis.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/12_SubgroupAnalysis.md index 7c6fc29..c6c9d25 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/12_SubgroupAnalysis.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/12_SubgroupAnalysis.md @@ -1,5 +1,5 @@ ### Subgroup Analyses -This p-hacking strategy assumes that if an initial hypothesis test does not yield a significant result, a researcher would repeat the same hypothesis test on subgroups of the sample (e.g., right-handed and left-handed participants). Here, we assume that all subgroup variables have two levels, and that the hypothesis test is conducted on each level of the subgroup variables. Additionally, we assume that the hypothesis test in question is a t-test (e.g., between an experimental and a control condition). 
Note that we do not assume that the experimental and control condition are balanced within the subgroups. Therefore, within a subgroup, the number of participants in the experimental and control group can differ. +This p-hacking strategy assumes that if an initial hypothesis test does not yield a significant result, a researcher would repeat the same hypothesis test on subgroups of the sample (e.g., right-handed and left-handed participants). Here, we assume that all subgroup variables have two levels, and that the hypothesis test is conducted on each level of the subgroup variables. Additionally, we assume that the hypothesis test in question is a t-test (e.g., between an experimental and a control condition). Note that we do not assume that the experimental and control condition are balanced within the subgroups. Therefore, within a subgroup, the number of participants in the experimental and control group can differ. Effect and heterogeneity are therefore specified on the standardized mean-difference scale. -In the simulation function in this Shiny app, users can specify the number of observations per group in the original t-test, the number of subgroup variables, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. +In the simulation function in this Shiny app, users can specify the number of observations per group in the original t-test, the number of subgroup variables, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. The one-sided option tests a positive effect, that is, whether the treatment group exceeds the control group. 
diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/13_VariableTransformation.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/13_VariableTransformation.md index 1eb6f91..80512eb 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/13_VariableTransformation.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/13_VariableTransformation.md @@ -1,5 +1,5 @@ ### Variable Transformation -This p-hacking strategy assumes that if an initial hypothesis test does not yield significant results, a researcher would apply transformations to the variables involved in the test. Here, we assume that the test in question is a univariate linear regression, and that the transformations are a natural log transformation (ln(x)), a square root transformation (√x), and an inverse transformation (1/x). Transformations can be applied to the predictor variable, to the outcome variable, or both. +This p-hacking strategy assumes that if an initial hypothesis test does not yield significant results, a researcher would apply transformations to the variables involved in the test. Here, we assume that the test in question is a univariate linear regression, and that the transformations are a natural log transformation (ln(x)), a square root transformation (√x), and an inverse transformation (1/x). Transformations can be applied to the predictor variable, to the outcome variable, or both. As in the other regression-based tabs, effect and heterogeneity are specified on the Fisher-z scale. In the simulation function in this Shiny app, users can specify which of the variables should be transformed. Additionally, they can specify the number of observations, the p-value selection method, the significance level α, and the number of simulation iterations. 
diff --git a/phackR/inst/shiny-phack/ShinyPHack/mddoc/landingPage.md b/phackR/inst/shiny-phack/ShinyPHack/mddoc/landingPage.md index 2f329f0..267430d 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/mddoc/landingPage.md +++ b/phackR/inst/shiny-phack/ShinyPHack/mddoc/landingPage.md @@ -19,7 +19,7 @@ In the literature, p-hacking has typically been described as being comprised of Here, we provide an overview of different p-hacking strategies that have been mentioned in the literature, together with a Shiny app that lets users explore the effects of p-hacking on the distribution of hypothesis testing results. ## Exploring the Effects of P-Hacking -Each tab of this Shiny app lets the user explore the effects of a different p-hacking strategy. All tabs have the same structure: First, we describe the p-hacking strategy, and how we applied it in our simulations. Below, we present simulation results, specifically the distribution of p-values, the distribution of effect sizes (if applicable), and the rate of false positive results. On a panel on the right side, the user can adjust the settings of the simulation, including the severity of the p-hacking. +Each tab of this Shiny app lets the user explore the effects of a different p-hacking strategy. All tabs have the same structure: First, we describe the p-hacking strategy, and how we applied it in our simulations. Below, we present simulation results, specifically the distribution of p-values, the distribution of effect sizes (if applicable), and the rate of significant results. If the true effect size is set to 0 and the heterogeneity is set to 0, this rate corresponds to the false positive rate. Otherwise, it should be interpreted as the rejection rate, that is, the rate of significant results. On a panel on the right side, the user can adjust the settings of the simulation, including the severity of the p-hacking. ### Common Settings Several settings are common to the simulation of (almost) all p-hacking strategies. 
To avoid unnecessary repetition, we will describe these settings here. @@ -28,7 +28,13 @@ Several settings are common to the simulation of (almost) all p-hacking strategi In all simulation functions, it is necessary to specify how the final p-value is determined. There are three options: *first significant* simulates a situation where the researcher conducts a series of hypothesis tests, and stops as soon as the result is significant, that is, at the first significant p-value. In a comment on Simonsohn et al. (2014), Ulrich and Miller (2015) argued that researchers might instead engage in "ambitious" p-hacking, where the researcher conducts a series of hypothesis tests and selects the smallest significant p-value from the set. This strategy is implemented in the *smallest significant* option. Simonsohn (private comm.) argues that there might exist a third p-hacking strategy where the researcher tries a number of different analysis options, and selects the smallest p-value, no matter if it is significant or not. This strategy is implemented in the option *smallest*. The default strategy is *first significant*. #### True effect size -The true effect size in all simulations is equal to zero. +The true effect size can be specified in all simulations. If the true effect size is set to 0 and the heterogeneity is set to 0, the simulations operate in the false-positive regime. Otherwise, the app shows the rate of significant results rather than a false positive rate. + +#### Heterogeneity +The heterogeneity setting determines how much the true effect size varies across simulated studies. A heterogeneity of 0 means that all simulated studies share the same true effect size. Larger values imply that the study-specific true effects vary around the specified mean effect size. + +#### Effect size scale +The meaning of the effect size depends on the type of hypothesis test. 
In tabs based on group comparisons, effect size and heterogeneity are specified on the standardized mean-difference scale. In tabs based on regression or correlation analyses, effect size and heterogeneity are specified on the Fisher-z scale. In the tab on controlling for covariates, the effect size still refers to the group effect and is therefore also specified on the standardized mean-difference scale. #### Significance level The significance level α determines the significance level for each hypothesis test. For example, if the significance level is set to α = 0.05 (the default), the simulation assumes that a researcher would call the result of a hypothesis test significant if p < 0.05. @@ -37,7 +43,7 @@ The significance level α determines the significance level for each hypoth The *iterations* option determines the number of iterations in the simulation. The default setting is 1000. #### Alternative -Whenever the simulations are based on t-tests, the option *alternative* can be specified. This option relates to the sidedness of the alternative hypothesis in the t-test. It can either be *two-sided* or *greater*. The default setting is *two-sided*. +Whenever the simulations are based on t-tests, the option *alternative* can be specified. This option relates to the sidedness of the alternative hypothesis in the t-test. It can either be *two-sided* or the one-sided positive-effect option. In the one-sided case, the simulation tests whether the treatment group exceeds the control group, that is, whether the true effect is positive. The default setting is *two-sided*. #### Number of observations The number of observations determines the sample size in the test. In the case of a t-test, the specified number refers to the observations *per group*. In the case of a linear regression, the specified number refers to the overall sample size. 
diff --git a/phackR/inst/shiny-phack/ShinyPHack/server.R b/phackR/inst/shiny-phack/ShinyPHack/server.R index 4bb2567..13ea233 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/server.R +++ b/phackR/inst/shiny-phack/ShinyPHack/server.R @@ -8,6 +8,10 @@ function(input, output) { # Initiate variable to store simulation results sims <- reactiveValues() + .escol <- function(simdat, column){ + match(column, colnames(as.data.frame(simdat))) + } + # ------------------- Composite Scores --------------------------------------- output$uindeleteCompScores <- renderUI({ @@ -22,10 +26,10 @@ function(input, output) { output$compScoresPlot5 = renderPlot(startplots$compscorePlotES$esnohack) observeEvent(input$calcCompScores > 0, { - ifelse(length(input$uindeleteCompScores)==0, ndelete<-2, ndelete<-input$uindeleteCompScores) - res1 <- sim.compscoreHack(nobs=input$nobsCompScores, ncompv=input$ncompvCompScores, rcomp=input$rcompCompScores, ndelete=ndelete, strategy = input$strategyCompScores, alpha = input$alphaCompScores, iter = input$iterCompScores, shinyEnv=TRUE) + ifelse(length(input$ndeleteCompScores)==0, ndelete<-2, ndelete<-input$ndeleteCompScores) + res1 <- sim.compscoreHack(nobs=input$nobsCompScores, ncompv=input$ncompvCompScores, rcomp=input$rcompCompScores, ndelete=ndelete, strategy = input$strategyCompScores, effect = input$effectCompScores, heterogeneity = input$heterogeneityCompScores, alpha = input$alphaCompScores, iter = input$iterCompScores, shinyEnv=TRUE) compscorePlot <- phackR:::pplots(simdat=res1, alpha=input$alphaCompScores) - compscorePlotES <- phackR:::esplots(simdat=res1, EScolumn.hack=3, EScolumn.orig=4) + compscorePlotES <- phackR:::esplots(simdat=res1, EScolumn.hack=.escol(res1, "r2s.hack"), EScolumn.orig=.escol(res1, "r2s.orig")) compscore.fprate.p <- paste0(round(sum(res1[,"ps.hack"] < input$alphaCompScores)/input$iterCompScores*100, 2), " %") compscore.fprate.o <- paste0(round(sum(res1[,"ps.orig"] < input$alphaCompScores)/input$iterCompScores*100, 2), " 
%") output$compScoresPlot = renderPlot(compscorePlot$pcomp) @@ -54,10 +58,10 @@ function(input, output) { observeEvent(input$calcExpCov > 0, { if(input$interactExpCov == "Yes") interactions <- TRUE else if(input$interactExpCov == "No") interactions <- FALSE - res2 <- sim.covhack(nobs.group = input$nobsExpCov, ncov = input$ncovExpCov, rcov = input$rcovExpCov, rcovdv = input$rcovdvExpCov, interactions = interactions, strategy = input$strategyExpCov, alpha = input$alphaExpCov, iter = input$iterExpCov, shinyEnv=TRUE) + res2 <- sim.covhack(nobs.group = input$nobsExpCov, ncov = input$ncovExpCov, rcov = input$rcovExpCov, rcovdv = input$rcovdvExpCov, interactions = interactions, strategy = input$strategyExpCov, effect = input$effectExpCov, heterogeneity = input$heterogeneityExpCov, alpha = input$alphaExpCov, iter = input$iterExpCov, shinyEnv=TRUE) expCovPlot <- phackR:::pplots(simdat=res2, alpha=input$alphaExpCov) - expCovES <- phackR:::esplots(simdat=res2, EScolumn.hack=3, EScolumn.orig=4, titles = c(expression("Distribution of p-hacked effect sizes "*eta^2), - expression("Distribution of original effect sizes "*eta^2))) + expCovES <- phackR:::esplots(simdat=res2, EScolumn.hack=.escol(res2, "eta2s.hack"), EScolumn.orig=.escol(res2, "eta2s.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*eta^2), + expression("Distribution of original effect sizes "*eta^2))) expcov.fprate.p <- paste0(round(sum(res2[,"ps.hack"] < input$alphaExpCov)/input$iterExpCov*100, 2), " %") expcov.fprate.o <- paste0(round(sum(res2[,"ps.orig"] < input$alphaExpCov)/input$iterExpCov*100, 2), " %") output$expCovPlot = renderPlot(expCovPlot$pcomp) @@ -84,9 +88,9 @@ function(input, output) { output$expCutPlot5 = renderPlot(startplots$expCutES$esnohack) observeEvent(input$calcExpCut > 0, { - res3 <- sim.cutoffHack(nobs = input$nobsExpCut, strategy = input$strategyExpCut, alpha = input$alphaExpCut, iter = input$iterExpCut, shinyEnv=TRUE) + res3 <- sim.cutoffHack(nobs = input$nobsExpCut, 
strategy = input$strategyExpCut, effect = input$effectExpCut, heterogeneity = input$heterogeneityExpCut, alpha = input$alphaExpCut, iter = input$iterExpCut, shinyEnv=TRUE) expCutPlot <- phackR:::pplots(simdat=res3, alpha=input$alphaExpCut) - expCutES <- phackR:::esplots(simdat=res3, EScolumn.hack=3, EScolumn.orig=4) + expCutES <- phackR:::esplots(simdat=res3, EScolumn.hack=.escol(res3, "r2s.hack"), EScolumn.orig=.escol(res3, "r2s.orig")) expcut.fprate.p <- paste0(round(sum(res3[,"ps.hack"] < input$alphaExpCut)/input$iterExpCut*100, 2), " %") expcut.fprate.o <- paste0(round(sum(res3[,"ps.orig"] < input$alphaExpCut)/input$iterExpCut*100, 2), " %") output$expCutPlot = renderPlot(expCutPlot$pcomp) @@ -113,9 +117,9 @@ function(input, output) { output$favImpPlot5 = renderPlot(startplots$favImpES$esnohack) observeEvent(input$calcfavImp > 0, { - res4 <- sim.impHack(nobs = input$nobsfavImp, missing = input$missingfavImp, which = as.numeric(input$whichImpfavImp), strategy = input$strategyfavImp, alpha = input$alphafavImp, iter = input$iterfavImp, shinyEnv = TRUE) + res4 <- sim.impHack(nobs = input$nobsfavImp, missing = input$missingfavImp, which = as.numeric(input$whichImpfavImp), strategy = input$strategyfavImp, effect = input$effectfavImp, heterogeneity = input$heterogeneityfavImp, alpha = input$alphafavImp, iter = input$iterfavImp, shinyEnv = TRUE) favImpPlot <- phackR:::pplots(simdat=res4, alpha=input$alphafavImp) - favImpES <- phackR:::esplots(simdat=res4, EScolumn.hack=3, EScolumn.orig=4) + favImpES <- phackR:::esplots(simdat=res4, EScolumn.hack=.escol(res4, "r2s.hack"), EScolumn.orig=.escol(res4, "r2s.orig")) favimp.fprate.p <- paste0(round(sum(res4[,"ps.hack"] < input$alphafavImp)/input$iterfavImp*100, 2), " %") favimp.fprate.o <- paste0(round(sum(res4[,"ps.orig"] < input$alphafavImp)/input$iterfavImp*100, 2), " %") output$favImpPlot = renderPlot(favImpPlot$pcomp) @@ -142,9 +146,9 @@ function(input, output) { output$roundingPlot5 = 
renderPlot(startplots$roundingES$esnohack) observeEvent(input$calcRounding > 0, { - res5 <- sim.roundhack(roundinglevel = input$levelRounding+input$alphaRounding, iter = input$iterRounding, alternative = input$altRounding, alpha = input$alphaRounding, shinyEnv = TRUE) + res5 <- sim.roundhack(roundinglevel = input$levelRounding+input$alphaRounding, effect = input$effectRounding, heterogeneity = input$heterogeneityRounding, iter = input$iterRounding, alternative = input$altRounding, alpha = input$alphaRounding, shinyEnv = TRUE) roundingPlot <- phackR:::pplots(simdat=res5, alpha=input$alphaRounding) - roundingES <- phackR:::esplots(simdat=res5, EScolumn.hack=3, EScolumn.orig=4) + roundingES <- phackR:::esplots(simdat=res5, EScolumn.hack=.escol(res5, "r2s.hack"), EScolumn.orig=.escol(res5, "r2s.orig")) rounding.fprate.p <- paste0(sum(round(res5[,"ps.hack"] <= input$alphaRounding)/input$iterRounding*100, 2), " %") rounding.fprate.o <- paste0(sum(round(res5[,"ps.orig"] <= input$alphaRounding)/input$iterRounding*100, 2), " %") output$roundingPlot = renderPlot(roundingPlot$pcomp) @@ -173,11 +177,11 @@ function(input, output) { output$optStopPlot7 = renderPlot(startplots$optstopESd$esnohack) observeEvent(input$calcOptStop > 0, { - res6 <- sim.optstop(n.min = input$nminOptStop, n.max = input$nmaxOptStop, step = input$stepOptStop, alternative = input$altOptStop, iter = input$iterOptStop, alpha = input$alphaOptStop, shinyEnv = TRUE) + res6 <- sim.optstop(n.min = input$nminOptStop, n.max = input$nmaxOptStop, step = input$stepOptStop, effect = input$effectOptStop, heterogeneity = input$heterogeneityOptStop, alternative = input$altOptStop, iter = input$iterOptStop, alpha = input$alphaOptStop, shinyEnv = TRUE) optstopPlot <- phackR:::pplots(simdat = res6, alpha = input$alphaOptStop) - optstopESr2 <- phackR:::esplots(simdat=res6, EScolumn.hack=3, EScolumn.orig=4) - optstopESd <- phackR:::esplots(simdat=res6, EScolumn.hack=5, EScolumn.orig=6, titles = c(expression("Distribution of 
p-hacked effect sizes "*delta), - expression("Distribution of original effect sizes "*delta))) + optstopESr2 <- phackR:::esplots(simdat=res6, EScolumn.hack=.escol(res6, "r2s.hack"), EScolumn.orig=.escol(res6, "r2s.orig")) + optstopESd <- phackR:::esplots(simdat=res6, EScolumn.hack=.escol(res6, "ds.hack"), EScolumn.orig=.escol(res6, "ds.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*delta), + expression("Distribution of original effect sizes "*delta))) optstop.fprate.p <- paste0(round(sum(res6[,"ps.hack"] <= input$alphaOptStop)/input$iterOptStop*100, 2), " %") optstop.fprate.o <- paste0(round(sum(res6[,"ps.orig"] <= input$alphaOptStop)/input$iterOptStop*100, 2), " %") output$optStopPlot <- renderPlot(optstopPlot$pcomp) @@ -205,9 +209,9 @@ function(input, output) { output$outExclPlot5 = renderPlot(startplots$outExclES$esnohack) observeEvent(input$calcOutExcl > 0, { - res7 <- sim.outHack(nobs = input$nobsOutExcl, which = as.numeric(input$whichOutExcl), strategy = input$strategyOutExcl, alpha = input$alphaOutExcl, iter = input$iterOutExcl, shinyEnv = TRUE) + res7 <- sim.outHack(nobs = input$nobsOutExcl, which = as.numeric(input$whichOutExcl), strategy = input$strategyOutExcl, effect = input$effectOutExcl, heterogeneity = input$heterogeneityOutExcl, alpha = input$alphaOutExcl, iter = input$iterOutExcl, shinyEnv = TRUE) outExclPlot <- phackR:::pplots(simdat = res7, alpha = input$alphaOutExcl) - outExclES <- phackR:::esplots(simdat = res7, EScolumn.hack = 3, EScolumn.orig = 4) + outExclES <- phackR:::esplots(simdat = res7, EScolumn.hack = .escol(res7, "r2s.hack"), EScolumn.orig = .escol(res7, "r2s.orig")) outExcl.fprate.p <- paste0(round(sum(res7[,"ps.hack"] <= input$alphaOutExcl)/input$iterOutExcl*100, 2), " %") outExcl.fprate.o <- paste0(round(sum(res7[,"ps.orig"] <= input$alphaOutExcl)/input$iterOutExcl*100, 2), " %") output$outExclPlot <- renderPlot(outExclPlot$pcomp) @@ -237,11 +241,11 @@ function(input, output) { output$SRDVPlot7 = 
renderPlot(startplots$SRDVESd$esnohack) observeEvent(input$calcSRDV > 0, { - res9 <- sim.multDVhack(nobs.group = input$nobsSRDV, nvar = input$nvarSRDV, r = input$rSRDV, strategy = input$strategySRDV, iter = input$iterSRDV, alternative = input$altSRDV, alpha = input$alphaSRDV, shinyEnv = TRUE) + res9 <- sim.multDVhack(nobs.group = input$nobsSRDV, nvar = input$nvarSRDV, r = input$rSRDV, strategy = input$strategySRDV, effect = input$effectSRDV, heterogeneity = input$heterogeneitySRDV, iter = input$iterSRDV, alternative = input$altSRDV, alpha = input$alphaSRDV, shinyEnv = TRUE) SRDVPlot <- phackR:::pplots(simdat = res9, alpha = input$alphaSRDV) - SRDVESr2 <- phackR:::esplots(simdat=res9, EScolumn.hack=3, EScolumn.orig=4) - SRDVESd <- phackR:::esplots(simdat=res9, EScolumn.hack=5, EScolumn.orig=6, titles = c(expression("Distribution of p-hacked effect sizes "*delta), - expression("Distribution of original effect sizes "*delta))) + SRDVESr2 <- phackR:::esplots(simdat=res9, EScolumn.hack=.escol(res9, "r2s.hack"), EScolumn.orig=.escol(res9, "r2s.orig")) + SRDVESd <- phackR:::esplots(simdat=res9, EScolumn.hack=.escol(res9, "ds.hack"), EScolumn.orig=.escol(res9, "ds.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*delta), + expression("Distribution of original effect sizes "*delta))) SRDV.fprate.p <- paste0(round(sum(res9[,"ps.hack"] <= input$alphaSRDV)/input$iterSRDV*100, 2), " %") SRDV.fprate.o <- paste0(round(sum(res9[,"ps.orig"] <= input$alphaSRDV)/input$iterSRDV*100, 2), " %") output$SRDVPlot <- renderPlot(SRDVPlot$pcomp) @@ -272,11 +276,11 @@ function(input, output) { output$SRIVPlot7 = renderPlot(startplots$SRIVESd$esnohack) observeEvent(input$calcSRIV > 0, { - res10 <- sim.multDVhack(nobs.group = input$nobsSRIV, nvar = input$nvarSRIV, r = input$rSRIV, strategy = input$strategySRIV, iter = input$iterSRIV, alternative = input$altSRIV, alpha = input$alphaSRIV, shinyEnv = TRUE) + res10 <- sim.multIVhack(nobs.group = input$nobsSRIV, nvar = 
input$nvarSRIV, r = input$rSRIV, regression = FALSE, strategy = input$strategySRIV, effect = input$effectSRIV, heterogeneity = input$heterogeneitySRIV, iter = input$iterSRIV, alternative = input$altSRIV, alpha = input$alphaSRIV, shinyEnv = TRUE) SRIVPlot <- phackR:::pplots(simdat = res10, alpha = input$alphaSRIV) - SRIVESr2 <- phackR:::esplots(simdat=res10, EScolumn.hack=3, EScolumn.orig=4) - SRIVESd <- phackR:::esplots(simdat=res10, EScolumn.hack=5, EScolumn.orig=6, titles = c(expression("Distribution of p-hacked effect sizes "*delta), - expression("Distribution of original effect sizes "*delta))) + SRIVESr2 <- phackR:::esplots(simdat=res10, EScolumn.hack=.escol(res10, "r2s.hack"), EScolumn.orig=.escol(res10, "r2s.orig")) + SRIVESd <- phackR:::esplots(simdat=res10, EScolumn.hack=.escol(res10, "ds.hack"), EScolumn.orig=.escol(res10, "ds.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*delta), + expression("Distribution of original effect sizes "*delta))) SRIV.fprate.p <- paste0(round(sum(res10[,"ps.hack"] <= input$alphaSRIV)/input$iterSRIV*100, 2), " %") SRIV.fprate.o <- paste0(round(sum(res10[,"ps.orig"] <= input$alphaSRIV)/input$iterSRIV*100, 2), " %") output$SRIVPlot <- renderPlot(SRIVPlot$pcomp) @@ -303,7 +307,7 @@ function(input, output) { output$statAnalysisFPOrig = renderText(startplots$statAnalysis.fprate.o) observeEvent(input$calcStatAnalysis > 0, { - res11 <- sim.statAnalysisHack(nobs.group = input$nobsStatAnalysis, strategy = input$strategyStatAnalysis, alternative = input$altStatAnalysis, alpha = input$alphaStatAnalysis, iter = input$iterStatAnalysis, shinyEnv = TRUE) + res11 <- sim.statAnalysisHack(nobs.group = input$nobsStatAnalysis, strategy = input$strategyStatAnalysis, effect = input$effectStatAnalysis, heterogeneity = input$heterogeneityStatAnalysis, alternative = input$altStatAnalysis, alpha = input$alphaStatAnalysis, iter = input$iterStatAnalysis, shinyEnv = TRUE) statAnalysisPlot <- phackR:::pplots(simdat = res11, alpha = 
input$alphaStatAnalysis) statAnalysis.fprate.p <- paste0(round(sum(res11[,"ps.hack"] <= input$alphaStatAnalysis)/input$iterStatAnalysis*100, 2), " %") statAnalysis.fprate.o <- paste0(round(sum(res11[,"ps.orig"] <= input$alphaStatAnalysis)/input$iterStatAnalysis*100, 2), " %") @@ -331,11 +335,11 @@ function(input, output) { output$subgroupPlot7 = renderPlot(startplots$subgroupESd$esnohack) observeEvent(input$calcSubgroup > 0, { - res12 <- sim.subgroupHack(nobs.group = input$nobsSubgroup, nsubvars = input$nsubvarsSubgroup, alternative = input$altSubgroup, strategy = input$strategySubgroup, alpha = input$alphaSubgroup, iter = input$iterSubgroup, shinyEnv = TRUE) + res12 <- sim.subgroupHack(nobs.group = input$nobsSubgroup, nsubvars = input$nsubvarsSubgroup, effect = input$effectSubgroup, heterogeneity = input$heterogeneitySubgroup, alternative = input$altSubgroup, strategy = input$strategySubgroup, alpha = input$alphaSubgroup, iter = input$iterSubgroup, shinyEnv = TRUE) subgroupPlot <- phackR:::pplots(simdat = res12, alpha = input$alphaSubgroup) - subgroupESr2 <- phackR:::esplots(simdat=res12, EScolumn.hack=3, EScolumn.orig=4) - subgroupESd <- phackR:::esplots(simdat=res12, EScolumn.hack=5, EScolumn.orig=6, titles = c(expression("Distribution of p-hacked effect sizes "*delta), - expression("Distribution of original effect sizes "*delta))) + subgroupESr2 <- phackR:::esplots(simdat=res12, EScolumn.hack=.escol(res12, "r2s.hack"), EScolumn.orig=.escol(res12, "r2s.orig")) + subgroupESd <- phackR:::esplots(simdat=res12, EScolumn.hack=.escol(res12, "ds.hack"), EScolumn.orig=.escol(res12, "ds.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*delta), + expression("Distribution of original effect sizes "*delta))) subgroup.fprate.p <- paste0(round(sum(res12[,"ps.hack"] <= input$alphaSubgroup)/input$iterSubgroup*100, 2), " %") subgroup.fprate.o <- paste0(round(sum(res12[,"ps.orig"] <= input$alphaSubgroup)/input$iterSubgroup*100, 2), " %") output$subgroupPlot 
<- renderPlot(subgroupPlot$pcomp) @@ -364,9 +368,9 @@ function(input, output) { output$varTransPlot5 = renderPlot(startplots$varTransES$esnohack) observeEvent(input$calcVarTrans > 0, { - res13 <- sim.varTransHack(nobs = input$nobsVarTrans, transvar = input$transvarVarTrans, strategy = input$strategyVarTrans, alpha = input$alphaVarTrans, iter = input$iterVarTrans, shinyEnv = TRUE) + res13 <- sim.varTransHack(nobs = input$nobsVarTrans, transvar = input$transvarVarTrans, strategy = input$strategyVarTrans, effect = input$effectVarTrans, heterogeneity = input$heterogeneityVarTrans, alpha = input$alphaVarTrans, iter = input$iterVarTrans, shinyEnv = TRUE) varTransPlot <- phackR:::pplots(simdat = res13, alpha = input$alphaVarTrans) - varTransES <- phackR:::esplots(simdat = res13, EScolumn.hack = 3, EScolumn.orig = 4) + varTransES <- phackR:::esplots(simdat = res13, EScolumn.hack = .escol(res13, "r2s.hack"), EScolumn.orig = .escol(res13, "r2s.orig")) varTrans.fprate.p <- paste0(round(sum(res13[,"ps.hack"] <= input$alphaVarTrans)/input$iterVarTrans*100, 2), " %") varTrans.fprate.o <- paste0(round(sum(res13[,"ps.orig"] <= input$alphaVarTrans)/input$iterVarTrans*100, 2), " %") output$varTransPlot <- renderPlot(varTransPlot$pcomp) diff --git a/phackR/inst/shiny-phack/ShinyPHack/ui.R b/phackR/inst/shiny-phack/ShinyPHack/ui.R index 39b325b..5d8b087 100644 --- a/phackR/inst/shiny-phack/ShinyPHack/ui.R +++ b/phackR/inst/shiny-phack/ShinyPHack/ui.R @@ -1,6 +1,28 @@ library(shinydashboard) library(markdown) +.rateBox <- function(hackOutput, origOutput){ + box(width=6, status = "primary", + h4("Significant-result rate (p-hacked)"), + textOutput(hackOutput), + h4("Significant-result rate (original)"), + textOutput(origOutput)) +} + +.effectInputs <- function(effectId, heterogeneityId, scaleLabel){ + tagList( + numericInput(effectId, + label = paste0("Mean true effect (", scaleLabel, ")"), + value = 0, + step = 0.1), + numericInput(heterogeneityId, + label = paste0("Between-study 
heterogeneity (", scaleLabel, ")"), + value = 0, + min = 0, + step = 0.1) + ) +} + # ============================================================================== # Define header # ============================================================================== @@ -65,22 +87,19 @@ body <- dashboardBody( plotOutput("compScoresPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("compScoresFPHack"), - h4("False positive rate (original)"), - textOutput("compScoresFPOrig"))), + .rateBox("compScoresFPHack", "compScoresFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("compScoresPlot4")), box(width=6, status = "primary", plotOutput("compScoresPlot5")))), column(width=4, - box(width=12, status = "primary", height=750, background = "navy", + box(width=12, status = "primary", height=840, background = "navy", h4("Simulation settings"), numericInput("nobsCompScores", label = "Number of observations", value = 30, min = 10, max = 1000), numericInput("ncompvCompScores", label = "Number of items in composite score", value = 5, min = 3, max = 20), uiOutput("uindeleteCompScores"), sliderInput("rcompCompScores", label = "Correlation between composite score items", min = 0, max = 0.99, value = 0.8, step = 0.01), + .effectInputs("effectCompScores", "heterogeneityCompScores", "Fisher-z scale"), radioButtons("strategyCompScores", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaCompScores", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterCompScores", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -97,23 +116,20 @@ body <- dashboardBody( plotOutput("expCovPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - 
textOutput("expCovFPHack"), - h4("False positive rate (original)"), - textOutput("expCovFPOrig"))), + .rateBox("expCovFPHack", "expCovFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("expCovPlot4")), box(width=6, status = "primary", plotOutput("expCovPlot5")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nobsExpCov", label = "Number of observations per group", value = 30, min = 10, max = 1000), numericInput("ncovExpCov", label = "Number of covariates", value = 3, min = 1, max = 10), sliderInput("rcovExpCov", label = "Correlation between covariates", value = 0.3, min = 0, max = 0.99, step = 0.01), sliderInput("rcovdvExpCov", label = "Correlation between covariates and dependent variable", value = 0.5, min = 0, max = 0.99, step = 0.01), radioButtons("interactExpCov", label = "Should models include interactions?", choiceNames = list("No", "Yes"), choiceValues = list("No", "Yes")), + .effectInputs("effectExpCov", "heterogeneityExpCov", "standardized mean-difference scale"), radioButtons("strategyExpCov", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaExpCov", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterExpCov", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -130,19 +146,16 @@ body <- dashboardBody( plotOutput("expCutPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("expCutFPHack"), - h4("False positive rate (original)"), - textOutput("expCutFPOrig"))), + .rateBox("expCutFPHack", "expCutFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("expCutPlot4")), box(width=6, status = "primary", 
plotOutput("expCutPlot5")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nobsExpCut", label = "Number of observations", value = 30, min = 10, max = 1000), + .effectInputs("effectExpCut", "heterogeneityExpCut", "Fisher-z scale"), radioButtons("strategyExpCut", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaExpCut", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterExpCut", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -159,22 +172,19 @@ body <- dashboardBody( plotOutput("favImpPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("favImpFPHack"), - h4("False positive rate (original)"), - textOutput("favImpFPOrig"))), + .rateBox("favImpFPHack", "favImpFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("favImpPlot4")), box(width=6, status = "primary", plotOutput("favImpPlot5")))), column(width=4, - box(width=12, status="primary", height=810, background="navy", + box(width=12, status="primary", height=900, background="navy", h4("Simulation settings"), numericInput("nobsfavImp", label = "Number of observations", value = 30, min = 10, max = 1000), sliderInput("missingfavImp", label = "Percentage of missing values", value = 0.1, min = 0, max = 0.5, step = 0.01), checkboxGroupInput("whichImpfavImp", label = "Imputation methods", choiceNames = list("Delete missing values", "Mean imputation", "Median imputation", "Mode imputation", "Predictive mean matching", "Weighted predictive mean matching", "Sample from observed values", "Bayesian linear regression", "Linear regression ignoring model error", "Linear regression predicted values"), 
choiceValues = as.list(as.character(c(1:10))), selected = as.character(c(1:3))), + .effectInputs("effectfavImp", "heterogeneityfavImp", "Fisher-z scale"), radioButtons("strategyfavImp", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphafavImp", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterfavImp", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -191,20 +201,17 @@ body <- dashboardBody( plotOutput("roundingPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("roundingFPHack"), - h4("False positive rate (original)"), - textOutput("roundingFPOrig"))), + .rateBox("roundingFPHack", "roundingFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("roundingPlot4")), box(width=6, status = "primary", plotOutput("roundingPlot5")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("levelRounding", label = "Rounding margin (e.g., 0.001 if all p-values smaller than 0.051 should be rounded to 0.05)", value = 0.001, min = 0.0001, max = 0.01), - radioButtons("altRounding", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (greater)"), choiceValues = list("two.sided", "greater")), + radioButtons("altRounding", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (positive effect: treatment > control)"), choiceValues = list("two.sided", "greater")), + .effectInputs("effectRounding", "heterogeneityRounding", "standardized mean-difference scale"), numericInput("alphaRounding", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterRounding", label = "Simulation iterations", 
value = 1000, min = 10, max = 10000), actionButton("calcRounding", "Start simulation"), @@ -220,11 +227,7 @@ body <- dashboardBody( plotOutput("optStopPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("optStopFPHack"), - h4("False positive rate (original)"), - textOutput("optStopFPOrig"))), + .rateBox("optStopFPHack", "optStopFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("optStopPlot4")), box(width=6, status = "primary", @@ -234,12 +237,13 @@ body <- dashboardBody( box(width=6, status = "primary", plotOutput("optStopPlot7")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nminOptStop", label = "Minimum sample size", value = 10, min = 5, max = 100), numericInput("nmaxOptStop", label = "Maximum sample size", value = 100, min = 30, max = 500), sliderInput("stepOptStop", label = "Step size", value = 1, min = 1, max = 10, step = 1), - radioButtons("altOptStop", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (greater)"), choiceValues = list("two.sided", "greater")), + radioButtons("altOptStop", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (positive effect: treatment > control)"), choiceValues = list("two.sided", "greater")), + .effectInputs("effectOptStop", "heterogeneityOptStop", "standardized mean-difference scale"), numericInput("alphaOptStop", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterOptStop", label = "Simulation iterations", value = 1000, min = 10, max = 10000), actionButton("calcOptStop", "Start simulation"), @@ -254,21 +258,18 @@ body <- dashboardBody( plotOutput("outExclPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("outExclFPHack"), 
- h4("False positive rate (original)"), - textOutput("outExclFPOrig"))), + .rateBox("outExclFPHack", "outExclFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("outExclPlot4")), box(width=6, status = "primary", plotOutput("outExclPlot5")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nobsOutExcl", label = "Number of observations", value = 30, min = 10, max = 1000), checkboxGroupInput("whichOutExcl", label = "Outlier exclusion methods", choiceNames = list("Boxplot", "Stem & leaf plot", "Standard deviation", "Percentile", "Studentized residuals", "Standardized residuals", "DFBETA", "DFFITS", "Cook's D", "Mahalanobis distance", "Leverage values", "Covariance ratio"), choiceValues = as.list(as.character(c(1:12))), selected = as.character(c(1:2))), + .effectInputs("effectOutExcl", "heterogeneityOutExcl", "Fisher-z scale"), radioButtons("strategyOutExcl", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaOutExcl", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterOutExcl", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -285,11 +286,7 @@ body <- dashboardBody( plotOutput("SRDVPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("SRDVFPHack"), - h4("False positive rate (original)"), - textOutput("SRDVFPOrig"))), + .rateBox("SRDVFPHack", "SRDVFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("SRDVPlot4")), box(width=6, status = "primary", @@ -299,12 +296,13 @@ body <- dashboardBody( box(width=6, status = "primary", plotOutput("SRDVPlot7")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", 
+ box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nobsSRDV", label = "Number of observations", value = 30, min = 10, max = 1000), numericInput("nvarSRDV", label = "Number of dependent variables", value = 5, min = 1, max = 100), sliderInput("rSRDV", label = "Correlation of dependent variables", value = 0.5, min = 0, max = 0.99, step = 0.01), - radioButtons("altSRDV", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (greater)"), choiceValues = list("two.sided", "greater")), + radioButtons("altSRDV", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (positive effect: treatment > control)"), choiceValues = list("two.sided", "greater")), + .effectInputs("effectSRDV", "heterogeneitySRDV", "standardized mean-difference scale"), radioButtons("strategySRDV", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaSRDV", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterSRDV", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -321,11 +319,7 @@ body <- dashboardBody( plotOutput("SRIVPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("SRIVFPHack"), - h4("False positive rate (original)"), - textOutput("SRIVFPOrig"))), + .rateBox("SRIVFPHack", "SRIVFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("SRIVPlot4")), box(width=6, status = "primary", @@ -335,12 +329,13 @@ body <- dashboardBody( box(width=6, status = "primary", plotOutput("SRIVPlot7")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nobsSRIV", label = "Number of observations", value 
= 30, min = 10, max = 1000), numericInput("nvarSRIV", label = "Number of independent variables", value = 5, min = 1, max = 100), sliderInput("rSRIV", label = "Correlation of independent variables", value = 0.5, min = 0, max = 0.99, step = 0.01), - radioButtons("altSRIV", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (greater)"), choiceValues = list("two.sided", "greater")), + radioButtons("altSRIV", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (positive effect: treatment > control)"), choiceValues = list("two.sided", "greater")), + .effectInputs("effectSRIV", "heterogeneitySRIV", "standardized mean-difference scale"), radioButtons("strategySRIV", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaSRIV", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterSRIV", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -357,16 +352,13 @@ body <- dashboardBody( plotOutput("statAnalysisPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("statAnalysisFPHack"), - h4("False positive rate (original)"), - textOutput("statAnalysisFPOrig")))), + .rateBox("statAnalysisFPHack", "statAnalysisFPOrig"))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nobsStatAnalysis", label = "Number of observations", value = 30, min = 10, max = 1000), - radioButtons("altStatAnalysis", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (greater)"), choiceValues = list("two.sided", "greater")), + radioButtons("altStatAnalysis", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided 
(positive effect: treatment > control)"), choiceValues = list("two.sided", "greater")), + .effectInputs("effectStatAnalysis", "heterogeneityStatAnalysis", "standardized mean-difference scale"), radioButtons("strategyStatAnalysis", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaStatAnalysis", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterStatAnalysis", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -383,11 +375,7 @@ body <- dashboardBody( plotOutput("subgroupPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("subgroupFPHack"), - h4("False positive rate (original)"), - textOutput("subgroupFPOrig"))), + .rateBox("subgroupFPHack", "subgroupFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("subgroupPlot4")), box(width=6, status = "primary", @@ -397,11 +385,12 @@ body <- dashboardBody( box(width=6, status = "primary", plotOutput("subgroupPlot7")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nobsSubgroup", label = "Number of observations", value = 30, min = 10, max = 1000), numericInput("nsubvarsSubgroup", label = "Number of subgroup variables", value = 5, min = 1, max = 100), - radioButtons("altSubgroup", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (greater)"), choiceValues = list("two.sided", "greater")), + radioButtons("altSubgroup", label = "Direction of the test", choiceNames = list("Two-sided", "One-sided (positive effect: treatment > control)"), choiceValues = list("two.sided", "greater")), + .effectInputs("effectSubgroup", "heterogeneitySubgroup", "standardized mean-difference 
scale"), radioButtons("strategySubgroup", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaSubgroup", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterSubgroup", label = "Simulation iterations", value = 1000, min = 10, max = 10000), @@ -418,20 +407,17 @@ body <- dashboardBody( plotOutput("varTransPlot"))), fixedRow(column(width=3, HTML("")), - box(width=6, status = "primary", - h4("False positive rate (p-hacked)"), - textOutput("varTransFPHack"), - h4("False positive rate (original)"), - textOutput("varTransFPOrig"))), + .rateBox("varTransFPHack", "varTransFPOrig")), fixedRow(box(width=6, status = "primary", plotOutput("varTransPlot4")), box(width=6, status = "primary", plotOutput("varTransPlot5")))), column(width=4, - box(width=12, status="primary", height=800, background="navy", + box(width=12, status="primary", height=890, background="navy", h4("Simulation settings"), numericInput("nobsVarTrans", label = "Number of observations", value = 30, min = 10, max = 1000), radioButtons("transvarVarTrans", label = "Which variables should be transformed?", choiceNames = list("Predictors", "Outcomes", "Predictors and Outcomes"), choiceValues = list("x", "y", "xy")), + .effectInputs("effectVarTrans", "heterogeneityVarTrans", "Fisher-z scale"), radioButtons("strategyVarTrans", label = "p-value selection method", choiceNames = list("First significant", "Smallest", "Smallest significant"), choiceValues = list("firstsig", "smallest", "smallest.sig")), numericInput("alphaVarTrans", label = "Significance level", value = 0.05, min = 0.001, max = 0.2), numericInput("iterVarTrans", label = "Simulation iterations", value = 1000, min = 10, max = 10000), diff --git a/phackR/inst/sim_startplots_Shiny.R b/phackR/inst/sim_startplots_Shiny.R index 70a2d9c..b093e35 100644 --- a/phackR/inst/sim_startplots_Shiny.R 
+++ b/phackR/inst/sim_startplots_Shiny.R @@ -4,109 +4,113 @@ library(phackR) startplots <- list() +.escol <- function(simdat, column){ + match(column, colnames(as.data.frame(simdat))) +} + # 1: Composite Scores -res1 <- sim.compscoreHack(nobs=30, ncompv=5, rcomp=0.8, ndelete=2, strategy = "firstsig", alpha = 0.05, iter = 1000) +res1 <- sim.compscoreHack(nobs=30, ncompv=5, rcomp=0.8, ndelete=2, strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000) startplots$compscorePlot <- phackR:::pplots(simdat=res1, alpha=0.05) -startplots$compscorePlotES <- phackR:::esplots(simdat=res1, EScolumn.hack=3, EScolumn.orig=4) +startplots$compscorePlotES <- phackR:::esplots(simdat=res1, EScolumn.hack=.escol(res1, "r2s.hack"), EScolumn.orig=.escol(res1, "r2s.orig")) startplots$compscore.fprate.p <- paste0(round(sum(res1[,"ps.hack"] < 0.05)/1000*100, 2), " %") startplots$compscore.fprate.o <- paste0(round(sum(res1[,"ps.orig"] < 0.05)/1000*100, 2), " %") startplots$res1 <- res1 # 2: Exploit Covariates -res2 <- sim.covhack(nobs.group = 30, ncov = 3, rcov = 0.3, rcovdv = 0.5, interactions = FALSE, strategy = "firstsig", alpha = 0.05, iter = 1000) +res2 <- sim.covhack(nobs.group = 30, ncov = 3, rcov = 0.3, rcovdv = 0.5, interactions = FALSE, strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000) startplots$expCovPlot <- phackR:::pplots(simdat=res2, alpha=0.05) -startplots$expCovES <- phackR:::esplots(simdat=res2, EScolumn.hack=3, EScolumn.orig=4, titles = c(expression("Distribution of p-hacked effect sizes "*eta^2), - expression("Distribution of original effect sizes "*eta^2))) +startplots$expCovES <- phackR:::esplots(simdat=res2, EScolumn.hack=.escol(res2, "eta2s.hack"), EScolumn.orig=.escol(res2, "eta2s.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*eta^2), + expression("Distribution of original effect sizes "*eta^2))) startplots$expcov.fprate.p <- paste0(round(sum(res2[,"ps.hack"] < 0.05)/1000*100, 2), " %") 
startplots$expcov.fprate.o <- paste0(round(sum(res2[,"ps.orig"] < 0.05)/1000*100, 2), " %") startplots$res2 <- res2 # 3: Exploit Cutoffs -res3 <- sim.cutoffHack(nobs = 30, strategy = "firstsig", alpha = 0.05, iter = 1000) +res3 <- sim.cutoffHack(nobs = 30, strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000) startplots$expCutPlot <- phackR:::pplots(simdat=res3, alpha=0.05) -startplots$expCutES <- phackR:::esplots(simdat=res3, EScolumn.hack=3, EScolumn.orig=4) +startplots$expCutES <- phackR:::esplots(simdat=res3, EScolumn.hack=.escol(res3, "r2s.hack"), EScolumn.orig=.escol(res3, "r2s.orig")) startplots$expcut.fprate.p <- paste0(round(sum(res3[,"ps.hack"] < 0.05)/1000*100, 2), " %") startplots$expcut.fprate.o <- paste0(round(sum(res3[,"ps.orig"] < 0.05)/1000*100, 2), " %") startplots$res3 <- res3 # 4: Favorable Imputation -res4 <- sim.impHack(nobs = 30, missing = 0.1, which = c(1:3), strategy = "firstsig", alpha = 0.05, iter = 1000) +res4 <- sim.impHack(nobs = 30, missing = 0.1, which = c(1:3), strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000) startplots$favImpPlot <- phackR:::pplots(simdat=res4, alpha=0.05) -startplots$favImpES <- phackR:::esplots(simdat=res4, EScolumn.hack=3, EScolumn.orig=4) +startplots$favImpES <- phackR:::esplots(simdat=res4, EScolumn.hack=.escol(res4, "r2s.hack"), EScolumn.orig=.escol(res4, "r2s.orig")) startplots$favimp.fprate.p <- paste0(round(sum(res4[,"ps.hack"] < 0.05)/1000*100, 2), " %") startplots$favimp.fprate.o <- paste0(round(sum(res4[,"ps.orig"] < 0.05)/1000*100, 2), " %") startplots$res4 <- res4 # 5: Incorrect Rounding -res5 <- sim.roundhack(roundinglevel = 0.051, iter = 1000, alternative = "two.sided", alpha = 0.05) +res5 <- sim.roundhack(roundinglevel = 0.051, effect = 0, heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05) startplots$roundingPlot <- phackR:::pplots(simdat=res5, alpha=0.05) -startplots$roundingES <- phackR:::esplots(simdat=res5, 
EScolumn.hack=3, EScolumn.orig=4) +startplots$roundingES <- phackR:::esplots(simdat=res5, EScolumn.hack=.escol(res5, "r2s.hack"), EScolumn.orig=.escol(res5, "r2s.orig")) startplots$rounding.fprate.p <- paste0(sum(round(res5[,"ps.hack"] <= 0.05)/1000*100, 2), " %") startplots$rounding.fprate.o <- paste0(sum(round(res5[,"ps.orig"] <= 0.05)/1000*100, 2), " %") startplots$res5 <- res5 # 6: Optional Stopping -res6 <- sim.optstop(n.min = 10, n.max = 100, step = 1, alternative = "two.sided", iter = 1000, alpha = 0.05) +res6 <- sim.optstop(n.min = 10, n.max = 100, step = 1, effect = 0, heterogeneity = 0, alternative = "two.sided", iter = 1000, alpha = 0.05) startplots$optstopPlot <- phackR:::pplots(simdat = res6, alpha = 0.05) -startplots$optstopESr2 <- phackR:::esplots(simdat=res6, EScolumn.hack=3, EScolumn.orig=4) -startplots$optstopESd <- phackR:::esplots(simdat=res6, EScolumn.hack=5, EScolumn.orig=6, titles = c(expression("Distribution of p-hacked effect sizes "*delta), - expression("Distribution of original effect sizes "*delta))) +startplots$optstopESr2 <- phackR:::esplots(simdat=res6, EScolumn.hack=.escol(res6, "r2s.hack"), EScolumn.orig=.escol(res6, "r2s.orig")) +startplots$optstopESd <- phackR:::esplots(simdat=res6, EScolumn.hack=.escol(res6, "ds.hack"), EScolumn.orig=.escol(res6, "ds.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*delta), + expression("Distribution of original effect sizes "*delta))) startplots$optstop.fprate.p <- paste0(round(sum(res6[,"ps.hack"] <= 0.05)/1000*100, 2), " %") startplots$optstop.fprate.o <- paste0(round(sum(res6[,"ps.orig"] <= 0.05)/1000*100, 2), " %") startplots$res6 <- res6 # 7: Outlier Exclusion -res7 <- sim.outHack(nobs = 30, which = c(1:2), strategy = "firstsig", alpha = 0.05, iter = 1000) +res7 <- sim.outHack(nobs = 30, which = c(1:2), strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000) startplots$outExclPlot <- phackR:::pplots(simdat = res7, alpha = 0.05) 
-startplots$outExclES <- phackR:::esplots(simdat = res7, EScolumn.hack = 3, EScolumn.orig = 4) +startplots$outExclES <- phackR:::esplots(simdat = res7, EScolumn.hack = .escol(res7, "r2s.hack"), EScolumn.orig = .escol(res7, "r2s.orig")) startplots$outExcl.fprate.p <- paste0(round(sum(res7[,"ps.hack"] <= 0.05)/1000*100, 2), " %") startplots$outExcl.fprate.o <- paste0(round(sum(res7[,"ps.orig"] <= 0.05)/1000*100, 2), " %") startplots$res7 <- res7 # 9: Selective Reporting DV -res9 <- sim.multDVhack(nobs.group = 30, nvar = 5, r = 0.5, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05) +res9 <- sim.multDVhack(nobs.group = 30, nvar = 5, r = 0.5, strategy = "firstsig", effect = 0, heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05) startplots$SRDVPlot <- phackR:::pplots(simdat = res9, alpha = 0.05) -startplots$SRDVESr2 <- phackR:::esplots(simdat=res9, EScolumn.hack=3, EScolumn.orig=4) -startplots$SRDVESd <- phackR:::esplots(simdat=res9, EScolumn.hack=5, EScolumn.orig=6, titles = c(expression("Distribution of p-hacked effect sizes "*delta), - expression("Distribution of original effect sizes "*delta))) +startplots$SRDVESr2 <- phackR:::esplots(simdat=res9, EScolumn.hack=.escol(res9, "r2s.hack"), EScolumn.orig=.escol(res9, "r2s.orig")) +startplots$SRDVESd <- phackR:::esplots(simdat=res9, EScolumn.hack=.escol(res9, "ds.hack"), EScolumn.orig=.escol(res9, "ds.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*delta), + expression("Distribution of original effect sizes "*delta))) startplots$SRDV.fprate.p <- paste0(round(sum(res9[,"ps.hack"] <= 0.05)/1000*100, 2), " %") startplots$SRDV.fprate.o <- paste0(round(sum(res9[,"ps.orig"] <= 0.05)/1000*100, 2), " %") startplots$res9 <- res9 # 10: Selective Reporting IV -res10 <- sim.multDVhack(nobs.group = 30, nvar = 5, r = 0.5, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05) +res10 <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.5, 
regression = FALSE, strategy = "firstsig", effect = 0, heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05) startplots$SRIVPlot <- phackR:::pplots(simdat = res10, alpha = 0.05) -startplots$SRIVESr2 <- phackR:::esplots(simdat=res10, EScolumn.hack=3, EScolumn.orig=4) -startplots$SRIVESd <- phackR:::esplots(simdat=res10, EScolumn.hack=5, EScolumn.orig=6, titles = c(expression("Distribution of p-hacked effect sizes "*delta), - expression("Distribution of original effect sizes "*delta))) +startplots$SRIVESr2 <- phackR:::esplots(simdat=res10, EScolumn.hack=.escol(res10, "r2s.hack"), EScolumn.orig=.escol(res10, "r2s.orig")) +startplots$SRIVESd <- phackR:::esplots(simdat=res10, EScolumn.hack=.escol(res10, "ds.hack"), EScolumn.orig=.escol(res10, "ds.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*delta), + expression("Distribution of original effect sizes "*delta))) startplots$SRIV.fprate.p <- paste0(round(sum(res10[,"ps.hack"] <= 0.05)/1000*100, 2), " %") startplots$SRIV.fprate.o <- paste0(round(sum(res10[,"ps.orig"] <= 0.05)/1000*100, 2), " %") startplots$res10 <- res10 # 11: Statistical Analyses -res11 <- sim.statAnalysisHack(nobs.group = 30, strategy = "firstsig", alternative = "two.sided", alpha = 0.05, iter = 1000) +res11 <- sim.statAnalysisHack(nobs.group = 30, strategy = "firstsig", effect = 0, heterogeneity = 0, alternative = "two.sided", alpha = 0.05, iter = 1000) startplots$statAnalysisPlot <- phackR:::pplots(simdat = res11, alpha = 0.05) startplots$statAnalysis.fprate.p <- paste0(round(sum(res11[,"ps.hack"] <= 0.05)/1000*100, 2), " %") startplots$statAnalysis.fprate.o <- paste0(round(sum(res11[,"ps.orig"] <= 0.05)/1000*100, 2), " %") startplots$res11 <- res11 # 12: Subgroup Analyses -res12 <- sim.subgroupHack(nobs.group = 30, nsubvars = 5, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, iter = 1000) +res12 <- sim.subgroupHack(nobs.group = 30, nsubvars = 5, effect = 0, heterogeneity = 0, alternative = 
"two.sided", strategy = "firstsig", alpha = 0.05, iter = 1000) startplots$subgroupPlot <- phackR:::pplots(simdat = res12, alpha = 0.05) -startplots$subgroupESr2 <- phackR:::esplots(simdat=res12, EScolumn.hack=3, EScolumn.orig=4) -startplots$subgroupESd <- phackR:::esplots(simdat=res12, EScolumn.hack=5, EScolumn.orig=6, titles = c(expression("Distribution of p-hacked effect sizes "*delta), - expression("Distribution of original effect sizes "*delta))) +startplots$subgroupESr2 <- phackR:::esplots(simdat=res12, EScolumn.hack=.escol(res12, "r2s.hack"), EScolumn.orig=.escol(res12, "r2s.orig")) +startplots$subgroupESd <- phackR:::esplots(simdat=res12, EScolumn.hack=.escol(res12, "ds.hack"), EScolumn.orig=.escol(res12, "ds.orig"), titles = c(expression("Distribution of p-hacked effect sizes "*delta), + expression("Distribution of original effect sizes "*delta))) startplots$subgroup.fprate.p <- paste0(round(sum(res12[,"ps.hack"] <= 0.05)/1000*100, 2), " %") startplots$subgroup.fprate.o <- paste0(round(sum(res12[,"ps.orig"] <= 0.05)/1000*100, 2), " %") startplots$res12 <- res12 # 13: Variable Transformations -res13 <- sim.varTransHack(nobs = 30, transvar = "x", strategy = "firstsig", alpha = 0.05, iter = 1000) +res13 <- sim.varTransHack(nobs = 30, transvar = "x", strategy = "firstsig", effect = 0, heterogeneity = 0, alpha = 0.05, iter = 1000) startplots$varTransPlot <- phackR:::pplots(simdat = res13, alpha = 0.05) -startplots$varTransES <- phackR:::esplots(simdat = res13, EScolumn.hack = 3, EScolumn.orig = 4) +startplots$varTransES <- phackR:::esplots(simdat = res13, EScolumn.hack = .escol(res13, "r2s.hack"), EScolumn.orig = .escol(res13, "r2s.orig")) startplots$varTrans.fprate.p <- paste0(round(sum(res13[,"ps.hack"] <= 0.05)/1000*100, 2), " %") startplots$varTrans.fprate.o <- paste0(round(sum(res13[,"ps.orig"] <= 0.05)/1000*100, 2), " %") startplots$res13 <- res13 diff --git a/phackR/man/dot-combine.phase1.results.Rd b/phackR/man/dot-combine.phase1.results.Rd new file mode 
100644 index 0000000..4f88185 --- /dev/null +++ b/phackR/man/dot-combine.phase1.results.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.combine.phase1.results} +\alias{.combine.phase1.results} +\title{Combine legacy simulation output with the normalized reporting block} +\usage{ +.combine.phase1.results(res, legacy.fields) +} +\arguments{ +\item{res}{List of iteration results} + +\item{legacy.fields}{Character vector defining the legacy output columns} +} +\description{ +Combine legacy simulation output with the normalized reporting block +} diff --git a/phackR/man/dot-compCohensDData.Rd b/phackR/man/dot-compCohensDData.Rd new file mode 100644 index 0000000..c9c92ee --- /dev/null +++ b/phackR/man/dot-compCohensDData.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.compCohensDData} +\alias{.compCohensDData} +\title{Compute Cohen's d from the observed data} +\usage{ +.compCohensDData(control, treatment) +} +\arguments{ +\item{control}{values of the control group} + +\item{treatment}{values of the treatment group} +} +\description{ +Compute Cohen's d from the observed data +} diff --git a/phackR/man/dot-compCohensDSE.Rd b/phackR/man/dot-compCohensDSE.Rd new file mode 100644 index 0000000..7547383 --- /dev/null +++ b/phackR/man/dot-compCohensDSE.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.compCohensDSE} +\alias{.compCohensDSE} +\title{Compute the standard error of Cohen's d} +\usage{ +.compCohensDSE(d, n1, n2) +} +\arguments{ +\item{d}{Cohen's d} + +\item{n1}{Sample size in group 1} + +\item{n2}{Sample size in group 2} +} +\description{ +Compute the standard error of Cohen's d +} diff --git a/phackR/man/dot-compCohensDStat.Rd b/phackR/man/dot-compCohensDStat.Rd new file mode 100644 index 0000000..d37e579 --- /dev/null +++ b/phackR/man/dot-compCohensDStat.Rd @@ 
-0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.compCohensDStat} +\alias{.compCohensDStat} +\title{Compute Cohen's d from a test statistic and group sizes} +\usage{ +.compCohensDStat(statistic, n1, n2) +} +\arguments{ +\item{statistic}{Test statistic} + +\item{n1}{Sample size in group 1} + +\item{n2}{Sample size in group 2} +} +\description{ +Compute Cohen's d from a test statistic and group sizes +} diff --git a/phackR/man/dot-compFisherZSE.Rd b/phackR/man/dot-compFisherZSE.Rd new file mode 100644 index 0000000..ddc899f --- /dev/null +++ b/phackR/man/dot-compFisherZSE.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.compFisherZSE} +\alias{.compFisherZSE} +\title{Compute the standard error of Fisher-z} +\usage{ +.compFisherZSE(n) +} +\arguments{ +\item{n}{Sample size} +} +\description{ +Compute the standard error of Fisher-z +} diff --git a/phackR/man/dot-draw.study.effect.Rd b/phackR/man/dot-draw.study.effect.Rd new file mode 100644 index 0000000..89ef6e1 --- /dev/null +++ b/phackR/man/dot-draw.study.effect.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.draw.study.effect} +\alias{.draw.study.effect} +\title{Draw one study-level effect size} +\usage{ +.draw.study.effect(effect = 0, heterogeneity = 0) +} +\arguments{ +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} +} +\description{ +Draw one study-level effect size +} diff --git a/phackR/man/dot-fisherz_to_r.Rd b/phackR/man/dot-fisherz_to_r.Rd new file mode 100644 index 0000000..840ef46 --- /dev/null +++ b/phackR/man/dot-fisherz_to_r.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.fisherz_to_r} +\alias{.fisherz_to_r} +\title{Convert a Fisher-z value to a correlation} +\usage{ 
+.fisherz_to_r(effect) +} +\arguments{ +\item{effect}{Fisher-z effect size} +} +\description{ +Convert a Fisher-z value to a correlation +} diff --git a/phackR/man/dot-multDVhack.Rd b/phackR/man/dot-multDVhack.Rd index 68d010d..d9fba55 100644 --- a/phackR/man/dot-multDVhack.Rd +++ b/phackR/man/dot-multDVhack.Rd @@ -22,7 +22,7 @@ \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{alpha}{Significance level of the t-test} } diff --git a/phackR/man/dot-multIVhack_reg.Rd b/phackR/man/dot-multIVhack_reg.Rd index 3b2fddd..f35a40a 100644 --- a/phackR/man/dot-multIVhack_reg.Rd +++ b/phackR/man/dot-multIVhack_reg.Rd @@ -22,7 +22,7 @@ \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). For the regression path, \code{"greater"} tests a positive association.} \item{alpha}{Significance level of the t-test (default: 0.05)} } diff --git a/phackR/man/dot-multIVhack_ttest.Rd b/phackR/man/dot-multIVhack_ttest.Rd index 0e65db5..9a0c442 100644 --- a/phackR/man/dot-multIVhack_ttest.Rd +++ b/phackR/man/dot-multIVhack_ttest.Rd @@ -22,7 +22,7 @@ \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). 
For the t-test path, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{alpha}{Significance level of the t-test (default: 0.05)} } diff --git a/phackR/man/dot-onesided_from_twosided.Rd b/phackR/man/dot-onesided_from_twosided.Rd new file mode 100644 index 0000000..a5910b0 --- /dev/null +++ b/phackR/man/dot-onesided_from_twosided.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.onesided_from_twosided} +\alias{.onesided_from_twosided} +\title{Convert a two-sided p-value to a one-sided p-value} +\usage{ +.onesided_from_twosided(p.twosided, stat, alternative = "two.sided") +} +\arguments{ +\item{p.twosided}{Two-sided p-value} + +\item{stat}{Test statistic} + +\item{alternative}{Direction of the test} +} +\description{ +Convert a two-sided p-value to a one-sided p-value +} diff --git a/phackR/man/dot-optstop.Rd b/phackR/man/dot-optstop.Rd index 5d3cd61..70be5de 100644 --- a/phackR/man/dot-optstop.Rd +++ b/phackR/man/dot-optstop.Rd @@ -31,7 +31,7 @@ \item{peek}{Determines how often one peeks at the data. Overrides step argument if not NULL.} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). 
Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{alpha}{Significance level of the t-test (default: 0.05)} } diff --git a/phackR/man/dot-r_to_fisherz.Rd b/phackR/man/dot-r_to_fisherz.Rd new file mode 100644 index 0000000..a03527f --- /dev/null +++ b/phackR/man/dot-r_to_fisherz.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.r_to_fisherz} +\alias{.r_to_fisherz} +\title{Convert a correlation to Fisher-z} +\usage{ +.r_to_fisherz(r) +} +\arguments{ +\item{r}{Correlation} +} +\description{ +Convert a correlation to Fisher-z +} diff --git a/phackR/man/dot-report.association.Rd b/phackR/man/dot-report.association.Rd new file mode 100644 index 0000000..29074fb --- /dev/null +++ b/phackR/man/dot-report.association.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.report.association} +\alias{.report.association} +\title{Build one normalized reporting entry for a simple association analysis} +\usage{ +.report.association( + x, + y, + method = "lm", + alternative = "two.sided", + p.override = NULL, + stat.override = NULL +) +} +\arguments{ +\item{x}{Predictor values} + +\item{y}{Criterion values} + +\item{method}{Analysis method} + +\item{alternative}{Direction of the test. 
For association analyses, \code{"greater"} tests a positive association.} + +\item{p.override}{Optional p-value override} + +\item{stat.override}{Optional statistic override} +} +\description{ +Build one normalized reporting entry for a simple association analysis +} diff --git a/phackR/man/dot-report.group_lm.Rd b/phackR/man/dot-report.group_lm.Rd new file mode 100644 index 0000000..670c767 --- /dev/null +++ b/phackR/man/dot-report.group_lm.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.report.group_lm} +\alias{.report.group_lm} +\title{Build one normalized reporting entry for a group effect in a linear model} +\usage{ +.report.group_lm(formula, data, groupvar = "group", method = "ancova") +} +\arguments{ +\item{formula}{Model formula} + +\item{data}{Analysis data} + +\item{groupvar}{Group variable name} + +\item{method}{Analysis method} +} +\description{ +Build one normalized reporting entry for a group effect in a linear model +} diff --git a/phackR/man/dot-report.multicat.Rd b/phackR/man/dot-report.multicat.Rd new file mode 100644 index 0000000..f1ac344 --- /dev/null +++ b/phackR/man/dot-report.multicat.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.report.multicat} +\alias{.report.multicat} +\title{Build one normalized reporting entry for a multi-category analysis} +\usage{ +.report.multicat(group, y, method = "anova") +} +\arguments{ +\item{group}{Grouping variable} + +\item{y}{Criterion values} + +\item{method}{Analysis method} +} +\description{ +Build one normalized reporting entry for a multi-category analysis +} diff --git a/phackR/man/dot-report.twogroup.Rd b/phackR/man/dot-report.twogroup.Rd new file mode 100644 index 0000000..aeda8ac --- /dev/null +++ b/phackR/man/dot-report.twogroup.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R 
+\name{.report.twogroup} +\alias{.report.twogroup} +\title{Build one normalized reporting entry for a two-group analysis} +\usage{ +.report.twogroup( + control, + treatment, + method = "t.equal", + alternative = "two.sided", + trim = NULL, + p.override = NULL +) +} +\arguments{ +\item{control}{values of the control group} + +\item{treatment}{values of the treatment group} + +\item{method}{Analysis method} + +\item{alternative}{Direction of the test. For group-comparison analyses, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} + +\item{trim}{Trimming level for Yuen's test} + +\item{p.override}{Optional p-value override} +} +\description{ +Build one normalized reporting entry for a two-group analysis +} diff --git a/phackR/man/dot-roundhack.Rd b/phackR/man/dot-roundhack.Rd index b48d85b..01876fa 100644 --- a/phackR/man/dot-roundhack.Rd +++ b/phackR/man/dot-roundhack.Rd @@ -22,7 +22,7 @@ \item{roundinglevel}{Highest p-value that is rounded down to 0.05} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). 
Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{alpha}{Significance level of the t-test (default: 0.05)} } diff --git a/phackR/man/dot-selectanalysis.Rd b/phackR/man/dot-selectanalysis.Rd new file mode 100644 index 0000000..8761903 --- /dev/null +++ b/phackR/man/dot-selectanalysis.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.selectanalysis} +\alias{.selectanalysis} +\title{Select a p-hacked analysis from a vector of p-values} +\usage{ +.selectanalysis(ps, strategy, alpha) +} +\arguments{ +\item{ps}{Vector of p values} + +\item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} + +\item{alpha}{Significance level (default: 0.05)} +} +\description{ +Takes a vector of p-values and returns the index of the selected analysis. +} diff --git a/phackR/man/dot-sim.association.Rd b/phackR/man/dot-sim.association.Rd new file mode 100644 index 0000000..bf5debb --- /dev/null +++ b/phackR/man/dot-sim.association.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.sim.association} +\alias{.sim.association} +\title{Simulate two continuous variables with a study-level association} +\usage{ +.sim.association( + nobs, + effect = 0, + heterogeneity = 0, + theta = NULL, + missing = 0 +) +} +\arguments{ +\item{nobs}{Number of observations} + +\item{effect}{Mean Fisher-z effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity on the Fisher-z scale} + +\item{theta}{Study-specific true effect size} + +\item{missing}{Proportion of missing values per variable} +} +\description{ +Simulate two continuous variables with a study-level association +} diff --git a/phackR/man/dot-sim.compscore.Rd b/phackR/man/dot-sim.compscore.Rd index f5353e1..4757687 100644 --- a/phackR/man/dot-sim.compscore.Rd +++ b/phackR/man/dot-sim.compscore.Rd @@ -4,7 +4,7 @@ 
\alias{.sim.compscore} \title{Simulate data: Correlated composite score raw variables and one non-correlated dependent variable} \usage{ -.sim.compscore(nobs, ncompv, rcomp) +.sim.compscore(nobs, ncompv, rcomp, effect = 0, heterogeneity = 0) } \arguments{ \item{nobs}{Integer giving number of observations} @@ -12,6 +12,10 @@ \item{ncompv}{Integer giving number of variables to build the composite score} \item{rcomp}{Correlation between the composite score variables} + +\item{effect}{Mean effect size across studies on the Fisher-z scale} + +\item{heterogeneity}{Between-study heterogeneity on the Fisher-z scale} } \description{ Simulate data: Correlated composite score raw variables and one non-correlated dependent variable diff --git a/phackR/man/dot-sim.covariates.Rd b/phackR/man/dot-sim.covariates.Rd index 351ce48..872f63d 100644 --- a/phackR/man/dot-sim.covariates.Rd +++ b/phackR/man/dot-sim.covariates.Rd @@ -4,7 +4,18 @@ \alias{.sim.covariates} \title{Simulate data with (correlated) covariates} \usage{ -.sim.covariates(nobs.group, ncov, rcov, rcovdv, mu = 0, sd = 1, missing = 0) +.sim.covariates( + nobs.group, + ncov, + rcov, + rcovdv, + effect = 0, + heterogeneity = 0, + mu = 0, + sd = 1, + missing = 0, + theta = NULL +) } \arguments{ \item{nobs.group}{Vector with number of observations per group} @@ -15,6 +26,10 @@ \item{rcovdv}{Correlation between covariates and dependent variable} +\item{effect}{Mean effect size across studies on the standardized mean-difference scale} + +\item{heterogeneity}{Between-study heterogeneity on the standardized mean-difference scale} + \item{mu}{Mean of the random data} \item{sd}{Standard deviation of the random data} diff --git a/phackR/man/dot-sim.data.Rd b/phackR/man/dot-sim.data.Rd index ce67e50..7ba0fdd 100644 --- a/phackR/man/dot-sim.data.Rd +++ b/phackR/man/dot-sim.data.Rd @@ -4,10 +4,16 @@ \alias{.sim.data} \title{Generic sampling function} \usage{ -.sim.data(nobs.group) +.sim.data(nobs.group, effect = 0, heterogeneity = 0, 
theta = NULL) } \arguments{ \item{nobs.group}{Number of observations per group. Either a scalar or a vector with two elements.} + +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} + +\item{theta}{Study-specific true effect size} } \description{ Outputs a data frame with two columns diff --git a/phackR/man/dot-sim.multDV.Rd b/phackR/man/dot-sim.multDV.Rd index 05e20b0..b71b742 100644 --- a/phackR/man/dot-sim.multDV.Rd +++ b/phackR/man/dot-sim.multDV.Rd @@ -4,7 +4,7 @@ \alias{.sim.multDV} \title{Simulate dataset with multiple dependent variables} \usage{ -.sim.multDV(nobs.group, nvar, r) +.sim.multDV(nobs.group, nvar, r, effect = 0, heterogeneity = 0) } \arguments{ \item{nobs.group}{Vector giving number of observations per group} @@ -12,6 +12,10 @@ \item{nvar}{Number of dependent variables in the data frame} \item{r}{Desired correlation between the dependent variables (scalar)} + +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} } \description{ Outputs data frame with a grouping variable and multiple correlated dependent variables diff --git a/phackR/man/dot-sim.multIV.Rd b/phackR/man/dot-sim.multIV.Rd index f72b50a..6ec50d3 100644 --- a/phackR/man/dot-sim.multIV.Rd +++ b/phackR/man/dot-sim.multIV.Rd @@ -4,7 +4,14 @@ \alias{.sim.multIV} \title{Simulate dataset with multiple independent variables} \usage{ -.sim.multIV(nobs.group, nvar, r, regression = FALSE) +.sim.multIV( + nobs.group, + nvar, + r, + regression = FALSE, + effect = 0, + heterogeneity = 0 +) } \arguments{ \item{nobs.group}{Scalar defining number of observations per group (or number of observations in predictors in regression)} @@ -14,6 +21,10 @@ \item{r}{Desired correlation between the independent variables (scalar)} \item{regression}{Should the simulation be conducted for a regression analysis (TRUE) or a t-test? 
(FALSE)} + +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} } \description{ Outputs data frame with multiple independent variables diff --git a/phackR/man/dot-sim.multregression.Rd b/phackR/man/dot-sim.multregression.Rd new file mode 100644 index 0000000..ed59641 --- /dev/null +++ b/phackR/man/dot-sim.multregression.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{.sim.multregression} +\alias{.sim.multregression} +\title{Simulate one criterion and multiple predictors with shared study-level association} +\usage{ +.sim.multregression(nobs, nvar, r, effect = 0, heterogeneity = 0, theta = NULL) +} +\arguments{ +\item{nobs}{Number of observations} + +\item{nvar}{Number of predictor variables} + +\item{r}{Correlation between predictor variables} + +\item{effect}{Mean Fisher-z effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity on the Fisher-z scale} + +\item{theta}{Study-specific true effect size} +} +\description{ +Simulate one criterion and multiple predictors with shared study-level association +} diff --git a/phackR/man/dot-sim.subgroup.Rd b/phackR/man/dot-sim.subgroup.Rd index a0bf72d..69b512b 100644 --- a/phackR/man/dot-sim.subgroup.Rd +++ b/phackR/man/dot-sim.subgroup.Rd @@ -4,12 +4,16 @@ \alias{.sim.subgroup} \title{Simulate data with subgroups} \usage{ -.sim.subgroup(nobs.group, nsubvars) +.sim.subgroup(nobs.group, nsubvars, effect = 0, heterogeneity = 0) } \arguments{ \item{nobs.group}{Vector giving number of observations per group} \item{nsubvars}{Integer specifying number of variables for potential subgroups} + +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} } \description{ Outputs data frame with multiple binary variables from which subgroups can be extracted diff --git a/phackR/man/dot-statAnalysisHack.Rd b/phackR/man/dot-statAnalysisHack.Rd index 
f4b1c64..a8bb940 100644 --- a/phackR/man/dot-statAnalysisHack.Rd +++ b/phackR/man/dot-statAnalysisHack.Rd @@ -22,7 +22,7 @@ \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{alpha}{Significance level of the t-test} } diff --git a/phackR/man/dot-subgroupHack.Rd b/phackR/man/dot-subgroupHack.Rd index 26e8cb2..3df562b 100644 --- a/phackR/man/dot-subgroupHack.Rd +++ b/phackR/man/dot-subgroupHack.Rd @@ -23,7 +23,7 @@ \item{subvars}{Vector specifying the location of the subgroup variables in the data frame} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} diff --git a/phackR/man/sim.compscoreHack.Rd b/phackR/man/sim.compscoreHack.Rd index ed0788f..891a90c 100644 --- a/phackR/man/sim.compscoreHack.Rd +++ b/phackR/man/sim.compscoreHack.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/compositeScores.R \name{sim.compscoreHack} \alias{sim.compscoreHack} -\title{Simulate p-hacking with composite scores -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} +\title{Simulate p-hacking with composite scores} \usage{ sim.compscoreHack( nobs, @@ -11,6 +10,8 @@ sim.compscoreHack( rcomp, ndelete, strategy = "firstsig", + effect = 0, + heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE @@ -27,6 +28,10 @@ sim.compscoreHack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 
+\item{effect}{Mean effect size across studies on the Fisher-z scale} + +\item{heterogeneity}{Between-study heterogeneity on the Fisher-z scale} + \item{alpha}{Significance level of the t-test (default: 0.05)} \item{iter}{Number of simulation iterations} @@ -34,6 +39,5 @@ sim.compscoreHack( \item{shinyEnv}{Is the function run in a Shiny session? TRUE/FALSE} } \description{ -Simulate p-hacking with composite scores -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.covhack.Rd b/phackR/man/sim.covhack.Rd index 1dbfa25..14f8ba5 100644 --- a/phackR/man/sim.covhack.Rd +++ b/phackR/man/sim.covhack.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/exploitCovariates.R \name{sim.covhack} \alias{sim.covhack} -\title{Simulate p-Hacking with multiple covariates -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} +\title{Simulate p-Hacking with multiple covariates} \usage{ sim.covhack( nobs.group, @@ -12,6 +11,8 @@ sim.covhack( rcovdv, interactions = FALSE, strategy = "firstsig", + effect = 0, + heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE @@ -30,6 +31,10 @@ sim.covhack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} +\item{effect}{Mean effect size across studies on the standardized mean-difference scale} + +\item{heterogeneity}{Between-study heterogeneity on the standardized mean-difference scale} + \item{alpha}{Significance level of the t-test} \item{iter}{Number of simulation iterations} @@ -37,6 +42,5 @@ sim.covhack( \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Simulate p-Hacking with multiple covariates -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.cutoffHack.Rd b/phackR/man/sim.cutoffHack.Rd index 70be275..1a0b57c 100644 --- a/phackR/man/sim.cutoffHack.Rd +++ b/phackR/man/sim.cutoffHack.Rd @@ -2,12 +2,13 @@ % Please edit documentation in R/exploitCutoffs.R \name{sim.cutoffHack} \alias{sim.cutoffHack} -\title{Simulate p-Hacking for exploiting cutoff values -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} +\title{Simulate p-Hacking for exploiting cutoff values} \usage{ sim.cutoffHack( nobs, strategy = "firstsig", + effect = 0, + heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE @@ -18,6 +19,10 @@ sim.cutoffHack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} +\item{effect}{Mean effect size across studies on the Fisher-z scale} + +\item{heterogeneity}{Between-study heterogeneity on the Fisher-z scale} + \item{alpha}{Significance level of the t-test} \item{iter}{Number of simulation iterations} @@ -25,6 +30,5 @@ sim.cutoffHack( \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Simulate p-Hacking for exploiting cutoff values -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.impHack.Rd b/phackR/man/sim.impHack.Rd index 071e1ab..12a1993 100644 --- a/phackR/man/sim.impHack.Rd +++ b/phackR/man/sim.impHack.Rd @@ -9,6 +9,8 @@ sim.impHack( missing, which = c(1:10), strategy = "firstsig", + effect = 0, + heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE @@ -23,6 +25,10 @@ sim.impHack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} +\item{effect}{Mean effect size across studies on the Fisher-z scale} + +\item{heterogeneity}{Between-study heterogeneity on the Fisher-z scale} + \item{alpha}{Significance level of the t-test (default: 0.05)} \item{iter}{Number of simulation iterations} @@ -30,5 +36,5 @@ sim.impHack( \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.multDVhack.Rd b/phackR/man/sim.multDVhack.Rd index 46e30e2..c51e135 100644 --- a/phackR/man/sim.multDVhack.Rd +++ b/phackR/man/sim.multDVhack.Rd @@ -9,6 +9,8 @@ sim.multDVhack( nvar, r, strategy = "firstsig", + effect = 0, + heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05, @@ -24,14 +26,18 @@ sim.multDVhack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} + \item{iter}{Number of simulation iterations} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{alpha}{Significance level of the t-test (default: 0.05)} \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.multIVhack.Rd b/phackR/man/sim.multIVhack.Rd index de2fa47..37bc868 100644 --- a/phackR/man/sim.multIVhack.Rd +++ b/phackR/man/sim.multIVhack.Rd @@ -10,6 +10,8 @@ sim.multIVhack( r, regression = FALSE, strategy = "firstsig", + effect = 0, + heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05, @@ -27,14 +29,18 @@ sim.multIVhack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} +\item{effect}{Mean effect size across studies. For \code{regression = FALSE}, this is on the standardized mean-difference scale. For \code{regression = TRUE}, it is on the Fisher-z scale.} + +\item{heterogeneity}{Between-study heterogeneity. For \code{regression = FALSE}, this is on the standardized mean-difference scale. For \code{regression = TRUE}, it is on the Fisher-z scale.} + \item{iter}{Number of simulation iterations} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). For \code{regression = FALSE}, \code{"greater"} tests whether the treatment or second group exceeds the control or first group. For \code{regression = TRUE}, it tests a positive association.} \item{alpha}{Significance level of the t-test (default: 0.05)} \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.optstop.Rd b/phackR/man/sim.optstop.Rd index 927ccd5..0e874a7 100644 --- a/phackR/man/sim.optstop.Rd +++ b/phackR/man/sim.optstop.Rd @@ -2,13 +2,15 @@ % Please edit documentation in R/optionalStopping.R \name{sim.optstop} \alias{sim.optstop} -\title{Simulate p-hacking with incorrect rounding} +\title{Simulate p-hacking with optional stopping} \usage{ sim.optstop( n.min, n.max, step = 1, peek = NULL, + effect = 0, + heterogeneity = 0, alternative = "two.sided", iter = 1000, alpha = 0.05, @@ -24,7 +26,11 @@ sim.optstop( \item{peek}{Determines how often one peeks at the data. Overrides step argument if not NULL.} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} + +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{iter}{Number of iterations} @@ -33,5 +39,5 @@ sim.optstop( \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Simulate p-hacking with incorrect rounding +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.outHack.Rd b/phackR/man/sim.outHack.Rd index d282d4f..086d0f1 100644 --- a/phackR/man/sim.outHack.Rd +++ b/phackR/man/sim.outHack.Rd @@ -8,6 +8,8 @@ sim.outHack( nobs, which = c(1:12), strategy = "firstsig", + effect = 0, + heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE @@ -20,6 +22,10 @@ sim.outHack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} +\item{effect}{Mean effect size across studies on the Fisher-z scale} + +\item{heterogeneity}{Between-study heterogeneity on the Fisher-z scale} + \item{alpha}{Significance level of the t-test (default: 0.05)} \item{iter}{Number of simulation iterations} @@ -27,5 +33,5 @@ sim.outHack( \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.roundhack.Rd b/phackR/man/sim.roundhack.Rd index 979df89..081f74f 100644 --- a/phackR/man/sim.roundhack.Rd +++ b/phackR/man/sim.roundhack.Rd @@ -6,6 +6,8 @@ \usage{ sim.roundhack( roundinglevel, + effect = 0, + heterogeneity = 0, iter = 1000, alternative = "two.sided", alpha = 0.05, @@ -15,14 +17,18 @@ sim.roundhack( \arguments{ \item{roundinglevel}{Highest p-value that is rounded down to alpha} +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} + \item{iter}{Number of iterations} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{alpha}{Significance level of the t-test (default: 0.05)} \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Simulate p-hacking with incorrect rounding +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.statAnalysisHack.Rd b/phackR/man/sim.statAnalysisHack.Rd index 0345e3e..c05c156 100644 --- a/phackR/man/sim.statAnalysisHack.Rd +++ b/phackR/man/sim.statAnalysisHack.Rd @@ -7,6 +7,8 @@ sim.statAnalysisHack( nobs.group, strategy = "firstsig", + effect = 0, + heterogeneity = 0, alternative = "two.sided", alpha = 0.05, iter = 1000, @@ -18,7 +20,11 @@ sim.statAnalysisHack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} + +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{alpha}{Significance level of the t-test} @@ -27,5 +33,5 @@ sim.statAnalysisHack( \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.subgroupHack.Rd b/phackR/man/sim.subgroupHack.Rd index 662f47a..a938cef 100644 --- a/phackR/man/sim.subgroupHack.Rd +++ b/phackR/man/sim.subgroupHack.Rd @@ -2,12 +2,13 @@ % Please edit documentation in R/subgroupAnalysis.R \name{sim.subgroupHack} \alias{sim.subgroupHack} -\title{Simulate p-hacking with multiple subgroups -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} +\title{Simulate p-hacking with multiple subgroups} \usage{ sim.subgroupHack( nobs.group, nsubvars, + effect = 0, + heterogeneity = 0, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, @@ -20,7 +21,11 @@ sim.subgroupHack( \item{nsubvars}{Integer specifying number of variables for potential subgroups} -\item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} +\item{effect}{Mean effect size across studies} + +\item{heterogeneity}{Between-study heterogeneity} + +\item{alternative}{Direction of the t-test ("two.sided", "less", "greater"). Here, \code{"greater"} tests whether the treatment or second group exceeds the control or first group.} \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} @@ -31,6 +36,5 @@ sim.subgroupHack( \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Simulate p-hacking with multiple subgroups -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/man/sim.varTransHack.Rd b/phackR/man/sim.varTransHack.Rd index 2556ddf..6c4f139 100644 --- a/phackR/man/sim.varTransHack.Rd +++ b/phackR/man/sim.varTransHack.Rd @@ -2,14 +2,15 @@ % Please edit documentation in R/variableTransformation.R \name{sim.varTransHack} \alias{sim.varTransHack} -\title{Simulate p-hacking with variable transformations -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} +\title{Simulate p-hacking with variable transformations} \usage{ sim.varTransHack( nobs, transvar, testnorm = FALSE, strategy = "firstsig", + effect = 0, + heterogeneity = 0, alpha = 0.05, iter = 1000, shinyEnv = FALSE @@ -24,6 +25,10 @@ sim.varTransHack( \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} +\item{effect}{Mean effect size across studies on the Fisher-z scale} + +\item{heterogeneity}{Between-study heterogeneity on the Fisher-z scale} + \item{alpha}{Significance level of the t-test (default: 0.05)} \item{iter}{Number of simulation iterations} @@ -31,6 +36,5 @@ sim.varTransHack( \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} } \description{ -Simulate p-hacking with variable transformations -Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations +Outputs a data frame containing the p-hacked p-values (\code{ps.hack}), the original p-values (\code{ps.orig}), and a normalized reporting block from all iterations } diff --git a/phackR/tests/testthat/test-simfunctions.R b/phackR/tests/testthat/test-simfunctions.R index d34a639..4116075 100644 --- a/phackR/tests/testthat/test-simfunctions.R +++ b/phackR/tests/testthat/test-simfunctions.R @@ -4,10 +4,12 @@ test_that("Selective Reporting of DV works", { set.seed(2345) phack.ambitious <- sim.multDVhack(nobs.group = c(30, 30), nvar = 5, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "smallest.sig", iter = 100, alternative = "two.sided", alpha = 0.05) set.seed(2345) phack.normal <- sim.multDVhack(nobs.group = c(30, 30), nvar = 5, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "firstsig", iter = 100, alternative = "two.sided", alpha = 0.05) @@ -22,10 +24,12 @@ test_that("Selective Reporting of DV works", { set.seed(2345) phack.ambitious2 <- sim.multDVhack(nobs.group = c(30, 30), nvar = 15, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "smallest", iter = 100, alternative = "two.sided", alpha = 0.05) set.seed(2345) phack.normal2 <- sim.multDVhack(nobs.group = c(30, 30), nvar = 15, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "firstsig", iter = 100, alternative = "two.sided", alpha = 0.05) expect_gt(length(which(phack.ambitious2[ ,1] < 0.05)), @@ -38,10 +42,12 @@ test_that("Selective Reporting of IV works", { set.seed(1234) phack.ambitious <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "smallest.sig", iter = 100, alternative = "two.sided", alpha = 0.05) set.seed(1234) phack.normal <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "firstsig", 
iter = 100, alternative = "two.sided", alpha = 0.05) @@ -56,10 +62,12 @@ test_that("Selective Reporting of IV works", { set.seed(2345) phack.ambitious2 <- sim.multIVhack(nobs.group = c(30, 30), nvar = 15, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "smallest", iter = 100, alternative = "two.sided", alpha = 0.05) set.seed(2345) phack.normal2 <- sim.multIVhack(nobs.group = c(30, 30), nvar = 15, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "firstsig", iter = 100, alternative = "two.sided", alpha = 0.05) expect_gt(length(which(phack.ambitious2[,1] < 0.05)), @@ -73,9 +81,11 @@ test_that("Incorrect Rounding works", { set.seed(1234) phack1 <- sim.roundhack(0.1, iter = 100, alternative = "two.sided", + effect = 0, heterogeneity = 0, alpha = 0.05) set.seed(1234) phack2 <- sim.roundhack(0.06, iter = 100, alternative = "two.sided", + effect = 0, heterogeneity = 0, alpha = 0.05) expect_equal(nrow(phack1), 100) @@ -91,9 +101,11 @@ test_that("Optional Stopping works", { set.seed(1234) optstop1 <- sim.optstop(n.min = 10, n.max = 50, step = 5, + effect = 0, heterogeneity = 0, alternative = "two.sided", iter = 100, alpha = 0.05) set.seed(1234) optstop2 <- sim.optstop(n.min = 20, n.max = 50, step = 5, + effect = 0, heterogeneity = 0, alternative = "two.sided", iter = 100, alpha = 0.05) expect_equal(nrow(optstop1), 100) @@ -109,9 +121,11 @@ test_that("Outlier Exclusion works", { set.seed(1234) outexcl1 <- sim.outHack(nobs = 20, which = "random", + effect = 0, heterogeneity = 0, strategy = "firstsig", alpha = 0.05, iter = 100) set.seed(1234) outexcl2 <- sim.outHack(nobs = 20, which = "random", + effect = 0, heterogeneity = 0, strategy = "smallest.sig", alpha = 0.05, iter = 100) expect_equal(nrow(outexcl1), 100) @@ -127,10 +141,12 @@ test_that("Exploiting Covariates works", { set.seed(1234) covhack1 <- sim.covhack(nobs.group = 20, ncov = 3, rcov = 0.1, rcovdv = 0.6, + effect = 0, heterogeneity = 0, interactions = FALSE, strategy = "firstsig", alpha = 0.05, iter = 100) 
set.seed(1234) covhack2 <- sim.covhack(nobs.group = 20, ncov = 3, rcov = 0.1, rcovdv = 0.6, + effect = 0, heterogeneity = 0, interactions = FALSE, strategy = "smallest.sig", alpha = 0.05, iter = 100) @@ -147,11 +163,13 @@ test_that("Subgroup Analyses work", { set.seed(1234) subgrhack1 <- sim.subgroupHack(nobs.group = 30, nsubvars = 3, + effect = 0, heterogeneity = 0, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, iter = 100) set.seed(1234) subgrhack2 <- sim.subgroupHack(nobs.group = 30, nsubvars = 3, + effect = 0, heterogeneity = 0, alternative = "two.sided", strategy = "smallest.sig", alpha = 0.05, iter = 100) @@ -165,14 +183,339 @@ test_that("Subgroup Analyses work", { }) +test_that("Selective Reporting of IV regression works", { + + set.seed(1234) + reghack1 <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, + regression = TRUE, effect = 0, heterogeneity = 0, + strategy = "firstsig", iter = 100, + alternative = "two.sided", alpha = 0.05) + set.seed(1234) + reghack2 <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, + regression = TRUE, effect = 0, heterogeneity = 0, + strategy = "smallest.sig", iter = 100, + alternative = "two.sided", alpha = 0.05) + + expect_equal(nrow(reghack1), 100) + expect_equal(reghack1[,2], reghack2[,2]) + expect_equal(length(which(reghack1[,1] < 0.05)), + length(which(reghack2[,1] < 0.05))) + expect_gt(length(which(reghack1[,1] < 0.05)), + length(which(reghack1[,2] < 0.05))) + +}) + +test_that("Group-comparison reporting columns are populated", { + + phase1_cols <- c("effect.initial", "effect.final", "se.initial", "se.final", + "n.initial", "n.final", "stat.initial", "stat.final", + "p.initial", "p.final", "method.initial", "method.final") + + set.seed(2024) + multdv <- sim.multDVhack(nobs.group = c(30, 30), nvar = 4, r = 0.3, + effect = 0, heterogeneity = 0, strategy = "firstsig", + iter = 20, alternative = "two.sided", alpha = 0.05) + set.seed(2024) + statanalysis <- sim.statAnalysisHack(nobs.group = c(30, 30), + 
effect = 0, heterogeneity = 0, + strategy = "firstsig", + alternative = "two.sided", + alpha = 0.05, iter = 20) + + expect_true(all(phase1_cols %in% names(multdv))) + expect_true(all(phase1_cols %in% names(statanalysis))) + expect_equal(multdv$p.initial, multdv$ps.orig) + expect_equal(multdv$p.final, multdv$ps.hack) + expect_equal(statanalysis$p.initial, statanalysis$ps.orig) + expect_equal(statanalysis$p.final, statanalysis$ps.hack) + expect_true(all(is.finite(multdv$effect.initial))) + expect_true(all(is.finite(multdv$effect.final))) + expect_true(all(is.finite(multdv$se.initial))) + expect_true(all(is.finite(multdv$se.final))) + expect_true(all(multdv$method.initial == "t.equal")) + expect_true(all(nchar(multdv$method.final) > 0)) + expect_true(all(is.finite(statanalysis$effect.initial))) + expect_true(all(is.finite(statanalysis$effect.final))) + expect_true(all(nchar(statanalysis$method.initial) > 0)) + expect_true(all(nchar(statanalysis$method.final) > 0)) + +}) + +test_that("Group-comparison heterogeneity remains reproducible and non-degenerate", { + + set.seed(2025) + hetero1 <- sim.multDVhack(nobs.group = c(30, 30), nvar = 4, r = 0.3, + effect = 0, heterogeneity = 0.5, + strategy = "firstsig", iter = 20, + alternative = "two.sided", alpha = 0.05) + set.seed(2025) + hetero2 <- sim.multDVhack(nobs.group = c(30, 30), nvar = 4, r = 0.3, + effect = 0, heterogeneity = 0.5, + strategy = "firstsig", iter = 20, + alternative = "two.sided", alpha = 0.05) + + expect_equal(hetero1, hetero2) + expect_true(all(is.finite(hetero1$effect.initial))) + expect_true(all(is.finite(hetero1$effect.final))) + expect_gt(sd(hetero1$effect.initial), 0) + expect_gt(length(unique(round(hetero1$effect.initial, 8))), 1) + +}) + +test_that("Regression and ANCOVA reporting columns are populated", { + + phase2_cols <- c("effect.initial", "effect.final", "se.initial", "se.final", + "n.initial", "n.final", "stat.initial", "stat.final", + "p.initial", "p.final", "method.initial", 
"method.final") + + set.seed(2028) + multiv_reg_null <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, + effect = 0, heterogeneity = 0, + regression = TRUE, strategy = "firstsig", + iter = 200, alternative = "two.sided", + alpha = 0.05) + set.seed(2028) + multiv_reg_effect1 <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, + effect = 0.4, heterogeneity = 0, + regression = TRUE, strategy = "firstsig", + iter = 200, alternative = "two.sided", + alpha = 0.05) + set.seed(2028) + multiv_reg_effect2 <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, + effect = 0.4, heterogeneity = 0, + regression = TRUE, strategy = "firstsig", + iter = 200, alternative = "two.sided", + alpha = 0.05) + + expect_equal(multiv_reg_effect1[, 2], multiv_reg_effect2[, 2]) + expect_true(all(phase2_cols %in% names(multiv_reg_effect1))) + expect_equal(multiv_reg_effect1$p.initial, multiv_reg_effect1$ps.orig) + expect_equal(multiv_reg_effect1$p.final, multiv_reg_effect1$ps.hack) + expect_true(all(is.finite(multiv_reg_effect1$effect.initial))) + expect_true(all(is.finite(multiv_reg_effect1$effect.final))) + expect_true(all(is.finite(multiv_reg_effect1$se.initial))) + expect_true(all(is.finite(multiv_reg_effect1$se.final))) + expect_true(all(nchar(multiv_reg_effect1$method.initial) > 0)) + expect_true(all(nchar(multiv_reg_effect1$method.final) > 0)) + expect_gt(sum(multiv_reg_effect1$ps.orig < 0.05), + sum(multiv_reg_null$ps.orig < 0.05)) + + set.seed(2029) + covhack_hetero1 <- sim.covhack(nobs.group = 20, ncov = 3, rcov = 0.1, + rcovdv = 0.6, interactions = FALSE, + strategy = "firstsig", effect = 0, + heterogeneity = 0.4, alpha = 0.05, iter = 20) + set.seed(2029) + covhack_hetero2 <- sim.covhack(nobs.group = 20, ncov = 3, rcov = 0.1, + rcovdv = 0.6, interactions = FALSE, + strategy = "firstsig", effect = 0, + heterogeneity = 0.4, alpha = 0.05, iter = 20) + + expect_equal(covhack_hetero1, covhack_hetero2) + expect_true(all(phase2_cols %in% names(covhack_hetero1))) + 
expect_equal(covhack_hetero1$p.initial, covhack_hetero1$ps.orig) + expect_equal(covhack_hetero1$p.final, covhack_hetero1$ps.hack) + expect_true(all(is.finite(covhack_hetero1$effect.initial))) + expect_true(all(is.finite(covhack_hetero1$effect.final))) + expect_gt(sd(covhack_hetero1$effect.initial), 0) + expect_gt(length(unique(round(covhack_hetero1$effect.initial, 8))), 1) + expect_true(all(nchar(covhack_hetero1$method.initial) > 0)) + expect_true(all(nchar(covhack_hetero1$method.final) > 0)) + +}) + +test_that("Group-comparison non-null effects increase original rejection rates", { + + set.seed(2026) + nullres <- sim.multDVhack(nobs.group = c(30, 30), nvar = 4, r = 0.3, + effect = 0, heterogeneity = 0, + strategy = "firstsig", iter = 200, + alternative = "two.sided", alpha = 0.05) + set.seed(2026) + effectres <- sim.multDVhack(nobs.group = c(30, 30), nvar = 4, r = 0.3, + effect = 0.5, heterogeneity = 0, + strategy = "firstsig", iter = 200, + alternative = "two.sided", alpha = 0.05) + + expect_gt(sum(effectres$ps.orig < 0.05), sum(nullres$ps.orig < 0.05)) + +}) + +test_that("Optional stopping sample sizes reflect the selected analysis", { + + set.seed(2027) + optstop <- sim.optstop(n.min = 10, n.max = 30, step = 5, + effect = 0.8, heterogeneity = 0, + alternative = "two.sided", iter = 50, alpha = 0.05) + + expect_true(all(optstop$n.initial == 60)) + expect_true(all(optstop$n.final <= optstop$n.initial)) + expect_true(any(optstop$n.final < optstop$n.initial)) + +}) + +test_that("Correlation-family reporting columns are populated", { + + phase2_cols <- c("effect.initial", "effect.final", "se.initial", "se.final", + "n.initial", "n.final", "stat.initial", "stat.final", + "p.initial", "p.final", "method.initial", "method.final") + + set.seed(2028) + cutoff <- sim.cutoffHack(nobs = 30, strategy = "firstsig", + effect = 0, heterogeneity = 0, + alpha = 0.05, iter = 20) + set.seed(2028) + covhack <- sim.covhack(nobs.group = 30, ncov = 3, rcov = 0.2, rcovdv = 0.5, + effect 
= 0, heterogeneity = 0, + interactions = FALSE, strategy = "firstsig", + alpha = 0.05, iter = 20) + set.seed(2028) + regiv <- sim.multIVhack(nobs.group = 30, nvar = 4, r = 0.2, + regression = TRUE, effect = 0, heterogeneity = 0, + strategy = "firstsig", alpha = 0.05, iter = 20) + + expect_true(all(phase2_cols %in% names(cutoff))) + expect_true(all(phase2_cols %in% names(covhack))) + expect_true(all(phase2_cols %in% names(regiv))) + expect_equal(cutoff$p.initial, cutoff$ps.orig) + expect_equal(cutoff$p.final, cutoff$ps.hack) + expect_equal(covhack$p.initial, covhack$ps.orig) + expect_equal(covhack$p.final, covhack$ps.hack) + expect_equal(regiv$p.initial, regiv$ps.orig) + expect_equal(regiv$p.final, regiv$ps.hack) + expect_true(all(is.finite(cutoff$effect.initial))) + expect_true(all(is.finite(cutoff$effect.final))) + expect_true(all(is.finite(covhack$effect.initial))) + expect_true(all(is.finite(covhack$effect.final))) + expect_true(all(is.finite(regiv$effect.initial))) + expect_true(all(is.finite(regiv$effect.final))) + expect_true(all(nchar(cutoff$method.final) > 0)) + expect_true(all(nchar(covhack$method.final) > 0)) + expect_true(all(nchar(regiv$method.final) > 0)) + +}) + +test_that("Correlation-family heterogeneity remains reproducible and non-degenerate", { + + set.seed(2029) + hetero1 <- sim.multIVhack(nobs.group = 30, nvar = 4, r = 0.2, + regression = TRUE, effect = 0, + heterogeneity = 0.5, strategy = "firstsig", + alpha = 0.05, iter = 20) + set.seed(2029) + hetero2 <- sim.multIVhack(nobs.group = 30, nvar = 4, r = 0.2, + regression = TRUE, effect = 0, + heterogeneity = 0.5, strategy = "firstsig", + alpha = 0.05, iter = 20) + + expect_equal(hetero1, hetero2) + expect_true(all(is.finite(hetero1$effect.initial))) + expect_true(all(is.finite(hetero1$effect.final))) + expect_gt(sd(hetero1$effect.initial), 0) + expect_gt(length(unique(round(hetero1$effect.initial, 8))), 1) + +}) + +test_that("Correlation-family non-null effects increase original rejection 
rates", { + + set.seed(2030) + nullres <- sim.cutoffHack(nobs = 30, strategy = "firstsig", + effect = 0, heterogeneity = 0, + alpha = 0.05, iter = 200) + set.seed(2030) + effectres <- sim.cutoffHack(nobs = 30, strategy = "firstsig", + effect = 0.4, heterogeneity = 0, + alpha = 0.05, iter = 200) + + expect_gt(sum(effectres$ps.orig < 0.05), sum(nullres$ps.orig < 0.05)) + +}) + +test_that("One-sided group-comparison tests follow the positive-effect direction", { + + set.seed(2031) + dat <- phackR:::.sim.data(nobs.group = 40, effect = 0.8, heterogeneity = 0) + control <- dat[dat[,1] == 1, 2] + treatment <- dat[dat[,1] == 2, 2] + + report.t.greater <- phackR:::.report.twogroup(control = control, + treatment = treatment, + method = "t.equal", + alternative = "greater") + report.t.less <- phackR:::.report.twogroup(control = control, + treatment = treatment, + method = "t.equal", + alternative = "less") + report.w.greater <- phackR:::.report.twogroup(control = control, + treatment = treatment, + method = "wilcox", + alternative = "greater") + report.y.greater <- phackR:::.report.twogroup(control = control, + treatment = treatment, + method = "yuen", + trim = 0.2, + alternative = "greater") + report.y.less <- phackR:::.report.twogroup(control = control, + treatment = treatment, + method = "yuen", + trim = 0.2, + alternative = "less") + + expect_gt(report.t.greater$stat, 0) + expect_gt(report.t.greater$effect, 0) + expect_lt(report.t.greater$p, report.t.less$p) + expect_lt(report.w.greater$p, 0.05) + expect_lt(report.y.greater$p, report.y.less$p) + +}) + +test_that("One-sided simulators reward positive effects with alternative greater", { + + set.seed(2032) + greater <- sim.multDVhack(nobs.group = 30, nvar = 4, r = 0.3, + effect = 0.8, heterogeneity = 0, + strategy = "firstsig", iter = 200, + alternative = "greater", alpha = 0.05) + set.seed(2032) + less <- sim.multDVhack(nobs.group = 30, nvar = 4, r = 0.3, + effect = 0.8, heterogeneity = 0, + strategy = "firstsig", iter = 
200, + alternative = "less", alpha = 0.05) + + expect_gt(sum(greater$ps.orig < 0.05), sum(less$ps.orig < 0.05)) + expect_gt(mean(greater$effect.initial), 0) + expect_gt(mean(greater$ds.orig), 0) + +}) + +test_that("One-sided regression IV tests follow positive associations", { + + set.seed(2033) + greater <- sim.multIVhack(nobs.group = 40, nvar = 4, r = 0.2, + regression = TRUE, effect = 0.5, + heterogeneity = 0, strategy = "firstsig", + alternative = "greater", alpha = 0.05, iter = 200) + set.seed(2033) + less <- sim.multIVhack(nobs.group = 40, nvar = 4, r = 0.2, + regression = TRUE, effect = 0.5, + heterogeneity = 0, strategy = "firstsig", + alternative = "less", alpha = 0.05, iter = 200) + + expect_gt(sum(greater$ps.orig < 0.05), sum(less$ps.orig < 0.05)) + expect_gt(mean(greater$effect.initial), 0) + +}) + test_that("Scale Redefinition works", { set.seed(1234) scaledef1 <- sim.compscoreHack(nobs = 30, ncompv = 10, rcomp = 0.5, + effect = 0, heterogeneity = 0, ndelete = 5, strategy = "firstsig", alpha = 0.05, iter = 100) set.seed(1234) scaledef2 <- sim.compscoreHack(nobs = 30, ncompv = 10, rcomp = 0.5, + effect = 0, heterogeneity = 0, ndelete = 5, strategy = "smallest.sig", alpha = 0.05, iter = 100) @@ -188,10 +531,12 @@ test_that("Scale Redefinition works", { test_that("Exploiting arbitraray cutoffs works", { set.seed(1234) - arbitCutoff1 <- sim.cutoffHack(nobs = 30, strategy = "firstsig", alpha = 0.05, + arbitCutoff1 <- sim.cutoffHack(nobs = 30, strategy = "firstsig", + effect = 0, heterogeneity = 0, alpha = 0.05, iter = 100) set.seed(1234) arbitCutoff2 <- sim.cutoffHack(nobs = 30, strategy = "smallest.sig", + effect = 0, heterogeneity = 0, alpha = 0.05, iter = 100) expect_equal(nrow(arbitCutoff1), 100) @@ -207,10 +552,12 @@ test_that("Exploiting statistical analysis options works", { set.seed(1234) arbitStats1 <- sim.statAnalysisHack(nobs.group = 30, strategy = "firstsig", + effect = 0, heterogeneity = 0, alternative = "two.sided", alpha = 0.05, iter = 100) 
set.seed(1234) arbitStats2 <- sim.statAnalysisHack(nobs = 30, strategy = "smallest.sig", + effect = 0, heterogeneity = 0, alternative = "two.sided", alpha = 0.05, iter = 100) @@ -227,9 +574,11 @@ test_that("Exploiting variable transformations works", { set.seed(1234) varT1 <- sim.varTransHack(nobs = 30, transvar = "xy", + effect = 0, heterogeneity = 0, strategy = "firstsig", alpha = 0.05, iter = 100) set.seed(1234) varT2 <- sim.varTransHack(nobs = 30, transvar = "xy", + effect = 0, heterogeneity = 0, strategy = "smallest.sig", alpha = 0.05, iter = 100) expect_equal(nrow(varT1), 100) @@ -245,9 +594,11 @@ test_that("Exploiting missing value imputation works", { set.seed(1234) misval1 <- sim.impHack(nobs = 30, missing = 0.1, which = "random", + effect = 0, heterogeneity = 0, strategy = "firstsig", alpha = 0.05, iter = 100) set.seed(1234) misval2 <- sim.impHack(nobs = 30, missing = 0.1, which = "random", + effect = 0, heterogeneity = 0, strategy = "smallest.sig", alpha = 0.05, iter = 100) expect_equal(nrow(misval1), 100) diff --git a/phackR/vignettes/phackR_vignette.Rmd b/phackR/vignettes/phackR_vignette.Rmd index aabd4db..b9287bb 100644 --- a/phackR/vignettes/phackR_vignette.Rmd +++ b/phackR/vignettes/phackR_vignette.Rmd @@ -24,20 +24,20 @@ In the literature, different papers describe different p-hacking strategies. We ## General Remarks ### Structure of the code -Every p-hacking strategy is stored in a separate file. In each file, there are three main code blocks. The first block contains a function to simulate data that can be used with the p-hacking strategy (or a link to a more generic function that can be used, e.g., from the "helpers.R" file). The second block contains the p-hacking function. Here, we define the strategy to analyze the data with (different levels of) p-hacking. Typically, these functions take the data that were created in the first block as an input. 
The output is a list with two elements: `p.final`, i.e., the final p-hacked p-value, and `ps`, i.e., all p-values "visited" in the process. Typically the first ps value is the original, non-p-hacked p-value. The third block combines the functions from the first two code blocks in one simulation function. Essentially, the functions in this block consist of a big loop where data are created and analyzed repeatedly. The output of this function is always a matrix with two columns: The first column contains the p-hacked values, i.e., the `p.final` values from the p-hacking function. The second column contains the "raw" non p-hacked values. If the p-hacking was "successful" in the respective iteration, the first column will show a smaller p-value than the second column. If the p-hacking was "unsuccessful", the first and second column won't differ. +Every p-hacking strategy is stored in a separate file. In each file, there are three main code blocks. The first block contains a function to simulate data that can be used with the p-hacking strategy (or a link to a more generic function that can be used, e.g., from the "helpers.R" file). The second block contains the p-hacking function. Here, we define the strategy to analyze the data with (different levels of) p-hacking. Typically, these functions take the data that were created in the first block as an input. The output is a list with two elements: `p.final`, i.e., the final p-hacked p-value, and `ps`, i.e., all p-values "visited" in the process. Typically the first ps value is the original, non-p-hacked p-value. The third block combines the functions from the first two code blocks in one simulation function. Essentially, the functions in this block consist of a big loop where data are created and analyzed repeatedly. 
The exported simulation functions now return the legacy columns together with a normalized reporting block that records `effect.initial`, `effect.final`, `se.initial`, `se.final`, `n.initial`, `n.final`, `stat.initial`, `stat.final`, `p.initial`, `p.final`, `method.initial`, and `method.final`. Data simulation functions, p-hacking functions, and any helper functions are specified as internal functions, i.e., start with a "." and are not exported (alas, we don't want people to use this as a toolkit for p-hacking, so let's use this as a safety belt...). All simulation functions are exported and start with "sim.". All functions are documented using `roxygen` documentation. Unit tests of the simulation functions can be found in the `tests` folder. ### Hypothesis tests We use two kinds of hypothesis tests: the independent samples t-test and univariate linear regression. The standard choice is the independent samples t-test. However, some p-hacking methods are not easily applicable to this hypothesis test or are more likely to be used in a continuous variable case. In these cases, we use a univariate linear regression. Some p-hacking strategies specifically rely on different hypothesis tests. In these cases, this is mentioned in the description of the p-hacking strategies below. -The result object of the p-hacking simulation functions contains the original and p-hacked p-value for all analyses. Additionally, whenever possible the determination coefficient $R^2$ of the original and p-hacked result is shown as well. When only t-tests are used in the simulation, the output also contains the original and p-hacked Cohen's d. The "hacked" effect sizes are always based on the p-hacked p-values, i.e., the analysis is still selected based on the smallest p-value, not on the smallest effect size. +The result object of the p-hacking simulation functions contains the original and p-hacked p-value for all analyses. 
Additionally, whenever possible the determination coefficient $R^2$ of the original and p-hacked result is shown as well. When only t-tests are used in the simulation, the legacy output also contains the original and p-hacked Cohen's d. All exported simulators additionally contain the normalized `initial` / `final` reporting block. The "hacked" effect sizes are always based on the p-hacked p-values, i.e., the analysis is still selected based on the smallest p-value, not on the smallest effect size. ### "Normal" versus "Ambitious" p-hacking Most p-hacking simulation functions have an argument "strategy". This argument determines how the final p-value is chosen. There are three options: `strategy = "firstsig"` simulates the situation where a researcher tries out some p-hacking methods and stops as soon as the result is significant, that is, at the first significant p-value. In a comment on Simonsohn et al. (2014), Ulrich & Miller (2015) argued that researchers might instead engage in "ambitious" p-hacking, where the smallest significant p-value is selected. This strategy is implemented in the option `strategy = "smallest.sig"`. Simonsohn (private comm.) argues that there might exist a third p-hacking strategy where the researchers try a number of different analysis options and select the smallest p-value no matter if it is significant or not. We implemented this in the option `strategy = "smallest"`. This strategy implies that researchers use p-values as a criterion for validity of analyses "whichever analysis works better is probably the right analysis to keep" (Simonsohn, private comm.). ### True effect sizes -The true effect size in the simulations is always zero. This means that the data are simulated such that there is no difference between the means of the two groups in the t-test (mean = 0, sd = 1) or that the correlation between the two variables of interest is zero. +The study-level effect is drawn as `theta_i ~ Normal(effect, heterogeneity)`. 
The default remains the null regime (`effect = 0`, `heterogeneity = 0`), which reproduces the original behavior. The scale of `effect` depends on the simulator family: the group-comparison functions use the standardized mean-difference scale, while the regression / correlation functions use the Fisher-z scale. ### Default settings All p-hacking simulation functions come with the same default settings for common arguments. We will only explain these once and mention below if anything differs from these defaults. @@ -45,12 +45,13 @@ All p-hacking simulation functions come with the same default settings for commo * `strategy`: As described above, this argument defines the strategy of selecting p-values (`strategy = "firstsig"`), (`strategy = "smallest.sig"`), or (`strategy = "smallest"`). The default setting is `strategy = "firstsig"`. * `alpha`: The significance level of the hypothesis test. If p < alpha, the result is deemed significant. The default setting is `alpha = 0.05`. * `iter`: Number of iterations in the simulation process. The default is `iter = 1000`, i.e., the simulation will result in 1000 p-hacked p-values. -* `alternative`: Whenever a t-test is used as an analysis function, the argument `alternative` specifies the direction of the t-test. As in the `stats::t.test` function, the default is `two.sided`. +* `alternative`: Whenever a t-test is used as an analysis function, the argument `alternative` specifies the direction of the t-test. In the group-comparison family, `greater` tests whether the treatment or second group exceeds the control or first group, that is, whether the effect is positive. The default is `two.sided`. +* `effect` and `heterogeneity`: These control the mean and between-study standard deviation of the study-level true effect. The scale depends on the simulator family: standardized mean differences for the group-comparison family, Fisher-z values for the regression / correlation family. Both default to `0`. 
## Description of P-Hacking Strategies and Their Implementation ### Selective reporting of the dependent variable -We simulate this p-hacking strategy using the t-test. The **simulated dataset** contains one discrete group variable with two levels (independent variable) and several continuous variables (dependent variables). The correlation between the dependent variables can be set using the argument `r`. The size of the groups can be defined by `nobs.group` (either an integer for equally sized groups or a vector with two elements for groups of different sizes). The **p-hacking strategy** is implemented as follows: Compute t-test with every single of the dependent variables. Then select the final p-value using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the original and p-hacked Cohen's d. +We simulate this p-hacking strategy using the t-test. The **simulated dataset** contains one discrete group variable with two levels (independent variable) and several continuous variables (dependent variables). The correlation between the dependent variables can be set using the argument `r`. The size of the groups can be defined by `nobs.group` (either an integer for equally sized groups or a vector with two elements for groups of different sizes). The simulator also accepts `effect` and `heterogeneity` for the study-level true effect on the standardized mean-difference scale. The **p-hacking strategy** is implemented as follows: Compute t-test with every single of the dependent variables. Then select the final p-value using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, the original and p-hacked Cohen's d, and the normalized reporting block. 
The simulation code can be found in the file `selectiveReportingDV.R`. @@ -58,12 +59,13 @@ The simulation code can be found in the file `selectiveReportingDV.R`. ```{r selectiveReportingDV} set.seed(1234) -sim.multDVhack(nobs.group = 30, nvar = 5, r = 0.3, strategy = "smallest", +sim.multDVhack(nobs.group = 30, nvar = 5, r = 0.3, effect = 0, heterogeneity = 0, + strategy = "smallest", iter = 10, alternative = "two.sided", alpha = 0.05) ``` ### Selective reporting of the independent variable -This p-hacking strategy can be simulated using a t-test or a regression analysis with the provided function. For the t-test, the **simulated dataset** contains one control group variable and multiple treatment group variables (independent variables), thus it is in a wide format. The correlation between the treatment group variables can be set using the argument `r`. The size of the groups can be defined by `nobs.group`. Note that unequal sizes of groups are not possible here because the data set is in wide format. For the regression analysis, the simulated dataset contains one criterion (dependent) variable, and multiple predictor (independent) variables. The **p-hacking strategy** is implemented as follows for the t-test: Conduct an independent samples t-test with every single treatment variable and the control group variable. Then select the final p-value using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). For the regression analysis, the test is conducted with each predictor, then the final p-value is selected using one of the p-value selection strategies. The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the original and p-hacked Cohen's d. +This p-hacking strategy can be simulated using a t-test or a regression analysis with the provided function. 
For the t-test, the **simulated dataset** contains one control group variable and multiple treatment group variables (independent variables), thus it is in a wide format. The correlation between the treatment group variables can be set using the argument `r`. The size of the groups can be defined by `nobs.group`, including unequal group sizes. For the regression analysis, the simulated dataset contains one criterion (dependent) variable, and multiple predictor (independent) variables. Both variants accept `effect` and `heterogeneity`. In the t-test variant, these are on the standardized mean-difference scale; in the regression variant, they are on the Fisher-z scale. The **p-hacking strategy** is implemented as follows for the t-test: Conduct an independent samples t-test with every single treatment variable and the control group variable. Then select the final p-value using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). For the regression analysis, the test is conducted with each predictor, then the final p-value is selected using one of the p-value selection strategies. The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the original and p-hacked Cohen's d, plus the normalized reporting block for the t-test variant and the regression variant. The simulation code can be found in the file `selectiveReportingIV.R`. @@ -71,13 +73,14 @@ The simulation code can be found in the file `selectiveReportingIV.R`. 
```{r selectiveReportingIV} set.seed(1234) -sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, strategy = "smallest", - regression = FALSE, iter = 10, alternative = "two.sided", +sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.3, effect = 0.4, heterogeneity = 0.1, + strategy = "smallest", + regression = TRUE, iter = 10, alternative = "two.sided", alpha = 0.05) ``` ### Incorrect rounding -We simulate this p-hacking strategy using the t-test, but literally any hypothesis test should produce the same results. The **simulated dataset** contains one discrete group variable with two levels (independent variable) and one continous variable (dependent variable). Note that the simulation function does not include an argument for the group size as the group size is irrelevant in this p-hacking context. The **p-hacking strategy** is implemented as follows: Compute the p-value for the t-test, if it is between the alpha level and a rounding level specified in the argument `roundinglevel` (e.g., 0.06 > p > 0.05), round it down to p = 0.05, else keep the original p-value. Note that selecting a p-value selection strategy (first significant p-value, smallest significant p-value, smallest p-value) is not possible for this p-hacking strategy. The output contains the original and p-hacked p-values, and the original and p-hacked $R^2$. Note that $R^2$ does not change because it is unlikely that researchers would adjust effect sizes retrospectively based on a rounded p-value. +We simulate this p-hacking strategy using the t-test, but literally any hypothesis test should produce the same results. The **simulated dataset** contains one discrete group variable with two levels (independent variable) and one continuous variable (dependent variable). Note that the simulation function does not include an argument for the group size as the group size is irrelevant in this p-hacking context. The simulator also accepts `effect` and `heterogeneity` on the standardized mean-difference scale.
The **p-hacking strategy** is implemented as follows: Compute the p-value for the t-test, if it is between the alpha level and a rounding level specified in the argument `roundinglevel` (e.g., 0.06 > p > 0.05), round it down to p = 0.05, else keep the original p-value. Note that selecting a p-value selection strategy (first significant p-value, smallest significant p-value, smallest p-value) is not possible for this p-hacking strategy. The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the normalized reporting block. Note that $R^2$ does not change because it is unlikely that researchers would adjust effect sizes retrospectively based on a rounded p-value. The simulation code can be found in the file `incorrectRounding.R`. @@ -85,24 +88,26 @@ The simulation code can be found in the file `incorrectRounding.R`. ```{r incorrectRounding} set.seed(1234) -sim.roundhack(roundinglevel = 0.06, iter = 10, alternative = "two.sided", +sim.roundhack(roundinglevel = 0.06, effect = 0, heterogeneity = 0, iter = 10, + alternative = "two.sided", alpha = 0.05) ``` ### Optional stopping / Data peeking -We simulate this p-hacking strategy using the t-test. The **simulated dataset** contains one discrete group variable with two levels (independent variable) and one continous variable (dependent variable), both should have at least the sample size defined in `n.max`. Note that this is the reason why the simulation function does not have an argument to specify the sample size per group (it is automatically set to n.max). The **p-hacking strategy** is implemented as follows: The dataset is evaluated row-by-row, starting with a minimum sample size of `n.min`. At each step, a number of observations is added to the sample, defined by the argument `step` and the t-test is computed. This continues until the maximum sample size specified in `n.max` is reached. 
The p-hacked p-value is defined as the first p-value that is smaller than the defined alpha level. The non-p-hacked p-value is defined as the p-value at n.max (this implies that if there was no p-hacking, n.max would be the specified fixed sample size). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the original and p-hacked Cohen's d. +We simulate this p-hacking strategy using the t-test. The **simulated dataset** contains one discrete group variable with two levels (independent variable) and one continuous variable (dependent variable), both should have at least the sample size defined in `n.max`. Note that this is the reason why the simulation function does not have an argument to specify the sample size per group (it is automatically set to n.max). The simulator also accepts `effect` and `heterogeneity` on the standardized mean-difference scale. The **p-hacking strategy** is implemented as follows: The dataset is evaluated row-by-row, starting with a minimum sample size of `n.min`. At each step, a number of observations is added to the sample, defined by the argument `step` and the t-test is computed. This continues until the maximum sample size specified in `n.max` is reached. The p-hacked p-value is defined as the first p-value that is smaller than the defined alpha level. The non-p-hacked p-value is defined as the p-value at n.max (this implies that if there was no p-hacking, n.max would be the specified fixed sample size). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, the original and p-hacked Cohen's d, and the normalized reporting block. The simulation code can be found in the file `optionalStopping.R`.
**Example** ```{r optionalStopping} set.seed(1234) -sim.optstop(n.min = 10, n.max = 20, step = 2, alternative = "two.sided", +sim.optstop(n.min = 10, n.max = 20, step = 2, effect = 0, heterogeneity = 0, + alternative = "two.sided", iter = 10, alpha = 0.05) ``` ### Outlier exclusion -We simulate this p-hacking strategy using univariate linear regression (most outlier detection methods are based on some kind of distance measures on continuous variables). The **simulated dataset** contains two continuous variables, both with the sample size specified in the argument `nobs`. The **p-hacking strategy** is implemented as follows: The original p-value is computed as a simple linear regression(y ~ x) with all values. For the p-hacked p-values, outlier values are searched in x and y using an outlier definition criterion. Then, a pairwise deletion of outlier values follows for the following three options: (1) remove xy pairs where x is an outlier, (2) remove xy pairs where y is an outlier, (3) remove xy pairs where x and y are outliers. Therefore, for every outlier deletion strategy, we end up with three datasets where different values are removed (some of these may be identical and will be cut away by the `.extractoutlier` function in practice). The p-hacking method contains 12 separate outlier definition criteria, some of which have several sub-criteria (e.g., delete values that are > *x* standard deviations above or below the mean, with *x* being 2, 2.5, 3, ...). In the case of sub-criteria, the outlier deletion process is applied to each sub-criterion, so that we end up with *3 x number of sub-criteria* data sets (again, some of which might be identical and cut away by the `.extractoutlier` function). In the simulation function, either all separate outlier definition criteria can be applied (i.e., `which = c(1:12)`), a specific subset can be chosen (e.g., `which = c(1,3,5)`), or a random subset of 5 different outlier strategies can be chosen (`which = "random"`). 
We believe that the latter shows a realistic scenario as not all researchers know all outlier definition criteria and the ones they know are probably a random subset of the existing outlier definition criteria (which does not mean that our simulation method covers all existing outlier definition criteria). For all outlier definition criteria, the outlier-deleted datasets are created and evaluated using the t-test function. In the end, the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values as well as the original and p-hacked $R^2$. +We simulate this p-hacking strategy using univariate linear regression (most outlier detection methods are based on some kind of distance measures on continuous variables). The **simulated dataset** contains two continuous variables, both with the sample size specified in the argument `nobs`. The function also accepts `effect` and `heterogeneity` on the Fisher-z scale. The **p-hacking strategy** is implemented as follows: The original p-value is computed as a simple linear regression (y ~ x) with all values. For the p-hacked p-values, outlier values are searched in x and y using an outlier definition criterion. Then, a pairwise deletion of outlier values follows for the following three options: (1) remove xy pairs where x is an outlier, (2) remove xy pairs where y is an outlier, (3) remove xy pairs where x and y are outliers. Therefore, for every outlier deletion strategy, we end up with three datasets where different values are removed (some of these may be identical and will be cut away by the `.extractoutlier` function in practice). The p-hacking method contains 12 separate outlier definition criteria, some of which have several sub-criteria (e.g., delete values that are > *x* standard deviations above or below the mean, with *x* being 2, 2.5, 3, ...).
In the case of sub-criteria, the outlier deletion process is applied to each sub-criterion, so that we end up with *3 x number of sub-criteria* data sets (again, some of which might be identical and cut away by the `.extractoutlier` function). In the simulation function, either all separate outlier definition criteria can be applied (i.e., `which = c(1:12)`), a specific subset can be chosen (e.g., `which = c(1,3,5)`), or a random subset of 5 different outlier strategies can be chosen (`which = "random"`). We believe that the latter shows a realistic scenario as not all researchers know all outlier definition criteria and the ones they know are probably a random subset of the existing outlier definition criteria (which does not mean that our simulation method covers all existing outlier definition criteria). For all outlier definition criteria, the outlier-deleted datasets are created and evaluated using the regression function. In the end, the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the normalized reporting block. Below, we describe all outlier exclusion criteria: @@ -124,26 +129,28 @@ The simulation code can be found in the file `outlierExclusion.R`. **Example** ```{r outlierExclusion} set.seed(1234) -sim.outHack(nobs = 30, which = "random", strategy = "smallest", alpha = 0.05, +sim.outHack(nobs = 30, which = "random", effect = 0, heterogeneity = 0, + strategy = "smallest", alpha = 0.05, iter = 10) ``` ### Exploiting covariates -We simulate this p-hacking strategy using a t-test that is expanded to an ANCOVA when covariates are taken into account. The **simulated dataset** contains a discrete group variable with two levels (independent variable), a continuous dependent variable, and a set of continuous covariates. 
The number of covariates can be specified using the argument `ncov`. The correlation of the dependent variable with the covariates as well as the correlation among the covariates can be specified using the arguments `rcovdv` and `rcov`, respectively. The **p-hacking strategy** is implemented as follows: First, the original p-value without covariates is calculated. Then, all covariates are added to the model separately (i.e., dv ~ group + cov1, df ~ group + cov2, ...). Then, covariates are ordered in decreasing order with respect to their correlation with the dependent variable and then added to the model sequentially (i.e., dv ~ group + covhighest, dv ~ covhighest + covsecondhighest, ...). All models are evaluated and the final p-value is selected using one of the strategies (first significant p-value, smallest significant p-value, smallest p-value). The models can either be defined as models without interaction terms (`interactions = FALSE` as is the default) or with interaction terms. The output contains the original and p-hacked p-values as well as the $ partial\,\eta^2$. In this case, we do not display the overall $R^2$ of the analyses because in the ANCOVAs the focus does not lie on the overall explained variance. +We simulate this p-hacking strategy using a t-test that is expanded to an ANCOVA when covariates are taken into account. The **simulated dataset** contains a discrete group variable with two levels (independent variable), a continuous dependent variable, and a set of continuous covariates. The number of covariates can be specified using the argument `ncov`. The correlation of the dependent variable with the covariates as well as the correlation among the covariates can be specified using the arguments `rcovdv` and `rcov`, respectively. The simulator also accepts `effect` and `heterogeneity`, so the study-level group effect can vary instead of staying fixed at the null. 
The **p-hacking strategy** is implemented as follows: First, the original p-value without covariates is calculated. Then, all covariates are added to the model separately (i.e., dv ~ group + cov1, dv ~ group + cov2, ...). Then, covariates are ordered in decreasing order with respect to their correlation with the dependent variable and then added to the model sequentially (i.e., dv ~ group + covhighest, dv ~ covhighest + covsecondhighest, ...). All models are evaluated and the final p-value is selected using one of the strategies (first significant p-value, smallest significant p-value, smallest p-value). The models can either be defined as models without interaction terms (`interactions = FALSE` as is the default) or with interaction terms. The output contains the original and p-hacked p-values, the original and p-hacked partial $\eta^2$, and the normalized reporting block. In this case, we do not display the overall $R^2$ of the analyses because in the ANCOVAs the focus does not lie on the overall explained variance. The simulation code can be found in the file `exploitCovariates.R`. **Example** ```{r exploitCovariates} set.seed(1234) -sim.covhack(nobs.group = 30, ncov = 4, rcov = 0.3, rcovdv = 0.5, - interactions = FALSE, strategy = "smallest", +sim.covhack(nobs.group = 30, ncov = 4, rcov = 0.3, rcovdv = 0.5, + interactions = FALSE, strategy = "smallest", + effect = 0.4, heterogeneity = 0.1, alpha = 0.05, iter = 10) ``` ### Subgroup analyses / Tinkering with inclusion criteria -We simulate this p-hacking strategy using a t-test. The **simulated dataset** consists of one discrete group variable (independent variable), one continuous dependent variable, and several binary subgroup variables. The size of the original groups can be defined by `nobs.group` (either an integer for equally sized groups or a vector with two elements for groups of different sizes). Group sizes in the subgroup variables can differ (group membership is sampled randomly from `c(0,1)`).
The number of subgroup variables can be defined using the argument `nsubvars`. The **p-hacking strategy** is implemented as follows: First, the original p-value is computed using the full dataset (independent and dependent variable). Then, for each subgroup variable, the dataset is split into two parts and the t-test is conducted separately in each part (e.g., as if one would conduct the same t-test for men and women). Then the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the original and p-hacked Cohen's d. +We simulate this p-hacking strategy using a t-test. The **simulated dataset** consists of one discrete group variable (independent variable), one continuous dependent variable, and several binary subgroup variables. The size of the original groups can be defined by `nobs.group` (either an integer for equally sized groups or a vector with two elements for groups of different sizes). Group sizes in the subgroup variables can differ (group membership is sampled randomly from `c(0,1)`). The simulator also accepts `effect` and `heterogeneity` on the standardized mean-difference scale. The number of subgroup variables can be defined using the argument `nsubvars`. The **p-hacking strategy** is implemented as follows: First, the original p-value is computed using the full dataset (independent and dependent variable). Then, for each subgroup variable, the dataset is split into two parts and the t-test is conducted separately in each part (e.g., as if one would conduct the same t-test for men and women). Then the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). 
The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the original and p-hacked Cohen's d, plus the normalized reporting block. Note: This p-hacking strategy can be seen either as a subgroup analysis (e.g., "evaluate data separately for men and women") or as tinkering with inclusion criteria (e.g., "include only women (if the analysis is significant for them)"). Technically, for tinkering with inclusion criteria, maybe only one of the levels of the subgroups should be evaluated but this would imply that the selection of the inclusion criterion was theory-driven. The current approach does not assume this (inclusion criterion is "HARKed"). @@ -152,59 +159,64 @@ The simulation code can be found in the file `subgroupAnalysis.R`. **Example** ```{r subgroupAnalysis} set.seed(1234) -sim.subgroupHack(nobs.group = 30, nsubvars = 3, alternative = "two.sided", +sim.subgroupHack(nobs.group = 30, nsubvars = 3, effect = 0, heterogeneity = 0, + alternative = "two.sided", strategy = "smallest", alpha = 0.05, iter = 10) ``` ### Composite scores / Scale redefinition -We simulate this p-hacking strategy using univariate linear regression (so that we don't mix it up with the multiple dependent variable p-hacking strategy). The **simulated dataset** consists of one dependent variable and several correlated score variables that can potentially form the items of a score (these are not correlated with the dependent variable). The correlation between these score variables can be specified using the `rcomp` argument, the number of the score variables can be specified using the `ncompv` argument. The **p-hacking strategy** is implemented as follows: First, the original p-value is calculated using the mean score of all score variables as a predictor variable. Then, one by one, items are removed from the scale. 
The next item to remove is always chosen based on "Cronbach's alpha when item deleted" (the common choice in SPSS), i.e., the item is chosen that makes the scale most consistent. In each iteration, the p-values for linear regressions using the following predictors are calculated: (1) new composite score, i.e., reduced item scale; (2) deleted item from the score. The maximum number of items to be removed from the scale can be set using the argument `ndelete`. In the end, the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values as well as the original and p-hacked $R^2$. +We simulate this p-hacking strategy using univariate linear regression (so that we don't mix it up with the multiple dependent variable p-hacking strategy). The **simulated dataset** consists of one dependent variable and several correlated score variables that can potentially form the items of a score. The correlation between these score variables can be specified using the `rcomp` argument, the number of the score variables can be specified using the `ncompv` argument. The function also accepts `effect` and `heterogeneity` on the Fisher-z scale, where the original analysis targets the full composite score. The **p-hacking strategy** is implemented as follows: First, the original p-value is calculated using the mean score of all score variables as a predictor variable. Then, one by one, items are removed from the scale. The next item to remove is always chosen based on "Cronbach's alpha when item deleted" (the common choice in SPSS), i.e., the item is chosen that makes the scale most consistent. In each iteration, the p-values for linear regressions using the following predictors are calculated: (1) new composite score, i.e., reduced item scale; (2) deleted item from the score. 
The maximum number of items to be removed from the scale can be set using the argument `ndelete`. In the end, the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the normalized reporting block. The simulation code can be found in the file `compositeScores.R`. **Example** ```{r compositeScores} set.seed(1234) -sim.compscoreHack(nobs = 30, ncompv = 5, rcomp = 0.7, ndelete = 3, +sim.compscoreHack(nobs = 30, ncompv = 5, rcomp = 0.7, effect = 0, + heterogeneity = 0, ndelete = 3, strategy = "smallest", alpha = 0.05, iter = 10) ``` ### Exploiting variable transformations -We simulate this p-hacking strategy using univariate linear regression because most variable transformation strategies are not applicable to discrete variables. The **simulated dataset** contains two continuous variables that are not correlated with each other. The **p-hacking strategy** is implemented as follows: The original p-value is calculated as a simple univariate linear regression y ~ x. Then, the variables in the model are transformed. Using the argument `transvar` one can specify whether only x (`transvar = "x"`), only y (`transvar = "y"`) or both (`transvar = "xy"`) should be transformed. The respective variables are transformed using a log transformation, a square root transformation and the inverse. The function offers the option that variables are only transformed conditional on a significant normality test of residuals (Shapiro-Wilk). This can be adjusted using the argument `testnorm`. Once the variables have been transformed, linear regressions are computed for all transformations of the variables. The final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). 
The output contains the original and p-hacked p-values as well as the original and p-hacked $R^2$. +We simulate this p-hacking strategy using univariate linear regression because most variable transformation strategies are not applicable to discrete variables. The **simulated dataset** contains two continuous variables. The function also accepts `effect` and `heterogeneity` on the Fisher-z scale. The **p-hacking strategy** is implemented as follows: The original p-value is calculated as a simple univariate linear regression y ~ x. Then, the variables in the model are transformed. Using the argument `transvar` one can specify whether only x (`transvar = "x"`), only y (`transvar = "y"`) or both (`transvar = "xy"`) should be transformed. The respective variables are transformed using a log transformation, a square root transformation and the inverse. The function offers the option that variables are only transformed conditional on a significant normality test of residuals (Shapiro-Wilk). This can be adjusted using the argument `testnorm`. Once the variables have been transformed, linear regressions are computed for all transformations of the variables. The final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the normalized reporting block. The simulation code can be found in the file `variableTransformation.R`. **Example** ```{r variableTransformation} set.seed(1234) -sim.varTransHack(nobs = 30, transvar = "xy", strategy = "smallest", +sim.varTransHack(nobs = 30, transvar = "xy", effect = 0, heterogeneity = 0, + strategy = "smallest", alpha = 0.05, iter = 10) ``` ### Exploiting arbitrary cutoff values -We simulate this p-hacking strategy using a t-test / ANOVA (but the underlying data is actually continuous). 
The **simulated dataset** contains two continuous variables that are uncorrelated (dependent and independent variable). In the **p-hacking strategy**, three mechanisms are applied to make the independent variable discrete. The first mechanism is a median split (leading to two groups, ergo a t-test). The second mechanism is a three-wise split, after which only the two extreme categories are compared in a t-test ("cut the middle" strategy). The third mechanism is a three-wise split again, but this time all three categories are evaluated using an ANOVA (only the omnibus test is evaluated). Additionally to these, the original univariate linear regression is evaluated to obtain the original p-value. Then the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values as well as the original and p-hacked $R^2$. +We simulate this p-hacking strategy using a t-test / ANOVA (but the underlying data is actually continuous). The **simulated dataset** contains two continuous variables. The function also accepts `effect` and `heterogeneity` on the Fisher-z scale. In the **p-hacking strategy**, three mechanisms are applied to make the independent variable discrete. The first mechanism is a median split (leading to two groups, ergo a t-test). The second mechanism is a three-wise split, after which only the two extreme categories are compared in a t-test ("cut the middle" strategy). The third mechanism is a three-wise split again, but this time all three categories are evaluated using an ANOVA (only the omnibus test is evaluated). In addition to these, the original univariate linear regression is evaluated to obtain the original p-value. Then the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). 
The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the normalized reporting block. The simulation code can be found in the file `exploitCutoffs.R`. **Example** ```{r exploitCutoffs} set.seed(1234) -sim.cutoffHack(nobs = 30, strategy = "smallest", alpha = 0.05, iter = 10) +sim.cutoffHack(nobs = 30, strategy = "smallest", effect = 0, + heterogeneity = 0, alpha = 0.05, iter = 10) ``` ### Exploiting statistical analysis options -We simulate this p-hacking strategy using a t-test (and related tests for comparing the central tendency in two independent groups). The **simulated dataset** contains a discrete group variable (independent variable) and a continuous dependent variable. The **p-hacking strategy** is implemented as follows: The central tendency in the groups is compared using a t-test (original analysis), a Welch test, a Wilcoxon test, and a Yuen test (trimming options: 0.1, 0.15, 0.2, and 0.25). Then the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains only the original and p-hacked p-values because there is no common effect size for the analayses used. +We simulate this p-hacking strategy using a t-test (and related tests for comparing the central tendency in two independent groups). The **simulated dataset** contains a discrete group variable (independent variable) and a continuous dependent variable. The simulator also accepts `effect` and `heterogeneity` on the standardized mean-difference scale. The **p-hacking strategy** is implemented as follows: The central tendency in the groups is compared using a t-test (original analysis), a Welch test, a Wilcoxon test, and a Yuen test (trimming options: 0.1, 0.15, 0.2, and 0.25). Then the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). 
The output contains the legacy p-value and effect-size columns plus the normalized reporting block, even though the reporting method differs across analyses. The simulation code can be found in the file `statAnalysis.R`. **Example** ```{r statAnalysis} set.seed(1234) -sim.statAnalysisHack(nobs.group = 30, strategy = "smallest", +sim.statAnalysisHack(nobs.group = 30, effect = 0, heterogeneity = 0, + strategy = "smallest", alternative = "two.sided", alpha = 0.05, iter = 10) ``` ### Favorable imputation of missing values -We simulate this p-hacking strategy using univariate linear regression because many imputation methods have been developed for continuous variables. The **simulated dataset** contains two non-correlated continuous variables. Both of these contain missing values. The percentage of missing values is defined by the argument `missing`. The **p-hacking strategy** is implemented similar to the outlier exclusion strategy. There are 10 different imputation methods that can be selected using the argument `which`. It is possible to select all of them (`which = c(1:10)`), a specific subset of them (e.g., `which = c(1,4,6)`), or a random subset of 5 imputation methods (`which = "random"`). In the end, the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values as well as the original and p-hacked $R^2$. +We simulate this p-hacking strategy using univariate linear regression because many imputation methods have been developed for continuous variables. The **simulated dataset** contains two continuous variables. Both of these contain missing values. The percentage of missing values is defined by the argument `missing`. The function also accepts `effect` and `heterogeneity` on the Fisher-z scale. The **p-hacking strategy** is implemented similar to the outlier exclusion strategy. 
There are 10 different imputation methods that can be selected using the argument `which`. It is possible to select all of them (`which = c(1:10)`), a specific subset of them (e.g., `which = c(1,4,6)`), or a random subset of 5 imputation methods (`which = "random"`). In the end, the final p-value is selected using one of the p-value selection strategies (first significant p-value, smallest significant p-value, smallest p-value). The output contains the original and p-hacked p-values, the original and p-hacked $R^2$, and the normalized reporting block. Below, we describe all imputation methods: @@ -224,6 +236,7 @@ The simulation code can be found in the file `favorableImputation.R`. **Example** ```{r favorableImputation} set.seed(1234) -sim.impHack(nobs = 30, missing = 0.2, which = c(1:10), strategy = "smallest", +sim.impHack(nobs = 30, missing = 0.2, which = c(1:10), effect = 0, + heterogeneity = 0, strategy = "smallest", alpha = 0.05, iter = 10) ```