statds · jun-yan · Nov 9, 2024 · Nov 9, 2024 · Nov 9, 2024 · Jan 8, 2026
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,32 @@
+\.Rcheck$
+\.Rout$
+\.Rproj$
+\.tar\.gz$
+^GPATH$
+^GRTAGS$
+^GTAGS$
+^LICENSE$
+^Makefile$
+^README\.Rmd$
+^README\.html$
+^README_cache$
+^TAGS$
+^TODO\.org$
+^\#
+^\.Rhistory$
+^\.Rproj\.user$
+^\.\#
+^\.clang_complete$
+^\.clangd$
+^\.git$
+^\.github$
+^\.gitlab-ci\.yml$
+^\.travis\.yml$
+^_pkgdown\.yml$
+^appveyor\.yml$
+^docs$
+^misc$
+^revdep$
+^test$
+^working$
+~$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,28 @@
+Package: ibist
+Title: Data and Functions for Introduction to Biostatistics with R
+Version: 0.1-0
+Authors@R: c(
+    person(given = "Elizabeth", family = "Schifano",
+           role = c("aut"),
+           comment = c(ORCID = "0000-0002-9793-332X")),
+    person(given = "Jun", family = "Yan",
+           email = "jun.yan@uconn.edu",
+           role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0003-4401-7296"))
+    )
+Description: Provides datasets and supporting functions for the book
+    Introduction to Biostatistics with R by Schifano and Yan (2026+),
+    published by Taylor & Francis. The package is intended for teaching
+    introductory biostatistics and for reproducing examples in the text.
+Depends:
+    R (>= 4.4.0)
+VignetteBuilder: knitr
+License: GPL (>= 3)
+URL: https://github.com/statds/ibist-R
+BugReports: https://github.com/statds/ibist-R/issues
+Imports: stats, rlang, ggplot2
+Suggests: knitr, testthat (>= 3.0.0)
+LazyData: true
+RoxygenNote: 7.3.2
+Encoding: UTF-8
+Config/testthat/edition: 3
diff --git a/LICENSE b/LICENSE
diff --git a/Makefile b/Makefile
@@ -0,0 +1,96 @@
+## Development Makefile for an R package: build, check, install,
+## vignette preview, pkgdown site, and housekeeping targets.
+## Package name and version are read from DESCRIPTION at parse time.
+
+objects := $(wildcard R/*.R) DESCRIPTION
+version := $(shell grep -E "^Version:" DESCRIPTION | awk '{print $$NF}')
+pkg := $(shell  grep -E "^Package:" DESCRIPTION | awk '{print $$NF}')
+tar := $(pkg)_$(version).tar.gz
+## Test sources that should trigger a re-check when edited. DESCRIPTION
+## declares testthat (edition 3), so tests/testthat is tracked alongside
+## the original inst/tinytest location; $(wildcard) drops whichever
+## directory is absent.
+tests := $(wildcard inst/tinytest/*.R tests/testthat/*.R)
+checkLog := $(pkg).Rcheck/00check.log
+rmd := $(wildcard vignettes/*.Rmd)
+vignettes := $(patsubst %.Rmd,%.html,$(rmd))
+
+
+## Default goal: run R CMD check on a freshly built tarball.
+.PHONY: check
+check: $(checkLog)
+
+## Build the source tarball only.
+.PHONY: build
+build: $(tar)
+
+## Unconditionally rebuild the tarball with R CMD build and install it
+## (does not go through the $(tar) rule, so no roxygen/timestamp step).
+.PHONY: install
+install:
+	R CMD build .
+	R CMD INSTALL $(tar)
+
+## Render every vignette to HTML next to its source.
+.PHONY: preview
+preview: $(vignettes)
+
+.PHONY: pkgdown
+pkgdown:
+	Rscript -e "library(methods); pkgdown::build_site();"
+
+.PHONY: deploy-pkgdown
+deploy-pkgdown:
+	@bash misc/deploy_docs.sh
+
+## Install the tarball, then run inst/run_rcpp_test.R in the background;
+## output goes to check-rcpp.Rout.
+.PHONY: check-rcpp
+check-rcpp: $(tar)
+	R CMD INSTALL $(tar)
+	Rscript inst/run_rcpp_test.R > check-rcpp.Rout &
+
+## Stage the tarball in revdep/ and launch misc/revdep_check.R in the
+## background. The stale-artifact globs are spelled out instead of using
+## {a,b} brace expansion, which the default /bin/sh need not support.
+.PHONY: check-revdep
+check-revdep: $(tar)
+	@mkdir -p revdep
+	@rm -rf revdep/*.Rcheck revdep/*.tar.gz
+	@cp $(tar) revdep
+	nohup R CMD BATCH --no-save --no-restore misc/revdep_check.R &
+
+## Regenerate roxygen docs, run the timestamp script, then build.
+$(tar): $(objects)
+	@Rscript -e "library(methods);" \
+	-e "devtools::document();";
+	@$(MAKE) update-timestamp
+	R CMD build .
+
+$(checkLog): $(tar) $(tests)
+	R CMD check --as-cran $(tar)
+
+## $< names the .Rmd source directly; $? (prerequisites newer than the
+## target) only happens to expand to the same file.
+vignettes/%.html: vignettes/%.Rmd
+	Rscript -e "library(methods); rmarkdown::render('$<')"
+
+.PHONY: readme
+readme: README.md
+README.md: README.Rmd
+	@Rscript -e "rmarkdown::render('$<')"
+
+## update copyright year
+.PHONY: update-timestamp
+update-timestamp:
+	@bash misc/update_timestamp.sh
+
+.PHONY: tags
+tags:
+	Rscript -e "utils::rtags(path = 'R', ofile = 'TAGS')"
+
+## Remove editor backups, R session residue, and build/check products.
+.PHONY: clean
+clean:
+	@$(RM) -r *~ */*~ .Rhistory *.Rhistory *.tar.gz src/*.so src/*.o \
+	*.Rcheck/ *.Rout .\#* *_cache
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,22 @@
+# Generated by roxygen2: do not edit by hand
+
+export(demo_clt)
+export(power.p1s.test)
+export(rate.test)
+importFrom(ggplot2,aes)
+importFrom(ggplot2,after_stat)
+importFrom(ggplot2,facet_wrap)
+importFrom(ggplot2,geom_histogram)
+importFrom(ggplot2,geom_line)
+importFrom(ggplot2,ggplot)
+importFrom(ggplot2,labs)
+importFrom(ggplot2,theme_minimal)
+importFrom(rlang,.data)
+importFrom(stats,dbinom)
+importFrom(stats,density)
+importFrom(stats,dnorm)
+importFrom(stats,pbinom)
+importFrom(stats,pnorm)
+importFrom(stats,qbinom)
+importFrom(stats,qnorm)
+importFrom(stats,uniroot)
diff --git a/R/data-nrs.R b/R/data-nrs.R
@@ -0,0 +1,43 @@
+#' Non-restorative sleep and physical activity (Japan cohort study)
+#'
+#' A large observational dataset from a cohort study conducted in Japan
+#' to examine the association between non-restorative sleep (NRS) and
+#' physical activity, gender, and age. The data are used to illustrate
+#' logistic regression modeling for a binary outcome in a large-sample
+#' setting.
+#'
+#' @format
+#' A data frame with 90,122 observations on the following variables:
+#' \describe{
+#'   \item{id}{Subject identifier.}
+#'   \item{Gender}{Gender of the subject (integer-coded).}
+#'   \item{Age_2013}{Age in years in 2013.}
+#'   \item{EX_2013}{Indicator of regular exercise in 2013
+#'   (integer-coded).}
+#'   \item{PA_2013}{Physical activity measure in 2013
+#'   (integer-coded).}
+#'   \item{NRS_2013}{Indicator of non-restorative sleep in 2013
+#'   (1 = presence, 0 = absence).}
+#'   \item{AgeGroup_2013}{Categorical age group in 2013
+#'   (integer-coded).}
+#'   \item{EXPA_classification}{Combined classification of exercise and
+#'   physical activity status (integer-coded).}
+#' }
+#'
+#' @details
+#' Non-restorative sleep (NRS) is defined as a subjective feeling of lack
+#' of refreshment on awakening and reflects qualitative aspects of sleep.
+#' Hidaka et al. (2019) analyzed these data using logistic regression to
+#' assess whether the probability of NRS is associated with physical
+#' activity, gender, and age in a large cohort of adult subjects in
+#' Japan. Within this package, the dataset is provided for methodological
+#' illustration of binary regression models rather than for substantive
+#' epidemiological inference.
+#'
+#' All variables are stored as integer codes. Missing values are
+#' represented as \code{NA}.
+#'
+#' @source
+#' Hidaka et al. (2019).
+#'
+"nrs"
diff --git a/R/demo_clt.R b/R/demo_clt.R
@@ -0,0 +1,164 @@
+#' Demonstrate the Central Limit Theorem
+#'
+#' The \code{demo_clt()} function generates plots to illustrate the
+#' Central Limit Theorem (CLT) using a specified random number generator.
+#' The function displays standardized sampling distributions for
+#' different sample sizes and overlays the standard normal density.
+#'
+#' @param rng A random number generator function taking the sample size
+#'   as its first argument (e.g., \code{runif}, \code{rnorm},
+#'   \code{rgamma}).
+#' @param n A non-empty numeric vector of positive, finite sample sizes
+#'   (e.g., \code{c(5, 10, 20, 40)}).
+#' @param nrep The number of repetitions for generating sample means
+#'   (default is 10000).
+#' @param ... Additional arguments passed to the random number generator
+#'   (e.g., \code{shape} and \code{rate} for \code{rgamma}).
+#' @param pmean The population mean of the distribution; a single finite
+#'   number. If \code{NULL}, it is estimated from a large Monte Carlo
+#'   sample.
+#' @param psd The population standard deviation of the distribution; a
+#'   single positive, finite number. If \code{NULL}, it is estimated
+#'   from a large Monte Carlo sample.
+#'
+#' @details
+#' When \code{pmean} or \code{psd} is \code{NULL}, one draw of 100,000
+#' values from \code{rng} supplies the missing estimate(s). An error is
+#' raised if that draw does not yield a finite mean and a positive,
+#' finite standard deviation; in that case supply both values
+#' explicitly.
+#'
+#' @return A \code{ggplot2} object showing the standardized sampling
+#'   distributions for different sample sizes, compared against the
+#'   standard normal curve.
+#'
+#' @examples
+#' set.seed(123)
+#' demo_clt(runif, n = c(5, 10, 20, 40), min = 0, max = 1)
+#'
+#' demo_clt(rgamma, n = c(5, 10, 20, 40), shape = 2, rate = 1,
+#'          pmean = 2, psd = sqrt(2)
+#' )
+#'
+#' @importFrom rlang .data
+#' @importFrom ggplot2 ggplot geom_histogram geom_line aes
+#' @importFrom ggplot2 facet_wrap labs theme_minimal after_stat
+#' @export
+demo_clt <- function(
+  rng,
+  n,
+  nrep = 10000,
+  ...,
+  pmean = NULL,
+  psd = NULL
+) {
+  ## ---- basic validation ----
+  if (!is.function(rng)) {
+    stop("The argument 'rng' must be a function.", call. = FALSE)
+  }
+
+  ## is.finite() is checked before the comparison so that NA/NaN/Inf
+  ## entries give this message instead of a failure inside `if ()`.
+  if (!is.numeric(n) || length(n) == 0L ||
+        !all(is.finite(n)) || any(n <= 0)) {
+    stop(
+      "The argument 'n' must be a numeric vector of positive values.",
+      call. = FALSE
+    )
+  }
+
+  if (!is.numeric(nrep) || length(nrep) != 1L ||
+        !is.finite(nrep) || nrep <= 0) {
+    stop(
+      "The argument 'nrep' must be a positive integer.",
+      call. = FALSE
+    )
+  }
+
+  ## TRUE for a single finite numeric value.
+  is_number <- function(x) {
+    is.numeric(x) && length(x) == 1L && is.finite(x)
+  }
+
+  if (!is.null(pmean) && !is_number(pmean)) {
+    stop(
+      "The argument 'pmean' must be a single finite number.",
+      call. = FALSE
+    )
+  }
+
+  ## psd is a divisor below, so zero and negative values are rejected.
+  if (!is.null(psd) && !(is_number(psd) && psd > 0)) {
+    stop(
+      "The argument 'psd' must be a single positive number.",
+      call. = FALSE
+    )
+  }
+
+  ## ---- estimate pmean and psd if needed ----
+  if (is.null(pmean) || is.null(psd)) {
+    sample_data <- rng(100000, ...)
+    if (is.null(pmean)) pmean <- base::mean(sample_data)
+    if (is.null(psd)) psd <- stats::sd(sample_data)
+
+    if (!is_number(pmean) || !is_number(psd) || psd <= 0) {
+      stop(
+        "Could not estimate 'pmean' and 'psd' from 'rng'; ",
+        "please supply both explicitly.",
+        call. = FALSE
+      )
+    }
+  }
+
+  ## ---- generate standardized sample means ----
+  results <- vector("list", length(n))
+  names(results) <- as.character(n)
+
+  for (size in n) {
+    ## replicate() evaluates its expression inside a function(...) of
+    ## its own, so a bare `...` there would not be demo_clt()'s dots;
+    ## this closure forwards the caller's arguments to rng() instead.
+    rng_local <- function() rng(size, ...)
+    sample_means <- replicate(nrep, base::mean(rng_local()))
+
+    results[[as.character(size)]] <- data.frame(
+      StdMean = (sample_means - pmean) / (psd / sqrt(size)),
+      SampleSize = size
+    )
+  }
+
+  data <- do.call(rbind, results)
+
+  ## ---- standard normal reference ----
+  x_vals <- seq(-4, 4, length.out = 200)
+  normal_data <- data.frame(
+    x = x_vals,
+    y = stats::dnorm(x_vals)
+  )
+
+  ## ---- plot ----
+  ggplot2::ggplot(
+    data,
+    ggplot2::aes(x = .data$StdMean)
+  ) +
+    ggplot2::geom_histogram(
+      ggplot2::aes(y = ggplot2::after_stat(density)),
+      bins = 30,
+      color = "black",
+      fill = "skyblue"
+    ) +
+    ggplot2::geom_line(
+      data = normal_data,
+      ggplot2::aes(x = .data$x, y = .data$y),
+      color = "red",
+      linetype = "dashed",
+      linewidth = 0.8
+    ) +
+    ggplot2::facet_wrap(~ SampleSize) +
+    ggplot2::labs(
+      title = "Demonstrating the Central Limit Theorem",
+      x = "Standardized sample mean",
+      y = "Density"
+    ) +
+    ggplot2::theme_minimal()
+}