Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
\.Rcheck$
\.Rout$
\.Rproj$
\.tar\.gz$
^GPATH$
^GRTAGS$
^GTAGS$
^LICENSE$
^Makefile$
^README\.Rmd$
^README\.html$
^README_cache$
^TAGS$
^TODO\.org$
^\#
^\.Rhistory$
^\.Rproj\.user$
^\.\#
^\.clang_complete$
^\.clangd$
^\.git$
^\.github$
^\.gitlab-ci\.yml$
^\.travis\.yml$
^_pkgdown\.yml$
^appveyor\.yml$
^docs$
^misc$
^revdep$
^test$
^working$
~$
28 changes: 28 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Package: ibist
Title: Data and Functions for Introduction to Biostatistics with R
Version: 0.1-0
Authors@R: c(
person(given = "Elizabeth", family = "Schifano",
role = c("aut"),
comment = c(ORCID = "0000-0002-9793-332X")),
person(given = "Jun", family = "Yan",
email = "jun.yan@uconn.edu",
role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-4401-7296"))
)
Description: Provides datasets and supporting functions for the book
Introduction to Biostatistics with R by Schifano and Yan (2026+),
published by Taylor & Francis. The package is intended for teaching
introductory biostatistics and for reproducing examples in the text.
Depends:
R (>= 4.4.0)
VignetteBuilder: knitr
License: GPL (>= 3)
URL: https://github.com/statds/ibist-R
BugReports: https://github.com/statds/ibist-R/issues
Imports: stats, rlang, ggplot2
Suggests: knitr, testthat (>= 3.0.0)
LazyData: true
RoxygenNote: 7.3.2
Encoding: UTF-8
Config/testthat/edition: 3
661 changes: 0 additions & 661 deletions LICENSE

This file was deleted.

74 changes: 74 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
## ---- package metadata, read once from DESCRIPTION ----
## The last whitespace-separated field of the matching line is taken.
pkg := $(shell awk '/^Package:/ {print $$NF}' DESCRIPTION)
version := $(shell awk '/^Version:/ {print $$NF}' DESCRIPTION)

## ---- derived artefact names ----
tar := $(pkg)_$(version).tar.gz
checkLog := $(pkg).Rcheck/00check.log

## ---- inputs that trigger rebuilds ----
objects := $(wildcard R/*.R) DESCRIPTION
tinytest := $(wildcard inst/tinytest/*.R)

## ---- vignettes: every .Rmd maps to an .html next to it ----
rmd := $(wildcard vignettes/*.Rmd)
vignettes := $(rmd:.Rmd=.html)


## Command-style entry points; none of these names is a file on disk.
.PHONY: check build install preview pkgdown deploy-pkgdown

## Run R CMD check by requesting its log file.
check: $(checkLog)

## Produce the source tarball.
build: $(tar)

## Build from the working tree, then install the resulting tarball.
install:
	R CMD build .
	R CMD INSTALL $(tar)

## Render every vignette to HTML.
preview: $(vignettes)

## Build the pkgdown site.
pkgdown:
	Rscript -e "library(methods); pkgdown::build_site();"

## Run misc/deploy_docs.sh.
deploy-pkgdown:
	@bash misc/deploy_docs.sh

## Install the built tarball, then start inst/run_rcpp_test.R in the
## background with its stdout redirected to check-rcpp.Rout.
.PHONY: check-rcpp
check-rcpp: $(tar)
	R CMD INSTALL $(tar)
	Rscript inst/run_rcpp_test.R > check-rcpp.Rout &

## Stage a fresh tarball in revdep/ and launch misc/revdep_check.R in the
## background (nohup, so it survives the terminal closing).
.PHONY: check-revdep
check-revdep: $(tar)
	@mkdir -p revdep
	@# Two plain globs instead of revdep/{a,b}: brace expansion is a
	@# bashism, and make runs recipes with /bin/sh unless SHELL is set.
	@rm -rf revdep/*.Rcheck revdep/*.tar.gz
	@cp $(tar) revdep
	nohup R CMD BATCH --no-save --no-restore misc/revdep_check.R &

## Source tarball: regenerate the roxygen documentation, run the
## update-timestamp target, then build. Rebuilt whenever an R source
## file or DESCRIPTION changes.
$(tar): $(objects)
	@Rscript -e "library(methods);" \
		-e "devtools::document();"
	@$(MAKE) update-timestamp
	R CMD build .

## R CMD check log: rerun the check when the tarball or any file under
## inst/tinytest changes.
$(checkLog): $(tar) $(tinytest)
	R CMD check --as-cran $(tar)

## Render one vignette. $< (the .Rmd source) is used rather than $?,
## which expands only to the prerequisites newer than the target.
vignettes/%.html: vignettes/%.Rmd
	Rscript -e "library(methods); rmarkdown::render('$<')"

.PHONY: readme update-timestamp tags

## Regenerate README.md from README.Rmd when the latter is newer.
readme: README.md

README.md: README.Rmd
	@Rscript -e "rmarkdown::render('$<')"

## update copyright year (delegated to misc/update_timestamp.sh)
update-timestamp:
	@bash misc/update_timestamp.sh

## Write a TAGS file for the sources under R/.
tags:
	Rscript -e "utils::rtags(path = 'R', ofile = 'TAGS')"

## Remove editor backups, R history files, built tarballs, compiled
## objects, check directories, batch output, lock files and caches.
## $(RM) is `rm -f`, so patterns that match nothing are not an error.
.PHONY: clean
clean:
	@$(RM) -r *~ */*~ .Rhistory *.Rhistory *.tar.gz src/*.so src/*.o \
		*.Rcheck/ *.Rout .\#* *_cache
22 changes: 22 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by roxygen2: do not edit by hand

export(demo_clt)
export(power.p1s.test)
export(rate.test)
importFrom(ggplot2,aes)
importFrom(ggplot2,after_stat)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_histogram)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
importFrom(ggplot2,theme_minimal)
importFrom(rlang,.data)
importFrom(stats,dbinom)
importFrom(stats,density)
importFrom(stats,dnorm)
importFrom(stats,pbinom)
importFrom(stats,pnorm)
importFrom(stats,qbinom)
importFrom(stats,qnorm)
importFrom(stats,uniroot)
43 changes: 43 additions & 0 deletions R/data-nrs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Non-restorative sleep and physical activity (Japan cohort study)
#'
#' A large observational dataset from a cohort study conducted in Japan
#' to examine the association between non-restorative sleep (NRS) and
#' physical activity, gender, and age. The data are used to illustrate
#' logistic regression modeling for a binary outcome in a large-sample
#' setting.
#'
#' @format
#' A data frame with 90,122 observations on the following variables:
#' \describe{
#' \item{id}{Subject identifier.}
#' \item{Gender}{Gender of the subject (integer-coded).}
#' \item{Age_2013}{Age in years in 2013.}
#' \item{EX_2013}{Indicator of regular exercise in 2013
#' (integer-coded).}
#' \item{PA_2013}{Physical activity measure in 2013
#' (integer-coded).}
#' \item{NRS_2013}{Indicator of non-restorative sleep in 2013
#' (1 = presence, 0 = absence).}
#' \item{AgeGroup_2013}{Categorical age group in 2013
#' (integer-coded).}
#' \item{EXPA_classification}{Combined classification of exercise and
#' physical activity status (integer-coded).}
#' }
#'
#' @details
#' Non-restorative sleep (NRS) is defined as a subjective feeling of lack
#' of refreshment on awakening and reflects qualitative aspects of sleep.
#' Hidaka et al. (2019) analyzed these data using logistic regression to
#' assess whether the probability of NRS is associated with physical
#' activity, gender, and age in a large cohort of adult subjects in
#' Japan. Within this package, the dataset is provided for methodological
#' illustration of binary regression models rather than for substantive
#' epidemiological inference.
#'
#' All variables are stored as integer codes. Missing values are
#' represented as \code{NA}.
#'
#' @source
#' Hidaka et al. (2019).
#'
"nrs"
120 changes: 120 additions & 0 deletions R/demo_clt.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#' Demonstrate the Central Limit Theorem
#'
#' The \code{demo_clt()} function generates plots to illustrate the
#' Central Limit Theorem (CLT) using a specified random number generator.
#' The function displays standardized sampling distributions for
#' different sample sizes and overlays the standard normal density.
#'
#' @param rng A random number generator function taking the sample size
#'   as its first argument (e.g., \code{runif}, \code{rnorm},
#'   \code{rgamma}).
#' @param n A non-empty numeric vector of positive, finite sample sizes
#'   (e.g., \code{c(5, 10, 20, 40)}).
#' @param nrep The number of repetitions for generating sample means
#'   (default is 10000). Must be a single positive, finite number.
#' @param ... Additional arguments passed to the random number generator
#'   (e.g., \code{shape} and \code{rate} for \code{rgamma}).
#' @param pmean The population mean of the distribution. If \code{NULL},
#'   it is estimated from a Monte Carlo sample of size 100000.
#' @param psd The population standard deviation of the distribution;
#'   must be positive. If \code{NULL}, it is estimated from a Monte Carlo
#'   sample of size 100000.
#'
#' @return A \code{ggplot2} object showing the standardized sampling
#'   distributions for different sample sizes, compared against the
#'   standard normal curve.
#'
#' @details
#' An error is raised when \code{rng} is not a function, when \code{n} or
#' \code{nrep} is empty, missing, non-finite, or non-positive, when
#' \code{pmean} is not a single finite number, or when \code{psd}
#' (supplied or estimated) is not a single positive finite number.
#'
#' @examples
#' set.seed(123)
#' demo_clt(runif, n = c(5, 10, 20, 40), min = 0, max = 1)
#'
#' demo_clt(rgamma, n = c(5, 10, 20, 40), shape = 2, rate = 1,
#'   pmean = 2, psd = sqrt(2)
#' )
#'
#' @importFrom rlang .data
#' @importFrom ggplot2 ggplot geom_histogram geom_line aes
#' @importFrom ggplot2 facet_wrap labs theme_minimal after_stat
#' @export
demo_clt <- function(
  rng,
  n,
  nrep = 10000,
  ...,
  pmean = NULL,
  psd = NULL
) {
  ## ---- basic validation ----
  if (!is.function(rng)) {
    stop("The argument 'rng' must be a function.", call. = FALSE)
  }

  ## Empty, NA and infinite sizes are rejected explicitly: an NA would
  ## otherwise make the condition itself NA and abort with an unrelated
  ## "missing value where TRUE/FALSE needed" error.
  if (!is.numeric(n) || length(n) == 0L || !all(is.finite(n)) ||
      any(n <= 0)) {
    stop(
      "The argument 'n' must be a numeric vector of positive values.",
      call. = FALSE
    )
  }

  if (!is.numeric(nrep) || length(nrep) != 1L || !is.finite(nrep) ||
      nrep <= 0) {
    stop(
      "The argument 'nrep' must be a positive integer.",
      call. = FALSE
    )
  }

  ## TRUE when `value` is a single finite number.
  is_finite_scalar <- function(value) {
    is.numeric(value) && length(value) == 1L && is.finite(value)
  }

  ## User-supplied moments are checked before any random numbers are
  ## drawn, so a bad call fails fast and leaves the RNG state untouched.
  if (!is.null(pmean) && !is_finite_scalar(pmean)) {
    stop(
      "The argument 'pmean' must be a single finite number.",
      call. = FALSE
    )
  }

  if (!is.null(psd) && !(is_finite_scalar(psd) && psd > 0)) {
    stop("The argument 'psd' must be a positive number.", call. = FALSE)
  }

  ## ---- estimate pmean and psd if needed ----
  if (is.null(pmean) || is.null(psd)) {
    sample_data <- rng(100000, ...)
    if (is.null(pmean)) pmean <- base::mean(sample_data)
    if (is.null(psd)) psd <- stats::sd(sample_data)
  }

  ## A zero or non-finite scale would turn every standardized mean into
  ## Inf/NaN, so stop here instead of drawing an empty plot.
  if (!(is_finite_scalar(psd) && psd > 0)) {
    stop(
      "The estimated standard deviation is not a positive finite ",
      "number; supply 'psd' explicitly.",
      call. = FALSE
    )
  }

  ## ---- generate standardized sample means ----
  results <- vector("list", length(n))
  names(results) <- as.character(n)

  for (size in n) {
    ## The draw is wrapped in a closure so that `...` refers to the
    ## arguments given to demo_clt(); written inline, replicate() would
    ## evaluate `...` inside its own internal function instead.
    rng_local <- function() rng(size, ...)
    sample_means <- replicate(nrep, base::mean(rng_local()))

    results[[as.character(size)]] <- data.frame(
      StdMean = (sample_means - pmean) / (psd / sqrt(size)),
      SampleSize = size
    )
  }

  data <- do.call(rbind, results)

  ## ---- standard normal reference ----
  x_vals <- seq(-4, 4, length.out = 200)
  normal_data <- data.frame(
    x = x_vals,
    y = stats::dnorm(x_vals)
  )

  ## ---- plot ----
  ggplot2::ggplot(
    data,
    ggplot2::aes(x = .data$StdMean)
  ) +
    ggplot2::geom_histogram(
      ## `density` here is the statistic computed by the histogram
      ## layer, which puts the bars on the same scale as dnorm().
      ggplot2::aes(y = ggplot2::after_stat(density)),
      bins = 30,
      color = "black",
      fill = "skyblue"
    ) +
    ggplot2::geom_line(
      data = normal_data,
      ggplot2::aes(x = .data$x, y = .data$y),
      color = "red",
      linetype = "dashed",
      linewidth = 0.8
    ) +
    ggplot2::facet_wrap(~ SampleSize) +
    ggplot2::labs(
      title = "Demonstrating the Central Limit Theorem",
      x = "Standardized sample mean",
      y = "Density"
    ) +
    ggplot2::theme_minimal()
}
Loading