diff --git a/DESCRIPTION b/DESCRIPTION
index d3add72..7dd7fe9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: irxforge
 Title: Forging data for pharmacometric analyses
-Version: 0.0.0.9004
+Version: 0.0.0.9006
 Authors@R: c(
     person("Ron", "Keizer", email = "ron@insight-rx.com", role = c("cre", "aut")),
     person("Michael", "McCarthy", email = "michael.mccarthy@insight-rx.com", role = "ctb"),
diff --git a/NAMESPACE b/NAMESPACE
index 23045ba..c93d68c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export(add_nominal_timepoints)
+export(check_nm_dataset)
 export(create_demographics_table)
 export(download_nhanes_cache)
 export(get_nominal_timepoints)
diff --git a/R/check_nm_dataset.R b/R/check_nm_dataset.R
new file mode 100644
index 0000000..84ec070
--- /dev/null
+++ b/R/check_nm_dataset.R
@@ -0,0 +1,697 @@
+#' Run automated quality checks on a NONMEM dataset
+#'
+#' Performs a comprehensive set of checks on a NONMEM-style dataset and returns
+#' a summary table with the result of each check. No errors are thrown; all
+#' issues are reported as rows in the output table.
+#'
+#' @param data a data.frame (or tibble) containing the NONMEM dataset to check.
+#' @param dictionary an optional named list that maps standard NONMEM column
+#'   names to the actual column names in `data`. For example,
+#'   `list(ID = "NMID", TIME = "TAD")` means the column called `"NMID"` in
+#'   `data` plays the role of `ID`, and `"TAD"` plays the role of `TIME`.
+#'   Any standard name not listed in the dictionary is assumed to match the
+#'   column name in `data` as-is. Supported keys: `ID`, `TIME`, `DV`, `MDV`,
+#'   `EVID`, `AMT`, `CMT`, `RATE`, `SS`, `ADDL`, `II`.
+#' @param exclusion_flag character name of the column used as an exclusion flag
+#'   (for `$DATA ACCEPT` / `IGNORE` in NONMEM). If `NULL` (default), the
+#'   exclusion-flag check is skipped.
+#' @param max_digits integer; threshold for flagging numbers with too many
+#'   significant digits (default `10`).
+#' @param max_id_digits integer; preferred maximum number of characters for the
+#'   ID variable (default `5`).
+#' @param extra_reserved character vector of additional reserved NONMEM names
+#'   to check for (e.g. PK parameters used in ADVAN/TRANS such as `"CL"`,
+#'   `"V"`, `"V1"`, `"K12"`).
+#' @param verbose logical; if `TRUE` (default), messages are printed for every
+#'   check as it is evaluated.
+#'
+#' @returns A data.frame with columns `check` (short test name), `description`
+#'   (human-readable explanation), and `result` (`"PASS"`, `"FAIL"`, or
+#'   `"NA"` when a check is not applicable).
+#'
+#' @export
+check_nm_dataset <- function(
+  data,
+  dictionary = NULL,
+  exclusion_flag = NULL,
+  max_digits = 10,
+  max_id_digits = 5,
+  extra_reserved = NULL,
+  verbose = TRUE
+) {
+
+  # ──────────────────────────────────────────────────────────────────────────
+
+  # Resolve dictionary: map standard NONMEM names → actual column names
+
+  # ──────────────────────────────────────────────────────────────────────────
+  nm_keys <- c("ID", "TIME", "DV", "MDV", "EVID", "AMT", "CMT",
+               "RATE", "SS", "ADDL", "II")
+  col_map <- stats::setNames(nm_keys, nm_keys)          # default: name == column
+  if (!is.null(dictionary)) {
+    dict_upper <- stats::setNames(
+      as.character(dictionary),
+      toupper(names(dictionary))
+    )
+    unknown <- setdiff(names(dict_upper), nm_keys)
+    if (length(unknown) > 0) {
+      warning(
+        "Dictionary keys not recognised (ignored): ",
+        paste(unknown, collapse = ", ")
+      )
+    }
+    for (k in intersect(names(dict_upper), nm_keys)) {
+      col_map[[k]] <- dict_upper[[k]]
+    }
+    if (verbose) {
+      mapped <- col_map[col_map != names(col_map)]
+      if (length(mapped) > 0) {
+        message(
+          "Dictionary mapping: ",
+          paste(names(mapped), "->", mapped, collapse = ", ")
+        )
+      }
+    }
+  }
+
+  # Helper: resolve a standard NONMEM name to its actual column name
+  col_for <- function(nm_name) col_map[[nm_name]]
+
+  results <- data.frame(
+    check = character(0),
+    description = character(0),
+    result = character(0),
+    stringsAsFactors = FALSE
+  )
+
+  log_check <- function(check, description, result) {
+    if (verbose) {
+      symbol <- switch(result, PASS = "[PASS]", FAIL = "[FAIL]", "[ NA ]")
+      message(sprintf("%-8s %s", symbol, description))
+    }
+    results[nrow(results) + 1, ] <<- list(check, description, result)
+  }
+
+  # Column accessor helpers — all use dictionary-resolved names
+  has_col <- function(nm_name) col_for(nm_name) %in% names(data)
+  get_col <- function(nm_name) data[[col_for(nm_name)]]
+  cols <- toupper(names(data))
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Required columns
+  # ──────────────────────────────────────────────────────────────────────────
+  required <- c("ID", "TIME", "DV")
+  present <- vapply(required, function(k) col_for(k) %in% names(data), logical(1))
+  log_check(
+    "required_cols",
+    paste0(
+      "Minimum required columns present (",
+      paste(vapply(required, col_for, character(1)), collapse = ", "), ")"
+    ),
+    if (all(present)) "PASS" else "FAIL"
+  )
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Unique column names
+  # ──────────────────────────────────────────────────────────────────────────
+  log_check(
+    "unique_colnames",
+    "Column names are unique",
+    if (anyDuplicated(names(data)) == 0) "PASS" else "FAIL"
+  )
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # No special characters in column names
+  # ──────────────────────────────────────────────────────────────────────────
+  bad_names <- grep("[^A-Za-z0-9_]", names(data), value = TRUE)
+  log_check(
+    "colname_chars",
+    "Column names contain no special characters",
+    if (length(bad_names) == 0) "PASS" else "FAIL"
+  )
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Reserved NONMEM names
+  # ──────────────────────────────────────────────────────────────────────────
+  reserved <- c(
+    "L1", "L2", "DATE", "DAT1", "DAT2", "DAT3",
+    "PCMT", "CALL", "CONT", "NEWIND", "W",
+    "ETA", "THETA", "EPS", "ERR",
+    "IPRED", "IRES", "IWRES", "CWRES", "PHI"
+  )
+  if (!is.null(extra_reserved)) {
+    reserved <- unique(c(reserved, toupper(extra_reserved)))
+  }
+  used_reserved <- intersect(cols, reserved)
+  log_check(
+    "reserved_names",
+    "No reserved NONMEM variable names used as column names",
+    if (length(used_reserved) == 0) "PASS" else "FAIL"
+  )
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Character columns warning
+  # ──────────────────────────────────────────────────────────────────────────
+  char_cols <- names(data)[vapply(data, is.character, logical(1))]
+  log_check(
+    "char_columns",
+    "No character-type columns present (may cause NONMEM compilation failure)",
+    if (length(char_cols) == 0) "PASS" else "FAIL"
+  )
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # NA represented as "." not "NA"
+  # ──────────────────────────────────────────────────────────────────────────
+  has_na_string <- any(vapply(data, function(x) any(x == "NA", na.rm = TRUE), logical(1)))
+  log_check(
+    "na_coding",
+    "Missing values coded as '.' and not as string 'NA'",
+    if (!has_na_string) "PASS" else "FAIL"
+  )
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Commas inside character values (problematic for CSV)
+  # ──────────────────────────────────────────────────────────────────────────
+  has_commas <- any(vapply(
+    data[vapply(data, is.character, logical(1))],
+    function(x) any(grepl(",", x, fixed = TRUE), na.rm = TRUE),
+    logical(1)
+  ))
+  log_check(
+    "csv_commas",
+    "Character values do not contain commas (safe for CSV)",
+    if (!has_commas) "PASS" else "FAIL"
+  )
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Exclusion flag
+  # ──────────────────────────────────────────────────────────────────────────
+  if (!is.null(exclusion_flag)) {
+    if (exclusion_flag %in% names(data)) {
+      ef <- data[[exclusion_flag]]
+      ef_ok <- !any(is.na(ef)) && is.numeric(ef) && all(ef == round(ef))
+      log_check(
+        "exclusion_flag",
+        paste0("Exclusion flag '", exclusion_flag, "' is non-missing and integer"),
+        if (ef_ok) "PASS" else "FAIL"
+      )
+    } else {
+      log_check(
+        "exclusion_flag",
+        paste0("Exclusion flag '", exclusion_flag, "' exists in dataset"),
+        "FAIL"
+      )
+    }
+  } else {
+    log_check(
+      "exclusion_flag",
+      "Exclusion flag check (not specified)",
+      "NA"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # TIME checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("TIME")) {
+    time <- get_col("TIME")
+    log_check(
+      "time_numeric",
+      "TIME is numeric",
+      if (is.numeric(time)) "PASS" else "FAIL"
+    )
+    if (is.numeric(time)) {
+      log_check(
+        "time_nonneg",
+        "TIME is non-negative",
+        if (all(time >= 0, na.rm = TRUE)) "PASS" else "FAIL"
+      )
+    }
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # TIME non-decreasing within ID (except EVID 3, 4)
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("TIME") && has_col("ID") && is.numeric(get_col("TIME"))) {
+    time_ok <- TRUE
+    id_vec <- get_col("ID")
+    time_vec <- get_col("TIME")
+    evid_vec <- if (has_col("EVID")) get_col("EVID") else NULL
+    for (id in unique(id_vec)) {
+      idx <- id_vec == id
+      t <- time_vec[idx]
+      if (!is.null(evid_vec)) {
+        keep <- !(evid_vec[idx] %in% c(3, 4))
+        t <- t[keep]
+      }
+      if (length(t) > 1 && any(diff(t) < 0, na.rm = TRUE)) {
+        time_ok <- FALSE
+        break
+      }
+    }
+    log_check(
+      "time_order",
+      "TIME is non-decreasing within each ID (excluding EVID 3/4 reset events)",
+      if (time_ok) "PASS" else "FAIL"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # EVID checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("EVID")) {
+    evid <- get_col("EVID")
+    log_check(
+      "evid_values",
+      "EVID values are in {0, 1, 2, 3, 4}",
+      if (all(evid %in% c(0, 1, 2, 3, 4), na.rm = TRUE)) "PASS" else "FAIL"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # CMT checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("CMT")) {
+    cmt <- get_col("CMT")
+    log_check(
+      "cmt_integer",
+      "CMT values are integers",
+      if (is.numeric(cmt) && all(cmt == round(cmt), na.rm = TRUE)) "PASS" else "FAIL"
+    )
+    if (has_col("EVID")) {
+      evid <- get_col("EVID")
+      non_reset <- evid != 3
+      cmt_sub <- cmt[non_reset]
+      cmt_sub <- cmt_sub[!is.na(cmt_sub)]
+      log_check(
+        "cmt_positive",
+        "CMT is non-zero for non-reset (EVID!=3) records (negative allowed for emptying)",
+        if (all(cmt_sub != 0)) "PASS" else "FAIL"
+      )
+    }
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # DV checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("DV")) {
+    dv <- get_col("DV")
+    log_check(
+      "dv_numeric",
+      "DV is numeric",
+      if (is.numeric(dv)) "PASS" else "FAIL"
+    )
+    if (has_col("EVID")) {
+      evid <- get_col("EVID")
+      dose_evids <- evid %in% c(1, 4)
+      dv_at_dose <- dv[dose_evids]
+      log_check(
+        "dv_missing_dose",
+        "DV is missing (NA or 0) for dosing records (EVID 1, 4)",
+        if (all(is.na(dv_at_dose) | dv_at_dose == 0)) "PASS" else "FAIL"
+      )
+    }
+    if (has_col("EVID") && is.numeric(dv)) {
+      evid <- get_col("EVID")
+      obs_dv <- dv[evid == 0]
+      log_check(
+        "dv_obs_nonmissing",
+        "DV is non-missing for observation records (EVID 0)",
+        if (!any(is.na(obs_dv))) "PASS" else "FAIL"
+      )
+    }
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # MDV checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("MDV")) {
+    mdv <- get_col("MDV")
+    log_check(
+      "mdv_binary",
+      "MDV is binary (0 or 1)",
+      if (all(mdv %in% c(0, 1), na.rm = TRUE)) "PASS" else "FAIL"
+    )
+    if (has_col("DV") && is.numeric(get_col("DV")) && has_col("EVID")) {
+      dv <- get_col("DV")
+      evid <- get_col("EVID")
+      obs <- evid == 0
+      # Treat NA and DV <= 0 as missing/BLQ → MDV should be 1;
+      # DV > 0 → MDV should be 0
+      expected_mdv <- ifelse(is.na(dv[obs]) | dv[obs] <= 0, 1, 0)
+      log_check(
+        "mdv_dv_consistent",
+        "MDV matches DV for observations: MDV=1 when DV is NA or <= 0 (BLQ), MDV=0 when DV > 0",
+        if (all(mdv[obs] == expected_mdv, na.rm = TRUE)) "PASS" else "FAIL"
+      )
+      dosing <- evid %in% c(1, 4)
+      log_check(
+        "mdv_dosing",
+        "MDV is 1 for dosing records (EVID 1, 4)",
+        if (all(mdv[dosing] == 1, na.rm = TRUE)) "PASS" else "FAIL"
+      )
+    }
+    if (has_col("DV") && is.numeric(get_col("DV"))) {
+      dv <- get_col("DV")
+      valid_obs <- !is.na(dv) & dv > 0
+      if (has_col("EVID")) {
+        valid_obs <- valid_obs & get_col("EVID") == 0
+      }
+      log_check(
+        "mdv_nonmissing_obs",
+        "MDV is 0 for valid observation records (DV > 0)",
+        if (all(mdv[valid_obs] == 0, na.rm = TRUE)) "PASS" else "FAIL"
+      )
+    }
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # AMT checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("AMT")) {
+    amt <- get_col("AMT")
+    log_check(
+      "amt_numeric",
+      "AMT is numeric",
+      if (is.numeric(amt)) "PASS" else "FAIL"
+    )
+    if (has_col("EVID") && is.numeric(amt)) {
+      evid <- get_col("EVID")
+      obs_other <- evid %in% c(0, 2)
+      amt_obs <- amt[obs_other]
+      log_check(
+        "amt_zero_obs",
+        "AMT is 0 or missing for observation/other-type records (EVID 0, 2)",
+        if (all(is.na(amt_obs) | amt_obs == 0)) "PASS" else "FAIL"
+      )
+      dose_recs <- evid %in% c(1, 4)
+      amt_dose <- amt[dose_recs]
+      log_check(
+        "amt_positive_dose",
+        "AMT is positive for dosing records (EVID 1, 4)",
+        if (all(amt_dose > 0, na.rm = TRUE)) "PASS" else "FAIL"
+      )
+    }
+    if (has_col("EVID") && is.numeric(amt)) {
+      evid <- get_col("EVID")
+      amt_pos <- !is.na(amt) & amt > 0
+      log_check(
+        "amt_evid_consistent",
+        "Records with AMT > 0 have EVID = 1 (or 4)",
+        if (all(evid[amt_pos] %in% c(1, 4))) "PASS" else "FAIL"
+      )
+    }
+    if (has_col("EVID")) {
+      evid <- get_col("EVID")
+      dose_recs <- evid %in% c(1, 4)
+      log_check(
+        "amt_nonmissing_dose",
+        "AMT is non-missing for dosing records (EVID 1, 4)",
+        if (!any(is.na(amt[dose_recs]))) "PASS" else "FAIL"
+      )
+    }
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # RATE checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("RATE")) {
+    rate <- get_col("RATE")
+    log_check(
+      "rate_numeric",
+      "RATE is numeric",
+      if (is.numeric(rate)) "PASS" else "FAIL"
+    )
+    if (is.numeric(rate) && has_col("EVID")) {
+      evid <- get_col("EVID")
+      dose_recs <- evid %in% c(1, 4)
+      rate_dose <- rate[dose_recs]
+      rate_dose <- rate_dose[!is.na(rate_dose)]
+      log_check(
+        "rate_dose_values",
+        "RATE for dosing records is -2, -1, 0, or positive",
+        if (all(rate_dose %in% c(-2, -1, 0) | rate_dose > 0)) "PASS" else "FAIL"
+      )
+    }
+  } else {
+    log_check("rate_present", "RATE column present", "NA")
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # SS checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("SS")) {
+    ss <- get_col("SS")
+    log_check(
+      "ss_numeric",
+      "SS is numeric",
+      if (is.numeric(ss)) "PASS" else "FAIL"
+    )
+    if (is.numeric(ss) && has_col("EVID")) {
+      evid <- get_col("EVID")
+      dose_recs <- evid %in% c(1, 4)
+      ss_dose <- ss[dose_recs]
+      ss_dose <- ss_dose[!is.na(ss_dose)]
+      log_check(
+        "ss_values",
+        "SS for dosing records is 0 or 1",
+        if (all(ss_dose %in% c(0, 1))) "PASS" else "FAIL"
+      )
+    }
+  } else {
+    log_check("ss_present", "SS column present", "NA")
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # ADDL / II checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("ADDL") || has_col("II")) {
+    log_check(
+      "addl_ii_paired",
+      "ADDL and II are both present (required together)",
+      if (has_col("ADDL") && has_col("II")) "PASS" else "FAIL"
+    )
+    if (has_col("ADDL")) {
+      addl <- get_col("ADDL")
+      log_check(
+        "addl_nonneg_int",
+        "ADDL is a non-negative integer for dosing records",
+        if (is.numeric(addl) && has_col("EVID")) {
+          evid <- get_col("EVID")
+          dose_recs <- evid %in% c(1, 4)
+          addl_d <- addl[dose_recs]
+          addl_d <- addl_d[!is.na(addl_d)]
+          all(addl_d >= 0) && all(addl_d == round(addl_d))
+        } else {
+          FALSE
+        }
+      )
+    }
+    if (has_col("II")) {
+      ii <- get_col("II")
+      log_check(
+        "ii_nonneg_int",
+        "II is a non-negative integer for dosing records",
+        if (is.numeric(ii) && has_col("EVID")) {
+          evid <- get_col("EVID")
+          dose_recs <- evid %in% c(1, 4)
+          ii_d <- ii[dose_recs]
+          ii_d <- ii_d[!is.na(ii_d)]
+          all(ii_d >= 0) && all(ii_d == round(ii_d))
+        } else {
+          FALSE
+        }
+      )
+    }
+  } else {
+    log_check("addl_ii_present", "ADDL/II columns present", "NA")
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Overlapping ADDL-expanded doses
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("ADDL") && has_col("II") && has_col("EVID") && has_col("ID") &&
+      has_col("TIME") && is.numeric(get_col("ADDL")) && is.numeric(get_col("II"))) {
+    overlap_found <- FALSE
+    id_vec <- get_col("ID")
+    time_vec <- get_col("TIME")
+    evid_vec <- get_col("EVID")
+    addl_vec <- get_col("ADDL")
+    ii_vec <- get_col("II")
+    for (id in unique(id_vec)) {
+      idx <- which(id_vec == id)
+      # Sort dose records by TIME to avoid false positives/negatives
+      # when the dataset is not ordered
+      dose_idx <- idx[evid_vec[idx] %in% c(1, 4)]
+      if (length(dose_idx) < 2) next
+      dose_idx <- dose_idx[order(time_vec[dose_idx])]
+      for (i in seq_len(length(dose_idx) - 1)) {
+        ri <- dose_idx[i]
+        rn <- dose_idx[i + 1]
+        addl_i <- addl_vec[ri]
+        ii_i <- ii_vec[ri]
+        if (!is.na(addl_i) && !is.na(ii_i) && addl_i > 0 && ii_i > 0) {
+          last_expanded <- time_vec[ri] + addl_i * ii_i
+          if (last_expanded >= time_vec[rn]) {
+            overlap_found <- TRUE
+            break
+          }
+        }
+      }
+      if (overlap_found) break
+    }
+    log_check(
+      "addl_overlap",
+      "No overlapping ADDL-expanded doses (last expanded dose before next explicit dose)",
+      if (!overlap_found) "PASS" else "FAIL"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # ID checks
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("ID")) {
+    ids <- as.character(get_col("ID"))
+    has_leading_zeros <- any(grepl("^0\\d+", ids))
+    log_check(
+      "id_leading_zeros",
+      "IDs do not have leading zeros (would be lost when NONMEM reads the data)",
+      if (!has_leading_zeros) "PASS" else "FAIL"
+    )
+    max_len <- max(nchar(ids))
+    log_check(
+      "id_digits",
+      paste0("ID variable has <= ", max_id_digits, " digits (longer may need $TABLE FORMAT)"),
+      if (max_len <= max_id_digits) "PASS" else "FAIL"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Unique records (ID, TIME, CMT, EVID)
+  # ──────────────────────────────────────────────────────────────────────────
+  dup_nm_keys <- c("ID", "TIME", "CMT", "EVID")
+  dup_actual <- vapply(dup_nm_keys, col_for, character(1))
+  dup_cols <- dup_actual[dup_actual %in% names(data)]
+  if (length(dup_cols) >= 2) {
+    dup_check <- duplicated(data[, dup_cols, drop = FALSE])
+    log_check(
+      "unique_records",
+      paste0("Unique combination of ", paste(dup_cols, collapse = ", ")),
+      if (!any(dup_check)) "PASS" else "FAIL"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Dataset ordering (ID, TIME)
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("ID") && has_col("TIME")) {
+    id_vec <- get_col("ID")
+    time_vec <- get_col("TIME")
+    ord <- order(id_vec, time_vec)
+    is_sorted <- identical(
+      paste(id_vec, time_vec),
+      paste(id_vec[ord], time_vec[ord])
+    )
+    log_check(
+      "data_ordered",
+      "Dataset is ordered by at least ID and TIME",
+      if (is_sorted) "PASS" else "FAIL"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Large numbers (precision issues)
+  # ──────────────────────────────────────────────────────────────────────────
+  num_cols <- names(data)[vapply(data, is.numeric, logical(1))]
+  if (length(num_cols) > 0) {
+    too_many_digits <- FALSE
+    for (col in num_cols) {
+      vals <- data[[col]]
+      vals <- vals[!is.na(vals) & vals != 0]
+      if (length(vals) > 0) {
+        n_digits <- nchar(gsub("[^0-9]", "", format(vals, scientific = FALSE)))
+        if (any(n_digits > max_digits)) {
+          too_many_digits <- TRUE
+          break
+        }
+      }
+    }
+    log_check(
+      "precision",
+      paste0("Numeric values have <= ", max_digits, " significant digits"),
+      if (!too_many_digits) "PASS" else "FAIL"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # Missing covariate values
+  # ──────────────────────────────────────────────────────────────────────────
+  # Covariates are columns that are not standard NONMEM columns (mapped or not)
+  nm_standard <- c(
+    "ID", "TIME", "DV", "MDV", "EVID", "AMT", "RATE", "SS",
+    "II", "ADDL", "CMT", "GROUP", "BLQ", "LLOQ", "TAD"
+  )
+  # Include both the standard names and the dictionary-mapped actual names
+  all_standard <- unique(c(
+    nm_standard,
+    vapply(intersect(nm_keys, nm_standard), col_for, character(1))
+  ))
+  cov_cols <- setdiff(
+    names(data),
+    c(all_standard, if (!is.null(exclusion_flag)) exclusion_flag)
+  )
+  if (length(cov_cols) > 0) {
+    has_missing_cov <- FALSE
+    for (col in cov_cols) {
+      vals <- data[[col]]
+      has_sentinel <- FALSE
+
+      if (is.numeric(vals)) {
+        has_sentinel <- any(vals %in% c(-999, -99), na.rm = TRUE)
+      } else if (is.character(vals) || is.factor(vals)) {
+        has_sentinel <- any(as.character(vals) %in% c(".", "NA"), na.rm = TRUE)
+      }
+
+      if (any(is.na(vals)) || has_sentinel) {
+        has_missing_cov <- TRUE
+        break
+      }
+    }
+    log_check(
+      "covariate_missing",
+      "No missing or sentinel values (-999, -99, '.', 'NA') in covariate columns",
+      if (!has_missing_cov) "PASS" else "FAIL"
+    )
+  }
+
+  # ──────────────────────────────────────────────────────────────────────────
+  # RATE column consistency with AMT
+  # ──────────────────────────────────────────────────────────────────────────
+  if (has_col("RATE") && has_col("AMT") &&
+      is.numeric(get_col("RATE")) && is.numeric(get_col("AMT"))) {
+    if (has_col("EVID")) {
+      evid <- get_col("EVID")
+      dose_recs <- evid %in% c(1, 4)
+      rate_dose <- get_col("RATE")[dose_recs]
+      amt_dose <- get_col("AMT")[dose_recs]
+      real_rate <- !is.na(rate_dose) & !is.na(amt_dose) & rate_dose > 0
+      log_check(
+        "rate_amt_consistency",
+        "Dosing records with positive RATE (mg/h) also have positive AMT",
+        if (sum(real_rate) == 0 || all(amt_dose[real_rate] > 0)) "PASS" else "FAIL"
+      )
+    }
+  }
+
+  if (verbose) {
+    n_pass <- sum(results$result == "PASS")
+    n_fail <- sum(results$result == "FAIL")
+    n_na <- sum(results$result == "NA")
+    message(sprintf(
+      "\n--- Summary: %d checks | %d PASS | %d FAIL | %d NA ---",
+      nrow(results), n_pass, n_fail, n_na
+    ))
+  }
+
+  results
+}
diff --git a/man/check_nm_dataset.Rd b/man/check_nm_dataset.Rd
new file mode 100644
index 0000000..16a1754
--- /dev/null
+++ b/man/check_nm_dataset.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/check_nm_dataset.R
+\name{check_nm_dataset}
+\alias{check_nm_dataset}
+\title{Run automated quality checks on a NONMEM dataset}
+\usage{
+check_nm_dataset(
+  data,
+  dictionary = NULL,
+  exclusion_flag = NULL,
+  max_digits = 10,
+  max_id_digits = 5,
+  extra_reserved = NULL,
+  verbose = TRUE
+)
+}
+\arguments{
+\item{data}{a data.frame (or tibble) containing the NONMEM dataset to check.}
+
+\item{dictionary}{an optional named list that maps standard NONMEM column
+names to the actual column names in \code{data}. For example,
+\code{list(ID = "NMID", TIME = "TAD")} means the column called \code{"NMID"} in
+\code{data} plays the role of \code{ID}, and \code{"TAD"} plays the role of \code{TIME}.
+Any standard name not listed in the dictionary is assumed to match the
+column name in \code{data} as-is. Supported keys: \code{ID}, \code{TIME}, \code{DV}, \code{MDV},
+\code{EVID}, \code{AMT}, \code{CMT}, \code{RATE}, \code{SS}, \code{ADDL}, \code{II}.}
+
+\item{exclusion_flag}{character name of the column used as an exclusion flag
+(for \verb{$DATA ACCEPT} / \code{IGNORE} in NONMEM). If \code{NULL} (default), the
+exclusion-flag check is skipped.}
+
+\item{max_digits}{integer; threshold for flagging numbers with too many
+significant digits (default \code{10}).}
+
+\item{max_id_digits}{integer; preferred maximum number of characters for the
+ID variable (default \code{5}).}
+
+\item{extra_reserved}{character vector of additional reserved NONMEM names
+to check for (e.g. PK parameters used in ADVAN/TRANS such as \code{"CL"},
+\code{"V"}, \code{"V1"}, \code{"K12"}).}
+
+\item{verbose}{logical; if \code{TRUE} (default), messages are printed for every
+check as it is evaluated.}
+}
+\value{
+A data.frame with columns \code{check} (short test name), \code{description}
+(human-readable explanation), and \code{result} (\code{"PASS"}, \code{"FAIL"}, or
+\code{"NA"} when a check is not applicable).
+}
+\description{
+Performs a comprehensive set of checks on a NONMEM-style dataset and returns
+a summary table with the result of each check. No errors are thrown; all
+issues are reported as rows in the output table.
+}
diff --git a/tests/testthat/test-check_nm_dataset.R b/tests/testthat/test-check_nm_dataset.R
new file mode 100644
index 0000000..4acbf7a
--- /dev/null
+++ b/tests/testthat/test-check_nm_dataset.R
@@ -0,0 +1,368 @@
+# Helper: minimal valid NONMEM dataset
+make_nm <- function(n_id = 2, n_obs = 3) {
+  do.call(rbind, lapply(seq_len(n_id), function(id) {
+    dose <- data.frame(
+      ID = id, TIME = 0, DV = NA_real_, MDV = 1, EVID = 1,
+      AMT = 100, CMT = 1, stringsAsFactors = FALSE
+    )
+    obs <- data.frame(
+      ID = id,
+      TIME = seq_len(n_obs),
+      DV = runif(n_obs, 1, 10),
+      MDV = 0, EVID = 0, AMT = 0, CMT = 1,
+      stringsAsFactors = FALSE
+    )
+    rbind(dose, obs)
+  }))
+}
+
+# Helper: dataset with renamed columns (for dictionary tests)
+make_nm_renamed <- function(n_id = 2, n_obs = 3) {
+  d <- make_nm(n_id = n_id, n_obs = n_obs)
+  names(d) <- c("SUBJ", "TAD", "CONC", "MISS", "FLAG", "DOSE", "COMP")
+  d
+}
+
+test_that("clean dataset passes all applicable checks", {
+  d <- make_nm()
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_true(all(res$result %in% c("PASS", "NA")))
+  expect_s3_class(res, "data.frame")
+  expect_named(res, c("check", "description", "result"))
+})
+
+test_that("missing required columns are flagged", {
+  d <- data.frame(ID = 1, TIME = 0) # no DV
+
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "required_cols"], "FAIL")
+})
+
+test_that("duplicate column names are flagged", {
+  d <- make_nm()
+  names(d)[7] <- "ID" # duplicate
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "unique_colnames"], "FAIL")
+})
+
+test_that("special characters in column names are flagged", {
+  d <- make_nm()
+  names(d)[1] <- "ID#1"
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "colname_chars"], "FAIL")
+})
+
+test_that("reserved NONMEM names are detected", {
+  d <- make_nm()
+  d$THETA <- 1
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "reserved_names"], "FAIL")
+})
+
+test_that("extra_reserved catches PK parameter names", {
+  d <- make_nm()
+  d$CL <- 5
+  # Without extra_reserved, CL is not flagged (it's not in the base list)
+  res1 <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res1$result[res1$check == "reserved_names"], "PASS")
+  # With extra_reserved
+  res2 <- check_nm_dataset(d, extra_reserved = c("CL", "V"), verbose = FALSE)
+  expect_equal(res2$result[res2$check == "reserved_names"], "FAIL")
+})
+
+test_that("negative TIME is flagged", {
+  d <- make_nm()
+  d$TIME[2] <- -1
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "time_nonneg"], "FAIL")
+})
+
+test_that("invalid EVID values are flagged", {
+  d <- make_nm()
+  d$EVID[1] <- 5
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "evid_values"], "FAIL")
+})
+
+test_that("AMT checks work correctly", {
+  d <- make_nm()
+  # AMT > 0 for observation record
+  d$AMT[2] <- 50
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "amt_zero_obs"], "FAIL")
+  expect_equal(res$result[res$check == "amt_evid_consistent"], "FAIL")
+})
+
+test_that("AMT missing for dosing records is flagged", {
+  d <- make_nm()
+  d$AMT[1] <- NA
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "amt_nonmissing_dose"], "FAIL")
+})
+
+test_that("DV present for dosing records is flagged", {
+  d <- make_nm()
+  d$DV[1] <- 999  # dose record should not have a real DV
+
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "dv_missing_dose"], "FAIL")
+})
+
+test_that("MDV = 1 for valid observation (DV > 0) is flagged", {
+  d <- make_nm()
+  d$MDV[2] <- 1  # observation with DV > 0 should have MDV = 0
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "mdv_nonmissing_obs"], "FAIL")
+  expect_equal(res$result[res$check == "mdv_dv_consistent"], "FAIL")
+})
+
+test_that("MDV/DV consistency treats DV <= 0 as BLQ (MDV should be 1)", {
+  d <- make_nm()
+  # Set an observation DV to a negative value (e.g. log-transformed BLQ)
+  # and MDV = 0 — this should FAIL because DV <= 0 requires MDV = 1
+  d$DV[2] <- -2
+  d$MDV[2] <- 0
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "mdv_dv_consistent"], "FAIL")
+
+  # Now set MDV = 1 for the BLQ record — should PASS
+  d$MDV[2] <- 1
+  res2 <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res2$result[res2$check == "mdv_dv_consistent"], "PASS")
+  expect_equal(res2$result[res2$check == "mdv_nonmissing_obs"], "PASS")
+})
+
+test_that("exclusion flag checks", {
+  d <- make_nm()
+  d$EXCL <- c(0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L)
+  res <- check_nm_dataset(d, exclusion_flag = "EXCL", verbose = FALSE)
+  expect_equal(res$result[res$check == "exclusion_flag"], "PASS")
+
+  # NA in exclusion flag
+  d$EXCL[1] <- NA
+
+  res2 <- check_nm_dataset(d, exclusion_flag = "EXCL", verbose = FALSE)
+  expect_equal(res2$result[res2$check == "exclusion_flag"], "FAIL")
+})
+
+test_that("ID leading zeros flagged", {
+  d <- make_nm(n_id = 1)
+  d$ID <- "001"
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "id_leading_zeros"], "FAIL")
+})
+
+test_that("decreasing TIME within ID is flagged", {
+  d <- make_nm(n_id = 1)
+  d$TIME <- c(0, 3, 2, 1)
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "time_order"], "FAIL")
+})
+
+test_that("character columns are flagged", {
+  d <- make_nm()
+  d$STUDY <- "A"
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "char_columns"], "FAIL")
+})
+
+test_that("string 'NA' values are detected", {
+  d <- make_nm()
+  d$DV <- as.character(d$DV)
+  d$DV[2] <- "NA"
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "na_coding"], "FAIL")
+})
+
+test_that("dataset ordering is checked", {
+  d <- make_nm(n_id = 2)
+  d <- d[sample(nrow(d)), ]  # shuffle
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "data_ordered"], "FAIL")
+})
+
+test_that("ADDL/II pairing is checked", {
+  d <- make_nm()
+  d$ADDL <- 0
+  # II missing
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "addl_ii_paired"], "FAIL")
+})
+
+test_that("ADDL overlap is detected", {
+  d <- data.frame(
+    ID = c(1, 1, 1),
+    TIME = c(0, 24, 48),
+    DV = c(NA, 5, 3),
+    MDV = c(1, 0, 0),
+    EVID = c(1, 0, 0),
+    AMT = c(100, 0, 0),
+    CMT = c(1, 1, 1),
+    ADDL = c(5, 0, 0),  # expands to TIME = 0, 12, 24, 36, 48 -- overlaps next dose
+    II = c(12, 0, 0),
+    stringsAsFactors = FALSE
+  )
+  # No second dose record here, so no overlap to detect.
+  # Add a second dose:
+  d2 <- rbind(d[1, ], data.frame(
+    ID = 1, TIME = 24, DV = NA, MDV = 1, EVID = 1,
+    AMT = 100, CMT = 1, ADDL = 0, II = 0, stringsAsFactors = FALSE
+  ), d[2:3, ])
+  res <- check_nm_dataset(d2, verbose = FALSE)
+  # First dose: TIME=0, ADDL=5, II=12 => last expanded at 60 >= next dose at 24
+  expect_equal(res$result[res$check == "addl_overlap"], "FAIL")
+})
+
+test_that("ADDL overlap detection works on unsorted datasets", {
+  # Build an overlapping dataset but shuffle the rows so it's not sorted
+  d <- data.frame(
+    ID   = c(1, 1, 1, 1),
+    TIME = c(0, 24, 24, 48),
+    DV   = c(NA, NA, 5, 3),
+    MDV  = c(1, 1, 0, 0),
+    EVID = c(1, 1, 0, 0),
+    AMT  = c(100, 100, 0, 0),
+    CMT  = c(1, 1, 1, 1),
+    ADDL = c(5, 0, 0, 0),
+    II   = c(12, 0, 0, 0),
+    stringsAsFactors = FALSE
+  )
+  # TIME=0, ADDL=5, II=12 => last expanded at 60 >= next dose at 24
+  # Shuffle rows so dose at TIME=24 comes before dose at TIME=0
+  d_shuffled <- d[c(3, 2, 4, 1), ]
+  res <- check_nm_dataset(d_shuffled, verbose = FALSE)
+  expect_equal(res$result[res$check == "addl_overlap"], "FAIL")
+
+  # Non-overlapping case: reduce ADDL so expansion ends before next dose
+  d2 <- d
+  d2$ADDL[1] <- 1  # last expanded at TIME = 12, next dose at 24 → OK
+  d2_shuffled <- d2[c(3, 2, 4, 1), ]
+  res2 <- check_nm_dataset(d2_shuffled, verbose = FALSE)
+  expect_equal(res2$result[res2$check == "addl_overlap"], "PASS")
+})
+
+test_that("covariate missing values are detected", {
+  d <- make_nm()
+  d$WT <- c(70, 80, NA, 75, 65, 90, 85, 70)
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "covariate_missing"], "FAIL")
+})
+
+test_that("covariate sentinel values (-999) are detected", {
+  d <- make_nm()
+  d$WT <- c(70, 80, -999, 75, 65, 90, 85, 70)
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "covariate_missing"], "FAIL")
+})
+
+test_that("verbose output produces messages", {
+  d <- make_nm()
+  expect_message(check_nm_dataset(d, verbose = TRUE), "PASS")
+})
+
+test_that("SS values checked for dosing records", {
+  d <- make_nm()
+  d$SS <- 0
+  d$SS[1] <- 2  # invalid: SS should be 0 or 1
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "ss_values"], "FAIL")
+})
+
+test_that("RATE column checks", {
+  d <- make_nm()
+  d$RATE <- 0
+  d$RATE[1] <- -3  # invalid
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "rate_dose_values"], "FAIL")
+})
+
+test_that("duplicate records are flagged", {
+  d <- make_nm(n_id = 1)
+  d <- rbind(d, d[2, ])  # duplicate observation record
+  res <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res$result[res$check == "unique_records"], "FAIL")
+})
+
+# ── Dictionary tests ──────────────────────────────────────────────────────────
+
+test_that("dictionary maps renamed columns and passes clean dataset", {
+  d <- make_nm_renamed()
+  dict <- list(
+    ID = "SUBJ", TIME = "TAD", DV = "CONC", MDV = "MISS",
+    EVID = "FLAG", AMT = "DOSE", CMT = "COMP"
+  )
+  res <- check_nm_dataset(d, dictionary = dict, verbose = FALSE)
+  # All applicable checks should pass (or be NA), none should fail
+  expect_true(all(res$result %in% c("PASS", "NA")))
+})
+
+test_that("dictionary resolves required columns correctly", {
+  # Without dictionary, renamed dataset fails required_cols
+  d <- make_nm_renamed()
+  res_no_dict <- check_nm_dataset(d, verbose = FALSE)
+  expect_equal(res_no_dict$result[res_no_dict$check == "required_cols"], "FAIL")
+
+  # With dictionary, it passes
+  dict <- list(ID = "SUBJ", TIME = "TAD", DV = "CONC")
+  res_dict <- check_nm_dataset(d, dictionary = dict, verbose = FALSE)
+  expect_equal(res_dict$result[res_dict$check == "required_cols"], "PASS")
+})
+
+test_that("dictionary catches errors in renamed columns", {
+  d <- make_nm_renamed()
+  d$TAD[2] <- -1  # negative TIME (via TAD)
+  dict <- list(
+    ID = "SUBJ", TIME = "TAD", DV = "CONC", MDV = "MISS",
+    EVID = "FLAG", AMT = "DOSE", CMT = "COMP"
+  )
+  res <- check_nm_dataset(d, dictionary = dict, verbose = FALSE)
+  expect_equal(res$result[res$check == "time_nonneg"], "FAIL")
+})
+
+test_that("dictionary works for AMT/EVID consistency", {
+  d <- make_nm_renamed()
+  d$DOSE[3] <- 50  # AMT > 0 for an observation (EVID = 0)
+  dict <- list(
+    ID = "SUBJ", TIME = "TAD", DV = "CONC", MDV = "MISS",
+    EVID = "FLAG", AMT = "DOSE", CMT = "COMP"
+  )
+  res <- check_nm_dataset(d, dictionary = dict, verbose = FALSE)
+  expect_equal(res$result[res$check == "amt_zero_obs"], "FAIL")
+  expect_equal(res$result[res$check == "amt_evid_consistent"], "FAIL")
+})
+
+test_that("partial dictionary works (unmapped columns keep standard names)", {
+  d <- make_nm()
+  names(d)[1] <- "SUBJ"  # only rename ID
+  dict <- list(ID = "SUBJ")
+  res <- check_nm_dataset(d, dictionary = dict, verbose = FALSE)
+  expect_equal(res$result[res$check == "required_cols"], "PASS")
+  expect_true(all(res$result %in% c("PASS", "NA")))
+})
+
+test_that("dictionary is case-insensitive for keys", {
+  d <- make_nm_renamed()
+  dict <- list(
+    id = "SUBJ", time = "TAD", dv = "CONC", mdv = "MISS",
+    evid = "FLAG", amt = "DOSE", cmt = "COMP"
+  )
+  res <- check_nm_dataset(d, dictionary = dict, verbose = FALSE)
+  expect_equal(res$result[res$check == "required_cols"], "PASS")
+})
+
+test_that("unknown dictionary keys produce a warning", {
+  d <- make_nm()
+  expect_warning(
+    check_nm_dataset(d, dictionary = list(FOOBAR = "X"), verbose = FALSE),
+    "not recognised"
+  )
+})
+
+test_that("dictionary verbose output shows mapping", {
+  d <- make_nm()
+  names(d)[1] <- "SUBJ"
+  expect_message(
+    check_nm_dataset(d, dictionary = list(ID = "SUBJ"), verbose = TRUE),
+    "Dictionary mapping: ID -> SUBJ"
+  )
+})