From e3d15495c54d2133752e408d33a8fef8c3289dee Mon Sep 17 00:00:00 2001 From: Jeff Dickinson Date: Thu, 11 Dec 2025 16:22:17 -0600 Subject: [PATCH 1/5] #117 add tests for sort --- tests/testthat/test-sort.R | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/testthat/test-sort.R b/tests/testthat/test-sort.R index c3c9d57..7e38088 100644 --- a/tests/testthat/test-sort.R +++ b/tests/testthat/test-sort.R @@ -30,3 +30,57 @@ test_that("sort_key", { sort_by_key(spec) %>% expect_equal(data) }) + +test_that("order_cols warns about deprecated dataset_name parameter", { + library(metacore) + library(haven) + + load(metacore_example("pilot_ADaM.rda")) + data <- read_xpt(metatools_example("adsl.xpt")) + + # Test that using dataset_name triggers deprecation warning + lifecycle::expect_deprecated( + order_cols(data, metacore, dataset_name = "ADSL") + ) +}) + +test_that("order_cols still works with deprecated dataset_name parameter", { + library(metacore) + library(haven) + + load(metacore_example("pilot_ADaM.rda")) + data <- read_xpt(metatools_example("adsl.xpt")) + + # Suppress warning to test functionality + suppressWarnings({ + result <- order_cols(data, metacore, dataset_name = "ADSL") + }) + + expect_s3_class(result, "data.frame") +}) + +test_that("sort_by_key warns about deprecated dataset_name parameter", { + library(metacore) + library(haven) + + load(metacore_example("pilot_ADaM.rda")) + data <- read_xpt(metatools_example("adsl.xpt")) + + lifecycle::expect_deprecated( + sort_by_key(data, metacore, dataset_name = "ADSL") + ) +}) + +test_that("sort_by_key still works with deprecated dataset_name parameter", { + library(metacore) + library(haven) + + load(metacore_example("pilot_ADaM.rda")) + data <- read_xpt(metatools_example("adsl.xpt")) + + suppressWarnings({ + result <- sort_by_key(data, metacore, dataset_name = "ADSL") + }) + + expect_s3_class(result, "data.frame") +}) \ No newline at end of file From 150c53376b32037e1d7762ee5580dbe8beae4b88 Mon Sep 17 00:00:00 2001 From: Jeff Dickinson Date: Thu, 11 Dec 2025 16:33:12 -0600 Subject: [PATCH 2/5] 117 Add tests to supp --- tests/testthat/test-supp.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/testthat/test-supp.R b/tests/testthat/test-supp.R index eeb3a41..1261148 100644 --- a/tests/testthat/test-supp.R +++ b/tests/testthat/test-supp.R @@ -272,3 +272,25 @@ test_that("combine_supp() does not create an IDVARVAL column (#78)", { noidvarval <- combine_supp(simple_ae, simple_suppae) expect_false("IDVARVAL" %in% names(noidvarval)) }) + +test_that("make_supp_qual handles deprecated dataset_name parameter", { + load(metacore::metacore_example("pilot_SDTM.rda")) + ae <- combine_supp(safetyData::sdtm_ae, safetyData::sdtm_suppae) + + # Test that dataset_name is deprecated and shows guidance + suppressWarnings({ + result <- make_supp_qual(ae, metacore, dataset_name = "AE") + }) + + expect_s3_class(result, "data.frame") +}) + +test_that("combine_supp handles QNAM not in dataset columns", { + simple_ae <- safetyData::sdtm_ae[1:5, ] + simple_suppae <- safetyData::sdtm_suppae[1, ] + simple_suppae$QNAM <- "NEWCOL" # A new column to add + + # Should successfully add the new column + result <- combine_supp(simple_ae, simple_suppae) + expect_true("NEWCOL" %in% names(result)) +}) From 3e7ee6e95e3465fa42f20b87174b9670859d83bc Mon Sep 17 00:00:00 2001 From: Jeff Dickinson Date: Thu, 11 Dec 2025 16:54:17 -0600 Subject: [PATCH 3/5] #117 add test to supp --- tests/testthat/test-supp.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test-supp.R b/tests/testthat/test-supp.R index 1261148..4dd5e72 100644 --- a/tests/testthat/test-supp.R +++ b/tests/testthat/test-supp.R @@ -294,3 +294,14 @@ test_that("combine_supp handles QNAM not in dataset columns", { result <- combine_supp(simple_ae, simple_suppae) expect_true("NEWCOL" %in% names(result)) }) + +test_that("combine_supp errors when QNAM already exists in dataset", { + simple_ae <- safetyData::sdtm_ae[1:5, ] + simple_ae$AETRTEM <- "existing" # Add column that matches QNAM + simple_suppae <- safetyData::sdtm_suppae[1, ] + + expect_error( + combine_supp(simple_ae, simple_suppae), + "already in the original dataset" + ) +}) From a1c366f8ebf960a970f61f73fff37bee2b586363 Mon Sep 17 00:00:00 2001 From: Jeff Dickinson Date: Thu, 11 Dec 2025 17:00:06 -0600 Subject: [PATCH 4/5] #117 add tests for supp --- tests/testthat/test-supp.R | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/testthat/test-supp.R b/tests/testthat/test-supp.R index 4dd5e72..58e2263 100644 --- a/tests/testthat/test-supp.R +++ b/tests/testthat/test-supp.R @@ -305,3 +305,55 @@ test_that("combine_supp errors when QNAM already exists in dataset", { "already in the original dataset" ) }) + +test_that("combine_supp handles IDVAR not in dataset", { + simple_ae <- safetyData::sdtm_ae[1:5, ] + simple_suppae <- safetyData::sdtm_suppae[1, ] + simple_suppae$IDVAR <- "FAKEIDVAR" # IDVAR that doesn't exist + + expect_error( + combine_supp(simple_ae, simple_suppae), + "replacement has 0 rows" + ) +}) + +test_that("combine_supp_by_idvar detects conflicting replacements across IDVARs", { + simple_ae <- safetyData::sdtm_ae %>% + filter(USUBJID %in% c("01-701-1015", "01-701-1023")) %>% + mutate(NEWID = dplyr::row_number()) + + # Create supp with same QNAM but different IDVARs that would cause conflicts + suppae_conflict <- bind_rows( + data.frame( + STUDYID = "CDISCPILOT01", + RDOMAIN = "AE", + USUBJID = "01-701-1015", + IDVAR = "AESEQ", + IDVARVAL = "1", + QNAM = "TESTVAR", + QLABEL = "Test Variable", + QVAL = "ValueA", + QORIG = "CRF", + QEVAL = "", + stringsAsFactors = FALSE + ), + data.frame( + STUDYID = "CDISCPILOT01", + RDOMAIN = "AE", + USUBJID = "01-701-1015", + IDVAR = "NEWID", + IDVARVAL = "1", + QNAM = "TESTVAR", + QLABEL = "Test Variable", + QVAL = "ValueB", # Different value for same subject/QNAM + QORIG = "CRF", + QEVAL = "", + stringsAsFactors = FALSE + ) + ) + + expect_error( + combine_supp(simple_ae, suppae_conflict), + "unexpected number of rows" + ) +}) From dbb2365a0612c7b17474f39046bfb0870155c856 Mon Sep 17 00:00:00 2001 From: Jeff Dickinson Date: Fri, 12 Dec 2025 10:12:56 -0600 Subject: [PATCH 5/5] #117 run styler --- R/build.R | 467 +++++++++++++++++---------------- R/checks.R | 383 ++++++++++++++------------- R/codelists.R | 279 ++++++++++---------- R/labels.R | 153 +++++------ R/sort.R | 63 +++-- R/supp.R | 357 +++++++++++++------------ R/utils.R | 8 +- README.Rmd | 17 +- tests/spelling.R | 9 +- tests/testthat/test-build.R | 176 +++++++------ tests/testthat/test-checks.R | 222 ++++++++-------- tests/testthat/test-codelist.R | 186 ++++++------- tests/testthat/test-labels.R | 182 ++++++------- tests/testthat/test-sort.R | 47 ++-- tests/testthat/test-supp.R | 457 +++++++++++++++++--------------- tests/testthat/test-utils.R | 8 +- 16 files changed, 1554 insertions(+), 1460 deletions(-) diff --git a/R/build.R b/R/build.R index e55da5a..b09d14b 100644 --- a/R/build.R +++ b/R/build.R @@ -56,110 +56,114 @@ #' build_from_derived(spec, ds_list, predecessor_only = FALSE) build_from_derived <- function(metacore, ds_list, dataset_name = deprecated(), predecessor_only = TRUE, keep = FALSE) { - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "build_from_derived(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "build_from_derived(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) - ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - verify_DatasetMeta(metacore) - - # Deprecate KEEP = TRUE - keep <- match.arg(as.character(keep), c("TRUE", "FALSE", "ALL", "PREREQUISITE")) - if (keep == "TRUE"){ - cli_warn(paste0("Setting 'keep' = TRUE has been superseded", + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + verify_DatasetMeta(metacore) + + # Deprecate KEEP = TRUE + keep <- match.arg(as.character(keep), c("TRUE", "FALSE", "ALL", "PREREQUISITE")) + if (keep == "TRUE") { + cli_warn(paste0( + "Setting 'keep' = TRUE has been superseded", ", and will be unavailable in future releases. Please consider setting ", - "'keep' equal to 'ALL' or 'PREREQUISITE'.")) - } - - derirvations <- metacore$derivations %>% - mutate(derivation = trimws(derivation)) - - if (predecessor_only) { - limited_dev_ids <- metacore$value_spec %>% - filter(str_detect(str_to_lower(origin), "predecessor")) %>% - pull(derivation_id) - - derirvations <- derirvations %>% - filter(derivation_id %in% limited_dev_ids) - if (nrow(derirvations) == 0) { - stop("No predecessor variables found please check your metacore object") - } - } - - vars_to_pull_through <- derirvations %>% - filter(str_detect(derivation, "^\\w*\\.[a-zA-Z0-9]*$")) - - # To lower so it is flexible about how people name their ds list - vars_w_ds <- vars_to_pull_through %>% - mutate(ds = str_extract(derivation, "^\\w*(?=\\.)") %>% - str_to_lower()) - ds_names <- vars_w_ds %>% - pull(ds) %>% - unique() - if(is.null(names(ds_list))){ - names(ds_list) <- deparse(substitute(ds_list)) %>% - str_remove("list\\s?\\(") %>% - str_remove("\\)s?$") %>% - str_split(",\\s?") %>% - unlist() - } - names(ds_list) <- names(ds_list) %>% - str_to_lower() - if (!all(ds_names %in% names(ds_list))) { - unknown <- keep(names(ds_list), ~!.%in% ds_names) - if(length(unknown) > 0){ - warning(paste0("The following dataset(s) have no predecessors and will be ignored:\n"), - paste0(unknown, collapse = ", "), - call. = FALSE) - } - ds_using <- discard(names(ds_list), ~. %in% unknown) %>% - str_to_upper() %>% - paste0(collapse = ", ") - - message(paste0( - "Not all datasets provided. Only variables from ", - ds_using, - " will be gathered." - )) - - # Filter out any variable that come from datasets that aren't present - vars_w_ds <- vars_w_ds %>% - filter(ds %in% names(ds_list)) - - } - - ds_keys <- metacore$ds_vars %>% - filter(!is.na(key_seq)) %>% - pull(variable) - - joining_vals_to_add <- ds_list %>% - map(function(x){ - names(x) %>% - keep(~ . %in% ds_keys) - }) - - join_by = joining_vals_to_add %>% - reduce(intersect) - additional_vals <- tibble(ds = names(ds_list), - variable = joining_vals_to_add) %>% - unnest(variable) %>% - mutate(col_name = variable) - - vars_w_ds %>% - mutate(col_name = str_extract(derivation, "(?<=\\.).*")) %>% - inner_join(metacore$value_spec, ., by = "derivation_id") %>% - select(variable, ds, col_name) %>% - bind_rows(additional_vals) %>% - group_by(ds) %>% - group_split() %>% - map(get_variables, ds_list, keep, derirvations) %>% - prepare_join(join_by, names(ds_list)) %>% - reduce(full_join, by = join_by) + "'keep' equal to 'ALL' or 'PREREQUISITE'." + )) + } + + derirvations <- metacore$derivations %>% + mutate(derivation = trimws(derivation)) + + if (predecessor_only) { + limited_dev_ids <- metacore$value_spec %>% + filter(str_detect(str_to_lower(origin), "predecessor")) %>% + pull(derivation_id) + + derirvations <- derirvations %>% + filter(derivation_id %in% limited_dev_ids) + if (nrow(derirvations) == 0) { + stop("No predecessor variables found please check your metacore object") + } + } + + vars_to_pull_through <- derirvations %>% + filter(str_detect(derivation, "^\\w*\\.[a-zA-Z0-9]*$")) + + # To lower so it is flexible about how people name their ds list + vars_w_ds <- vars_to_pull_through %>% + mutate(ds = str_extract(derivation, "^\\w*(?=\\.)") %>% + str_to_lower()) + ds_names <- vars_w_ds %>% + pull(ds) %>% + unique() + if (is.null(names(ds_list))) { + names(ds_list) <- deparse(substitute(ds_list)) %>% + str_remove("list\\s?\\(") %>% + str_remove("\\)s?$") %>% + str_split(",\\s?") %>% + unlist() + } + names(ds_list) <- names(ds_list) %>% + str_to_lower() + if (!all(ds_names %in% names(ds_list))) { + unknown <- keep(names(ds_list), ~ !. %in% ds_names) + if (length(unknown) > 0) { + warning(paste0("The following dataset(s) have no predecessors and will be ignored:\n"), + paste0(unknown, collapse = ", "), + call. = FALSE + ) + } + ds_using <- discard(names(ds_list), ~ . %in% unknown) %>% + str_to_upper() %>% + paste0(collapse = ", ") + + message(paste0( + "Not all datasets provided. Only variables from ", + ds_using, + " will be gathered." + )) + + # Filter out any variable that come from datasets that aren't present + vars_w_ds <- vars_w_ds %>% + filter(ds %in% names(ds_list)) + } + + ds_keys <- metacore$ds_vars %>% + filter(!is.na(key_seq)) %>% + pull(variable) + + joining_vals_to_add <- ds_list %>% + map(function(x) { + names(x) %>% + keep(~ . %in% ds_keys) + }) + + join_by <- joining_vals_to_add %>% + reduce(intersect) + additional_vals <- tibble( + ds = names(ds_list), + variable = joining_vals_to_add + ) %>% + unnest(variable) %>% + mutate(col_name = variable) + + vars_w_ds %>% + mutate(col_name = str_extract(derivation, "(?<=\\.).*")) %>% + inner_join(metacore$value_spec, ., by = "derivation_id") %>% + select(variable, ds, col_name) %>% + bind_rows(additional_vals) %>% + group_by(ds) %>% + group_split() %>% + map(get_variables, ds_list, keep, derirvations) %>% + prepare_join(join_by, names(ds_list)) %>% + reduce(full_join, by = join_by) } #' Internal functions to get variables from a dataset list @@ -174,46 +178,46 @@ build_from_derived <- function(metacore, ds_list, dataset_name = deprecated(), #' @return datasets #' @noRd get_variables <- function(x, ds_list, keep, derivations) { - ds_name <- unique(x$ds) - data <- ds_list[[ds_name]] - rename_vec <- set_names(x$col_name, x$variable) - if (keep == "TRUE") { - # Don't drop predecessor columns - out <- data %>% - select(x$col_name) %>% - mutate(across(all_of(rename_vec))) - } else if (keep == "FALSE") { - # Drop predecessor columns - out <- data %>% - select(x$col_name) %>% - mutate(across(all_of(rename_vec))) %>% - select(x$variable) - } else if (keep == "ALL") { - # Keep all cols from original datasets - out <- data %>% - mutate(across(all_of(rename_vec))) - } else if (keep == "PREREQUISITE") { - # Keep all columns required for future derivations - # Find all "XX.XXXXX" - future_derivations <- derivations %>% - select(derivation) %>% - filter(!str_detect(derivation,"^[A-Z0-9a-z]+\\.[A-Z0-9a-z]+$")) - - prereq_vector <- str_match_all(future_derivations$derivation, "([A-Z0-9a-z]+)\\.([A-Z0-9a-z]+)") - - # Bind into matrix + remove dups - prereq_matrix <- do.call(rbind,prereq_vector) %>% - unique() + ds_name <- unique(x$ds) + data <- ds_list[[ds_name]] + rename_vec <- set_names(x$col_name, x$variable) + if (keep == "TRUE") { + # Don't drop predecessor columns + out <- data %>% + select(x$col_name) %>% + mutate(across(all_of(rename_vec))) + } else if (keep == "FALSE") { + # Drop predecessor columns + out <- data %>% + select(x$col_name) %>% + mutate(across(all_of(rename_vec))) %>% + select(x$variable) + } else if (keep == "ALL") { + # Keep all cols from original datasets + out <- data %>% + mutate(across(all_of(rename_vec))) + } else if (keep == "PREREQUISITE") { + # Keep all columns required for future derivations + # Find all "XX.XXXXX" + future_derivations <- derivations %>% + select(derivation) %>% + filter(!str_detect(derivation, "^[A-Z0-9a-z]+\\.[A-Z0-9a-z]+$")) + + prereq_vector <- str_match_all(future_derivations$derivation, "([A-Z0-9a-z]+)\\.([A-Z0-9a-z]+)") + + # Bind into matrix + remove dups + prereq_matrix <- do.call(rbind, prereq_vector) %>% + unique() - # Subset to those present in current dataset - prereq_cols <- subset(prereq_matrix, tolower(prereq_matrix[,2]) == tolower(ds_name))[,3] + # Subset to those present in current dataset + prereq_cols <- subset(prereq_matrix, tolower(prereq_matrix[, 2]) == tolower(ds_name))[, 3] - out <- data %>% - select(c(x$col_name, all_of(prereq_cols))) %>% - mutate(across(all_of(rename_vec))) %>% - select(c(x$variable, all_of(prereq_cols))) - } - out + out <- data %>% + select(c(x$col_name, all_of(prereq_cols))) %>% + mutate(across(all_of(rename_vec))) %>% + select(c(x$variable, all_of(prereq_cols))) + } + out } #' Internal function to remove duplicated non-key variables prior to join @@ -232,29 +236,29 @@ get_variables <- function(x, ds_list, keep, derivations) { #' @return datasets #' @noRd prepare_join <- function(x, keys, ds_names) { - out <- list(x[[1]]) + out <- list(x[[1]]) - if (length(x) > 1){ - for (i in 2:length(x)){ - # Drop non-key cols present in each previous dataset in order - drop_cols <- c() + if (length(x) > 1) { + for (i in 2:length(x)) { + # Drop non-key cols present in each previous dataset in order + drop_cols <- c() - for (j in 1:(i-1)){ - conflicting_cols <- keep(names(x[[j]]), function(col) !(col %in% keys)) %>% - intersect(colnames(x[[i]])) - drop_cols <- c(drop_cols, conflicting_cols) + for (j in 1:(i - 1)) { + conflicting_cols <- keep(names(x[[j]]), function(col) !(col %in% keys)) %>% + intersect(colnames(x[[i]])) + drop_cols <- c(drop_cols, conflicting_cols) - if(length(conflicting_cols) > 0){ - cli_inform(c("i" = "Dropping column(s) from {ds_names[[i]]} due to \\ + if (length(conflicting_cols) > 0) { + cli_inform(c("i" = "Dropping column(s) from {ds_names[[i]]} due to \\ conflict with {ds_names[[j]]}: {conflicting_cols}.")) - } - } - - out[[i]] <- x[[i]] %>% - select(-any_of(drop_cols)) + } } - } - out + + out[[i]] <- x[[i]] %>% + select(-any_of(drop_cols)) + } + } + out } #' Drop Unspecified Variables @@ -284,36 +288,37 @@ prepare_join <- function(x, keys, ds_names) { #' mutate(foo = "Hello") #' drop_unspec_vars(data, spec) drop_unspec_vars <- function(dataset, metacore, dataset_name = deprecated()) { - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "drop_unspec_vars(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "drop_unspec_vars(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) - ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - - verify_DatasetMeta(metacore) - var_list <- metacore$ds_vars %>% - filter(is.na(supp_flag) | !(supp_flag)) %>% - pull(variable) - to_drop <- names(dataset) %>% - discard(~ . %in% var_list) - if (length(to_drop) > 0) { - out <- dataset %>% - select(-all_of(to_drop)) - message(paste0("The following variable(s) were dropped:\n ", - paste0(to_drop, collapse = "\n "))) - } else { - out <- dataset - } - out + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + + verify_DatasetMeta(metacore) + var_list <- metacore$ds_vars %>% + filter(is.na(supp_flag) | !(supp_flag)) %>% + pull(variable) + to_drop <- names(dataset) %>% + discard(~ . %in% var_list) + if (length(to_drop) > 0) { + out <- dataset %>% + select(-all_of(to_drop)) + message(paste0( + "The following variable(s) were dropped:\n ", + paste0(to_drop, collapse = "\n ") + )) + } else { + out <- dataset + } + out } - #' Add Missing Variables #' #' This function adds in missing columns according to the type set in the @@ -340,58 +345,60 @@ drop_unspec_vars <- function(dataset, metacore, dataset_name = deprecated()) { #' load(metacore_example("pilot_ADaM.rda")) #' spec <- metacore %>% select_dataset("ADSL") #' data <- read_xpt(metatools_example("adsl.xpt")) %>% -#' select(-TRTSDT, -TRT01P, -TRT01PN) +#' select(-TRTSDT, -TRT01P, -TRT01PN) #' add_variables(data, spec) -add_variables <- function(dataset, metacore, dataset_name = deprecated()){ - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "add_variables(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. +add_variables <- function(dataset, metacore, dataset_name = deprecated()) { + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "add_variables(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + + verify_DatasetMeta(metacore) + var_list <- metacore$ds_vars %>% + filter(is.na(supp_flag) | !(supp_flag)) %>% + pull(variable) + + to_add <- var_list %>% + discard(~ . %in% names(dataset)) + if (length(to_add) > 0) { + n <- nrow(dataset) + typing <- metacore$var_spec %>% + filter(variable %in% to_add) %>% + mutate( + type_fmt = str_to_lower(type), + out_type = + case_when( + str_detect(str_to_lower(format), "date") ~ "date", + type_fmt == "integer" ~ "integer", + type_fmt == "numeric" ~ "double", + type_fmt == "text" ~ "character", + type_fmt == "character" ~ "character", + type_fmt == "boolean" ~ "logical", + type_fmt == "logical" ~ "logical", + TRUE ~ "unknown" + ) ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - verify_DatasetMeta(metacore) - var_list <- metacore$ds_vars %>% - filter(is.na(supp_flag) | !(supp_flag)) %>% - pull(variable) - - to_add <- var_list %>% - discard(~ . %in% names(dataset)) - if(length(to_add) > 0){ - n <- nrow(dataset) - typing <- metacore$var_spec %>% - filter(variable %in% to_add) %>% - mutate(type_fmt = str_to_lower(type), - out_type = - case_when( - str_detect(str_to_lower(format), "date") ~ "date", - type_fmt == "integer" ~ "integer", - type_fmt == "numeric" ~ "double", - type_fmt == "text" ~ "character", - type_fmt == "character" ~ "character", - type_fmt == "boolean" ~"logical", - type_fmt == "logical" ~"logical", - TRUE ~ "unknown" - )) - - new_cols <- map(typing$out_type, function(typ){ - out <- switch(typ, - "character" = rep(NA_character_, n), - "integer" = rep(NA_integer_, n), - "double" = rep(NA_real_, n), - "date" = as.Date(rep(NA_integer_, n)), - "logical" = rep(NA, n), - "unknown" = rep(NA, n) - ) - }) - names(new_cols) <- typing$variable - new_cols <- as_tibble(new_cols) + new_cols <- map(typing$out_type, function(typ) { + out <- switch(typ, + "character" = rep(NA_character_, n), + "integer" = rep(NA_integer_, n), + "double" = rep(NA_real_, n), + "date" = as.Date(rep(NA_integer_, n)), + "logical" = rep(NA, n), + "unknown" = rep(NA, n) + ) + }) + names(new_cols) <- typing$variable + new_cols <- as_tibble(new_cols) - dataset <- bind_cols(dataset, new_cols) - } - dataset + dataset <- bind_cols(dataset, new_cols) + } + dataset } diff --git a/R/checks.R b/R/checks.R index bb7deb7..a8ab165 100644 --- a/R/checks.R +++ b/R/checks.R @@ -28,17 +28,19 @@ #' check_ct_col(data, spec, TRT01PN) #' check_ct_col(data, spec, "TRT01PN") check_ct_col <- function(data, metacore, var, na_acceptable = NULL) { - verify_DatasetMeta(metacore) - bad_vals <- get_bad_ct(data = data, metacore = metacore, - var = {{var}}, na_acceptable = na_acceptable) - if(length(bad_vals) == 0){ - data - } else { - extra <- bad_vals %>% - paste0("'", ., "'") %>% - paste0(collapse = ", ") - stop(paste("The following values should not be present:\n", extra)) - } + verify_DatasetMeta(metacore) + bad_vals <- get_bad_ct( + data = data, metacore = metacore, + var = {{ var }}, na_acceptable = na_acceptable + ) + if (length(bad_vals) == 0) { + data + } else { + extra <- bad_vals %>% + paste0("'", ., "'") %>% + paste0(collapse = ", ") + stop(paste("The following values should not be present:\n", extra)) + } } #' Gets vector of control terminology which should be there @@ -71,40 +73,39 @@ check_ct_col <- function(data, metacore, var, na_acceptable = NULL) { #' get_bad_ct(data, spec, "DCSREAS") #' get_bad_ct(data, spec, "DCSREAS", na_acceptable = FALSE) #' -get_bad_ct <- function(data, metacore, var, na_acceptable = NULL){ - verify_DatasetMeta(metacore) - col_name_str <- as_label(enexpr(var)) %>% - str_remove_all("\"") - if (!col_name_str %in% names(data)) { - stop(paste(col_name_str, "not found in dataset. Please check and try again"), call. = FALSE) - } - ct <- get_control_term(metacore, {{ var }}) - if (is.vector(ct)) { - check <- ct - } else if ("code" %in% names(ct)) { - check <- ct %>% pull(code) - } else { - stop("We currently don't have the ability to check against external libraries", call. = FALSE) - } - core <- metacore$ds_vars %>% - filter(variable == col_name_str) %>% - pull(core) - attr(core, "label") <- NULL - test <- ifelse(is.null(na_acceptable), !identical(core, "Required"), na_acceptable) - if (test) { - if (all(is.character(check))) { - check <- c(check, NA_character_, "") - } else { - check <- c(check, NA) - } - } - test <- pull(data, {{ var }}) %in% check - pull(data, {{ var }})[!test] %>% - unique() +get_bad_ct <- function(data, metacore, var, na_acceptable = NULL) { + verify_DatasetMeta(metacore) + col_name_str <- as_label(enexpr(var)) %>% + str_remove_all("\"") + if (!col_name_str %in% names(data)) { + stop(paste(col_name_str, "not found in dataset. Please check and try again"), call. = FALSE) + } + ct <- get_control_term(metacore, {{ var }}) + if (is.vector(ct)) { + check <- ct + } else if ("code" %in% names(ct)) { + check <- ct %>% pull(code) + } else { + stop("We currently don't have the ability to check against external libraries", call. = FALSE) + } + core <- metacore$ds_vars %>% + filter(variable == col_name_str) %>% + pull(core) + attr(core, "label") <- NULL + test <- ifelse(is.null(na_acceptable), !identical(core, "Required"), na_acceptable) + if (test) { + if (all(is.character(check))) { + check <- c(check, NA_character_, "") + } else { + check <- c(check, NA) + } + } + test <- pull(data, {{ var }}) %in% check + pull(data, {{ var }})[!test] %>% + unique() } - #' Check Control Terminology for a Dataset #' #' This function checks that all columns in the dataset only contains the @@ -141,90 +142,92 @@ get_bad_ct <- function(data, metacore, var, na_acceptable = NULL){ #' check_ct_data(data, spec, na_acceptable = FALSE) #' check_ct_data(data, spec, na_acceptable = FALSE, omit_vars = "DISCONFL") #' check_ct_data(data, spec, na_acceptable = c("DSRAEFL", "DCSREAS"), omit_vars = "DISCONFL") -#'} +#' } check_ct_data <- function(data, metacore, na_acceptable = NULL, omit_vars = NULL) { - verify_DatasetMeta(metacore) - codes_in_data <- metacore$value_spec %>% - filter(variable %in% names(data), !is.na(code_id)) %>% - pull(code_id) %>% - unique() - # Remove any codes that have external libraries - codes_to_check <- metacore$codelist %>% - filter(type != "external_library", code_id %in% codes_in_data) %>% - select(code_id) - # convert list of codes to variables - cols_to_check <- metacore$value_spec %>% - inner_join(codes_to_check, by = "code_id", multiple = "all", relationship = "many-to-many") %>% - filter(variable %in% names(data)) %>% - pull(variable) %>% - unique() + verify_DatasetMeta(metacore) + codes_in_data <- metacore$value_spec %>% + filter(variable %in% names(data), !is.na(code_id)) %>% + pull(code_id) %>% + unique() + # Remove any codes that have external libraries + codes_to_check <- metacore$codelist %>% + filter(type != "external_library", code_id %in% codes_in_data) %>% + select(code_id) + # convert list of codes to variables + cols_to_check <- metacore$value_spec %>% + inner_join(codes_to_check, by = "code_id", multiple = "all", relationship = "many-to-many") %>% + filter(variable %in% names(data)) %>% + pull(variable) %>% + unique() - # Subset cols_to_check by omit_vars - if (is.character(omit_vars)) { - check_vars_in_data(omit_vars, "omit_vars", data) - cols_to_check <- setdiff(cols_to_check, omit_vars) - } + # Subset cols_to_check by omit_vars + if (is.character(omit_vars)) { + check_vars_in_data(omit_vars, "omit_vars", data) + cols_to_check <- setdiff(cols_to_check, omit_vars) + } - # send all variables through check_ct_col - safe_chk <- safely(check_ct_col) + # send all variables through check_ct_col + safe_chk <- safely(check_ct_col) - if (is.character(na_acceptable)) { - check_vars_in_data(na_acceptable, "na_acceptable", data) - new_na_acceptable <- rep(FALSE, length(cols_to_check)) - new_na_acceptable[match(na_acceptable, cols_to_check)] <- TRUE + if (is.character(na_acceptable)) { + check_vars_in_data(na_acceptable, "na_acceptable", data) + new_na_acceptable <- rep(FALSE, length(cols_to_check)) + new_na_acceptable[match(na_acceptable, cols_to_check)] <- TRUE - results <- map2(cols_to_check, new_na_acceptable, function(x, naac) { - out <- safe_chk(data, metacore, {{ x }}, naac) - out$error + results <- map2(cols_to_check, new_na_acceptable, function(x, naac) { + out <- safe_chk(data, metacore, {{ x }}, naac) + out$error + }) + } else if (is.logical(na_acceptable) || is.null(na_acceptable)) { + results <- cols_to_check %>% + map(function(x) { + out <- safe_chk(data, metacore, {{ x }}, na_acceptable) + out$error }) - - } else if(is.logical(na_acceptable) || is.null(na_acceptable)) { - results <- cols_to_check %>% - map(function(x) { - out <- safe_chk(data, metacore, {{ x }}, na_acceptable) - out$error - }) - - } else { - stop("na_acceptable is not NULL, logical or character.", call. = FALSE) - } + } else { + stop("na_acceptable is not NULL, logical or character.", call. = FALSE) + } - # Write out warning message - test <- map_lgl(results, is.null) - if (all(test)) { - return(data) - } else { - extras <- results %>% - discard(is.null) %>% - map(~.$message) %>% - unlist() %>% - str_remove("The following values should not be present:\n\\s") - unique_test <- extras %>% - keep(~str_detect(., "does not have a unique control term")) - if(length(unique_test) > 0){ - stop(paste0(unique_test, collapse = "\n"), call. = FALSE) - } - message <- paste0(cols_to_check[!test], " (", extras, ")") %>% - paste0(collapse = "\n") - stop(paste0( - "The following variables contained values not found in the control terminology + # Write out warning message + test <- map_lgl(results, is.null) + if (all(test)) { + return(data) + } else { + extras <- results %>% + discard(is.null) %>% + map(~ .$message) %>% + unlist() %>% + str_remove("The following values should not be present:\n\\s") + unique_test <- extras %>% + keep(~ str_detect(., "does not have a unique control term")) + if (length(unique_test) > 0) { + stop(paste0(unique_test, collapse = "\n"), call. = FALSE) + } + message <- paste0(cols_to_check[!test], " (", extras, ")") %>% + paste0(collapse = "\n") + stop( + paste0( + "The following variables contained values not found in the control terminology Variable (Prohibited Value(s))\n", - message), - call. = FALSE) - } + message + ), + call. = FALSE + ) + } } check_vars_in_data <- function(vars, vars_name, data) { - if (!all(vars %in% names(data))) { - stop( - paste0( - "Not all variables from ", vars_name, " are in the data: ", - paste0(setdiff(vars, names(data)), collapse = ",") - ), - call. = FALSE) - } - return(NULL) + if (!all(vars %in% names(data))) { + stop( + paste0( + "Not all variables from ", vars_name, " are in the data: ", + paste0(setdiff(vars, names(data)), collapse = ",") + ), + call. = FALSE + ) + } + return(NULL) } #' Check Variable Names @@ -261,45 +264,45 @@ check_vars_in_data <- function(vars, vars_name, data) { #' data["DUMMY_COL"] <- NA #' check_variables(data, spec, strict = FALSE) check_variables <- function(data, metacore, dataset_name = deprecated(), strict = TRUE) { - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "check_variables(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "check_variables(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) - ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - verify_DatasetMeta(metacore) + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + verify_DatasetMeta(metacore) - var_list <- metacore$ds_vars %>% - filter(is.na(supp_flag) | !(supp_flag)) %>% - pull(variable) + var_list <- metacore$ds_vars %>% + filter(is.na(supp_flag) | !(supp_flag)) %>% + pull(variable) - missing <- var_list %>% discard(~ . %in% names(data)) - extra <- names(data) %>% discard(~ . %in% var_list) + missing <- var_list %>% discard(~ . %in% names(data)) + extra <- names(data) %>% discard(~ . %in% var_list) - messages <- character(0) - data_list <- list() + messages <- character(0) + data_list <- list() - if (length(missing) > 0) { - messages <- c(messages, "The following variables are missing") - data_list <- c(data_list, list(missing)) - } + if (length(missing) > 0) { + messages <- c(messages, "The following variables are missing") + data_list <- c(data_list, list(missing)) + } - if (length(extra) > 0) { - messages <- c(messages, "The following variables do not belong") - data_list <- c(data_list, list(extra)) - } + if (length(extra) > 0) { + messages <- c(messages, "The following variables do not belong") + data_list <- c(data_list, list(extra)) + } - if (length(messages) > 0) { - print_to_console(messages, data_list, strict = {{ strict }}) - } else { - message("No missing or extra variables") - } + if (length(messages) > 0) { + print_to_console(messages, data_list, strict = {{ strict }}) + } else { + message("No missing or extra variables") + } - data + data } #' Print Messages to Console @@ -327,21 +330,23 @@ check_variables <- function(data, metacore, dataset_name = deprecated(), strict #' @noRd #' print_to_console <- function(messages, data_list, strict = TRUE) { - calling_function <- paste(deparse(sys.call(-1)), collapse = " ") - output_string <- paste0("In: [", calling_function, "]" ) + calling_function <- paste(deparse(sys.call(-1)), collapse = " ") + output_string <- paste0("In: [", calling_function, "]") - for (i in seq_along(messages)) { - message <- paste0(messages[i], ": ", - paste(data_list[[i]], collapse = ", "), sep = "\n") + for (i in seq_along(messages)) { + message <- paste0(messages[i], ": ", + paste(data_list[[i]], collapse = ", "), + sep = "\n" + ) - output_string <- paste(output_string, message, sep = "\n\n") - } + output_string <- paste(output_string, message, sep = "\n\n") + } - options(deparse.max.lines = 2000L) - switch (as.character(strict), - "TRUE" = cli::cli_abort(output_string, call = NULL), - "FALSE" = cli::cli_warn(output_string, call = NULL) - ) + options(deparse.max.lines = 2000L) + switch(as.character(strict), + "TRUE" = cli::cli_abort(output_string, call = NULL), + "FALSE" = cli::cli_warn(output_string, call = NULL) + ) } #' Check Uniqueness of Records by Key @@ -372,38 +377,40 @@ print_to_console <- function(messages, data_list, strict = TRUE) { #' data <- read_xpt(metatools_example("adsl.xpt")) #' check_unique_keys(data, spec) check_unique_keys <- function(data, metacore, dataset_name = deprecated()) { - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "check_unique_keys(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "check_unique_keys(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) - ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - verify_DatasetMeta(metacore) - keys <- get_keys(metacore,expr(!!metacore$ds_spec$dataset)) - var_list <- keys %>% - pull(variable) - missing <- var_list %>% - discard(~ . %in% names(data)) - if (length(missing) > 0) { - stop(paste0( - "The following variable keys are missing in the dataset:\n", - paste0(missing, collapse = "\n") - )) - } - grouped <- data %>% - group_by(pick(!!keys$variable)) %>% - add_count() %>% - filter(.data$n != 1) - if (nrow(grouped) == 0) { - message("Keys uniquely identify records") - } else { - stop(paste0("Keys do not uniquely identify records\n", - "variable keys:\n", - paste0(var_list, collapse = "\n"))) - } - data + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + verify_DatasetMeta(metacore) + keys <- get_keys(metacore, expr(!!metacore$ds_spec$dataset)) + var_list <- keys %>% + pull(variable) + missing <- var_list %>% + discard(~ . %in% names(data)) + if (length(missing) > 0) { + stop(paste0( + "The following variable keys are missing in the dataset:\n", + paste0(missing, collapse = "\n") + )) + } + grouped <- data %>% + group_by(pick(!!keys$variable)) %>% + add_count() %>% + filter(.data$n != 1) + if (nrow(grouped) == 0) { + message("Keys uniquely identify records") + } else { + stop(paste0( + "Keys do not uniquely identify records\n", + "variable keys:\n", + paste0(var_list, collapse = "\n") + )) + } + data } diff --git a/R/codelists.R b/R/codelists.R index d6ac879..58c4ade 100644 --- a/R/codelists.R +++ b/R/codelists.R @@ -1,4 +1,3 @@ - #' Dash to Equation #' #' Converts strings that are #-# style to a logical expression (but in a string format) @@ -7,11 +6,11 @@ #' @return string #' @noRd dash_to_eq <- function(string) { - front <- str_extract(string, "^.*(?=\\-)") - front_eq <- if_else(str_detect(front, "<|>|="), front, paste0(">=", front)) - back <- str_extract(string, "(?<=\\-).*$") - back_eq <- if_else(str_detect(back, "<|>|="), back, paste0("<=", back)) - paste0("x", front_eq, " & x", back_eq) + front <- str_extract(string, "^.*(?=\\-)") + front_eq <- if_else(str_detect(front, "<|>|="), front, paste0(">=", front)) + back <- str_extract(string, "(?<=\\-).*$") + back_eq <- if_else(str_detect(back, "<|>|="), back, paste0("<=", back)) + paste0("x", front_eq, " & x", back_eq) } @@ -33,55 +32,60 @@ dash_to_eq <- function(string) { #' create_subgrps(c(1:10), c("<2", "2-<5", ">=5")) #' create_subgrps(c(1:10), c("<2", "2-<5", ">=5"), c("<2 years", "2-5 years", ">=5 years")) create_subgrps <- function(ref_vec, grp_defs, grp_labs = NULL) { - if (!is.numeric(ref_vec)) { cli_abort("ref_vec must be numeric") } - if (is.null(grp_labs)) { grp_labs <- grp_defs } + if (!is.numeric(ref_vec)) { + cli_abort("ref_vec must be numeric") + } + if (is.null(grp_labs)) { + grp_labs <- grp_defs + } - # Create equations used to derive the subgroups - equations <- case_when( - str_detect(grp_defs, "-") ~ paste0("function(x){if_else(", dash_to_eq(grp_defs), ", '", grp_labs, "', '')}"), - str_detect(grp_defs, "^(<\\s?=|>\\s?=|<|>)\\s?\\d+") ~ paste0("function(x){if_else(x", grp_defs, ",'", grp_labs, "', '')}"), - TRUE ~ NA_character_ - ) + # Create equations used to derive the subgroups + equations <- case_when( + str_detect(grp_defs, "-") ~ paste0("function(x){if_else(", dash_to_eq(grp_defs), ", '", grp_labs, "', '')}"), + str_detect(grp_defs, "^(<\\s?=|>\\s?=|<|>)\\s?\\d+") ~ paste0("function(x){if_else(x", grp_defs, ",'", grp_labs, "', '')}"), + TRUE ~ NA_character_ + ) - # Apply equations - if (all(!is.na(equations))) { - functions <- equations %>% - map(~ eval(parse(text = .))) - out <- functions %>% - map(~ .(ref_vec)) %>% - reduce(str_c) %>% - replace(. == "", NA) - } else { - na_index <- which(is.na(equations)) - bad_defs <- grp_defs[na_index] - cli_abort(paste("Unable to decipher the following group definition{?s}: {bad_defs}.", - "Please check your controlled terminology.")) - } - # Find non-exclusive subgroups i.e., values that have been mapped to two groups - non_excl <- out |> - discard(is.na) |> - map(~ grp_labs[str_detect(.x, grp_labs)]) |> - keep(~ length(.) > 1) |> - unique() + # Apply equations + if (all(!is.na(equations))) { + functions <- equations %>% + map(~ eval(parse(text = .))) + out <- functions %>% + map(~ .(ref_vec)) %>% + reduce(str_c) %>% + replace(. == "", NA) + } else { + na_index <- which(is.na(equations)) + bad_defs <- grp_defs[na_index] + cli_abort(paste( + "Unable to decipher the following group definition{?s}: {bad_defs}.", + "Please check your controlled terminology." + )) + } + # Find non-exclusive subgroups i.e., values that have been mapped to two groups + non_excl <- out |> + discard(is.na) |> + map(~ grp_labs[str_detect(.x, grp_labs)]) |> + keep(~ length(.) > 1) |> + unique() - # Throw error if groups are not exclusive - if (length(non_excl) > 0) { - msg <- map_chr(non_excl, ~ { - items <- paste(.x, collapse = ", ") - }) %>% - paste0(seq_along(.), ". ", .) + # Throw error if groups are not exclusive + if (length(non_excl) > 0) { + msg <- map_chr(non_excl, ~ { + items <- paste(.x, collapse = ", ") + }) %>% + paste0(seq_along(.), ". ", .) - cli_abort(c( - "Group definitions are not exclusive. Please check your controlled terminology", - "The following group definitions overlap:", - msg - )) - } - out + cli_abort(c( + "Group definitions are not exclusive. Please check your controlled terminology", + "The following group definitions overlap:", + msg + )) + } + out } - #' Create Variable from Codelist #' #' This functions uses code/decode pairs from a metacore object to create new @@ -131,91 +135,95 @@ create_subgrps <- function(ref_vec, grp_defs, grp_labs = NULL) { #' #' # Example providing a custom codelist #' # This example also reverses the direction of translation -#' load(metacore_example('pilot_ADaM.rda')) +#' load(metacore_example("pilot_ADaM.rda")) #' adlb_spec <- select_dataset(metacore, "ADLBC", quiet = TRUE) #' adlb <- tibble(PARAMCD = c("ALB", "ALP", "ALT", "AST", "BILI", "BUN")) #' create_var_from_codelist( -#' adlb, -#' adlb_spec, -#' PARAMCD, -#' PARAM, -#' codelist = get_control_term(adlb_spec, PARAMCD), -#' decode_to_code = FALSE, -#' strict = FALSE) +#' adlb, +#' adlb_spec, +#' PARAMCD, +#' PARAM, +#' codelist = get_control_term(adlb_spec, PARAMCD), +#' decode_to_code = FALSE, +#' strict = FALSE +#' ) #' -#'\dontrun{ +#' \dontrun{ #' # Example expecting warning where `strict` == `TRUE` #' adlb <- tibble(PARAMCD = c("ALB", "ALP", "ALT", "AST", "BILI", "BUN", "DUMMY1", "DUMMY2")) #' create_var_from_codelist( -#' adlb, -#' adlb_spec, -#' PARAMCD, -#' PARAM, -#' codelist = get_control_term(adlb_spec, PARAMCD), -#' decode_to_code = FALSE, -#' strict = TRUE) +#' adlb, +#' adlb_spec, +#' PARAMCD, +#' PARAM, +#' codelist = get_control_term(adlb_spec, PARAMCD), +#' decode_to_code = FALSE, +#' strict = TRUE +#' ) #' } create_var_from_codelist <- function(data, metacore, input_var, out_var, codelist = NULL, decode_to_code = TRUE, strict = TRUE) { - verify_DatasetMeta(metacore) + verify_DatasetMeta(metacore) - # Use codelist if provided, else use codelist of the out_var - if (!missing(codelist)) { code_translation <- codelist } - else { code_translation <- get_control_term(metacore, {{ out_var }}) } + # Use codelist if provided, else use codelist of the out_var + if (!missing(codelist)) { + code_translation <- codelist + } else { + code_translation <- get_control_term(metacore, {{ out_var }}) + } - if (is.vector(code_translation) | !("decode" %in% names(code_translation))) { - cli_abort("Expecting 'code_decode' type of control terminology. Actual \\ + if (is.vector(code_translation) | !("decode" %in% names(code_translation))) { + cli_abort("Expecting 'code_decode' type of control terminology. Actual \\ type is {typeof(code_translation)}. Check the structure of the codelist in the \\ {.obj metacore} object using {.fn View}.") - } + } - # Check decode_to_code is logical and set direction of translation - if (!is_logical(decode_to_code)) { - cli_abort("{.arg decode_to_code} must be either TRUE or FALSE.") - } + # Check decode_to_code is logical and set direction of translation + if (!is_logical(decode_to_code)) { + cli_abort("{.arg decode_to_code} must be either TRUE or FALSE.") + } - ref_var <- if (decode_to_code) "decode" else "code" - new_var <- if (decode_to_code) "code" else "decode" + ref_var <- if (decode_to_code) "decode" else "code" + new_var <- if (decode_to_code) "code" else "decode" - # Pull data values and codelist values to check inconsistent overlap - values <- data |> pull({{ input_var }}) - codelist <- code_translation |> pull(ref_var) + # Pull data values and codelist values to check inconsistent overlap + values <- data |> pull({{ input_var }}) + codelist <- code_translation |> pull(ref_var) - miss <- setdiff(values, codelist) - if (strict == TRUE && length(miss) > 0) { - cli_warn( - "In {.fn create_var_from_codelist}: The following value{?s} present in the -input dataset {?is/are} not present in the codelist: {miss}") - } + miss <- setdiff(values, codelist) + if (strict == TRUE && length(miss) > 0) { + cli_warn( + "In {.fn create_var_from_codelist}: The following value{?s} present in the +input dataset {?is/are} not present in the codelist: {miss}" + ) + } - input_var_str <- as_label(enexpr(input_var)) |> - str_remove_all("\"") + input_var_str <- as_label(enexpr(input_var)) |> + str_remove_all("\"") - # Coerce join column to character to ensure join if input var is numeric - data <- data |> mutate(merge_on := as.character(.data[[input_var_str]])) - code_translation <- code_translation |> - mutate( - decode = as.character(decode), - code = as.character(code) - ) + # Coerce join column to character to ensure join if input var is numeric + data <- data |> mutate(merge_on := as.character(.data[[input_var_str]])) + code_translation <- code_translation |> + mutate( + decode = as.character(decode), + code = as.character(code) + ) - out <- data |> - left_join(code_translation, by = set_names(ref_var, "merge_on")) |> - rename({{ out_var }} := !!sym(new_var)) |> - select(-merge_on) + out <- data |> + left_join(code_translation, by = set_names(ref_var, "merge_on")) |> + rename({{ out_var }} := !!sym(new_var)) |> + select(-merge_on) - # Optionally coerce to numeric if the output values are numeric - if (all(str_detect(code_translation[[new_var]], "^\\d*$"))) { - out <- out |> - mutate({{ out_var }} := as.numeric({{ out_var }})) - } + # Optionally coerce to numeric if the output values are numeric + if (all(str_detect(code_translation[[new_var]], "^\\d*$"))) { + out <- out |> + mutate({{ out_var }} := as.numeric({{ out_var }})) + } - out + out } - - #' Create Categorical Variable from Codelist #' #' Using the grouping from either the `decode_var` or `code_var` and a reference @@ -255,34 +263,38 @@ input dataset {?is/are} not present in the codelist: {miss}") #' create_cat_var(dm, spec, AGE, AGEGR1, AGEGR1N) create_cat_var <- function(data, metacore, ref_var, grp_var, num_grp_var = NULL, create_from_decode = FALSE, strict = TRUE) { - verify_DatasetMeta(metacore) - ct <- get_control_term(metacore, {{ grp_var }}) - if (is.vector(ct) | !("decode" %in% names(ct))) { - cli_abort("Expecting 'code_decode' type of control terminology. Please check metacore object") - } + verify_DatasetMeta(metacore) + ct <- get_control_term(metacore, {{ grp_var }}) + if (is.vector(ct) | !("decode" %in% names(ct))) { + cli_abort("Expecting 'code_decode' type of control terminology. Please check metacore object") + } + + # Assign group definitions and labels + grp_defs <- pull(ct, code) + grp_labs <- if (create_from_decode) pull(ct, decode) else grp_defs - # Assign group definitions and labels - grp_defs <- pull(ct, code) - grp_labs <- if (create_from_decode) pull(ct, decode) else grp_defs - - out <- data %>% - mutate({{ grp_var }} := create_subgrps({{ ref_var }}, grp_defs, grp_labs)) + out <- data %>% + mutate({{ grp_var }} := create_subgrps({{ ref_var }}, grp_defs, grp_labs)) - if (!is.null(enexpr(num_grp_var))) { - out <- out %>% - create_var_from_codelist(metacore, {{ grp_var }}, {{ num_grp_var }}) - } + if (!is.null(enexpr(num_grp_var))) { + out <- out %>% + create_var_from_codelist(metacore, {{ grp_var }}, {{ num_grp_var }}) + } - missing <- out |> pull({{ grp_var }}) |> is.na() |> which()|> length() - if (strict && missing > 0) { - cli_warn(paste( - "There {qty(missing)} {?is/are} {missing} {qty(missing)} observation{?s}", - "in {as_name(enquo(ref_var))} that {qty(missing)} {?does/do} not fit into", - "the provided categories for {as_name(enquo(grp_var))}. Please check your", - "controlled terminology.") - ) - } - out + missing <- out |> + pull({{ grp_var }}) |> + is.na() |> + which() |> + length() + if (strict && missing > 0) { + cli_warn(paste( + "There {qty(missing)} {?is/are} {missing} {qty(missing)} observation{?s}", + "in {as_name(enquo(ref_var))} that {qty(missing)} {?does/do} not fit into", + "the provided categories for {as_name(enquo(grp_var))}. Please check your", + "controlled terminology." + )) + } + out } @@ -315,7 +327,7 @@ create_cat_var <- function(data, metacore, ref_var, grp_var, num_grp_var = NULL, #' # Variable with permitted value control terms #' convert_var_to_fct(dm, spec, ARM) convert_var_to_fct <- function(data, metacore, var) { - verify_DatasetMeta(metacore) + verify_DatasetMeta(metacore) code_translation <- get_control_term(metacore, {{ var }}) var_str <- as_label(enexpr(var)) %>% str_remove_all("\"") @@ -332,4 +344,3 @@ convert_var_to_fct <- function(data, metacore, var) { data %>% mutate({{ var }} := factor({{ var }}, levels = levels)) } - diff --git a/R/labels.R b/R/labels.R index 7e6cee3..67509dd 100644 --- a/R/labels.R +++ b/R/labels.R @@ -11,27 +11,29 @@ #' #' @examples #' add_labels( -#' mtcars, -#' mpg = "Miles Per Gallon", -#' cyl = "Cylinders" -#' ) +#' mtcars, +#' mpg = "Miles Per Gallon", +#' cyl = "Cylinders" +#' ) #' add_labels <- function(data, ...) { - # Pull out ellipsis to list - args <- list2(...) - - # Check params - if (!inherits(data, 'data.frame')) stop("Labels must be applied to a data.frame or tibble") - if (!is_named(args)) stop("Must provide variable name and label as named arguments") - if (!all(names(args) %in% names(data))) { - stop("All variable names supplied to label must be variables in data") - } - if (!all(map_lgl(args, is.character))) stop("All labels must be character") - - # Iterate the args supplied and update the variable labels in place - walk2(names(args), args, ~ {attr(data[[.x]], "label") <<- .y}) - - data + # Pull out ellipsis to list + args <- list2(...) + + # Check params + if (!inherits(data, "data.frame")) stop("Labels must be applied to a data.frame or tibble") + if (!is_named(args)) stop("Must provide variable name and label as named arguments") + if (!all(names(args) %in% names(data))) { + stop("All variable names supplied to label must be variables in data") + } + if (!all(map_lgl(args, is.character))) stop("All labels must be character") + + # Iterate the args supplied and update the variable labels in place + walk2(names(args), args, ~ { + attr(data[[.x]], "label") <<- .y + }) + + data } @@ -50,13 +52,13 @@ add_labels <- function(data, ...) { #' remove_labels(data) #' remove_labels <- function(data) { - # Check data - if (!inherits(data, 'data.frame')) stop("Labels must be removed from a data.frame or tibble") + # Check data + if (!inherits(data, "data.frame")) stop("Labels must be removed from a data.frame or tibble") - map_dfr(data, function(x){ - attr(x, "label") <- NULL - x - }) + map_dfr(data, function(x) { + attr(x, "label") <- NULL + x + }) } #' Apply labels to a data frame using a metacore object @@ -80,60 +82,59 @@ remove_labels <- function(data) { #' @examples #' #' mc <- metacore::spec_to_metacore( -#' metacore::metacore_example("p21_mock.xlsx"), -#' quiet=TRUE -#' ) +#' metacore::metacore_example("p21_mock.xlsx"), +#' quiet = TRUE +#' ) #' dm <- haven::read_xpt(metatools_example("dm.xpt")) #' set_variable_labels(dm, mc, dataset_name = "DM") set_variable_labels <- function(data, metacore, dataset_name = deprecated()) { - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "check_unique_keys(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "check_unique_keys(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) - ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - verify_DatasetMeta(metacore) - - # Grab out the var names and labels - var_spec <- metacore$var_spec %>% - select(variable, label) - - - ns <- var_spec$variable - labs <- var_spec$label - dns <- names(data) - - # Are there any variables in data not in the metadata - mismatch <- setdiff(dns, ns) - in_meta <- ns[which(ns %in% mismatch)] - in_data <- dns[which(dns %in% mismatch)] - - if (length(in_meta) > 0) { - wrn <- paste0("Variables in metadata not in data:\n\t", paste0(in_meta, collapse="\n\t")) - warning(wrn, call. = FALSE) - } - - if (length(in_data) > 0) { - wrn <- paste0("Variables in data not in metadata:\n\t", paste0(in_data, collapse="\n\t")) - warning(wrn, call. = FALSE) - } - - # Pick out only the variables which exist in both and build list - match <- intersect(ns, dns) - ind <- which(ns %in% match) - - # Subset and create a named list - ns <- ns[ind] - labs <- labs[ind] - names(labs) <- ns - labs <- as.list(labs) - - # Apply the labels to the data - args = append(list(data), labs) - do.call(add_labels, args) - + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + verify_DatasetMeta(metacore) + + # Grab out the var names and labels + var_spec <- metacore$var_spec %>% + select(variable, label) + + + ns <- var_spec$variable + labs <- var_spec$label + dns <- names(data) + + # Are there any variables in data not in the metadata + mismatch <- setdiff(dns, ns) + in_meta <- ns[which(ns %in% mismatch)] + in_data <- dns[which(dns %in% mismatch)] + + if (length(in_meta) > 0) { + wrn <- paste0("Variables in metadata not in data:\n\t", paste0(in_meta, collapse = "\n\t")) + warning(wrn, call. = FALSE) + } + + if (length(in_data) > 0) { + wrn <- paste0("Variables in data not in metadata:\n\t", paste0(in_data, collapse = "\n\t")) + warning(wrn, call. = FALSE) + } + + # Pick out only the variables which exist in both and build list + match <- intersect(ns, dns) + ind <- which(ns %in% match) + + # Subset and create a named list + ns <- ns[ind] + labs <- labs[ind] + names(labs) <- ns + labs <- as.list(labs) + + # Apply the labels to the data + args <- append(list(data), labs) + do.call(add_labels, args) } diff --git a/R/sort.R b/R/sort.R index 5a3b4d9..4f26990 100644 --- a/R/sort.R +++ b/R/sort.R @@ -1,4 +1,3 @@ - #' Sort Columns by Order #' #' This function sorts the dataset according to the order found in the @@ -25,25 +24,25 @@ #' data <- read_xpt(metatools_example("adsl.xpt")) #' order_cols(data, spec) order_cols <- function(data, metacore, dataset_name = deprecated()) { - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "order_cols(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "order_cols(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) - ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - verify_DatasetMeta(metacore) - var_ord <- metacore$ds_vars %>% - filter(!is.na(order)) %>% - arrange(order) %>% - pull(variable) %>% - keep(~. %in% names(data)) + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + verify_DatasetMeta(metacore) + var_ord <- metacore$ds_vars %>% + filter(!is.na(order)) %>% + arrange(order) %>% + pull(variable) %>% + keep(~ . %in% names(data)) - data %>% - select(all_of(var_ord), everything()) + data %>% + select(all_of(var_ord), everything()) } @@ -73,22 +72,22 @@ order_cols <- function(data, metacore, dataset_name = deprecated()) { #' data <- read_xpt(metatools_example("adsl.xpt")) #' sort_by_key(data, spec) sort_by_key <- function(data, metacore, dataset_name = deprecated()) { - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "sort_by_key(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "sort_by_key(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) - ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - verify_DatasetMeta(metacore) - var_ord <- metacore$ds_vars %>% - filter(!is.na(key_seq)) %>% - arrange(key_seq) %>% - pull(variable) + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + verify_DatasetMeta(metacore) + var_ord <- metacore$ds_vars %>% + filter(!is.na(key_seq)) %>% + arrange(key_seq) %>% + pull(variable) - data %>% - arrange(across(all_of(var_ord))) + data %>% + arrange(across(all_of(var_ord))) } diff --git a/R/supp.R b/R/supp.R index ad24de0..6d40c90 100644 --- a/R/supp.R +++ b/R/supp.R @@ -12,57 +12,60 @@ #' #' build_qnam <- function(dataset, qnam, qlabel, idvar, qeval, qorig) { - # Need QNAM as a variable - qval <- as.symbol(qnam) - - # DM won't have an IDVAR so handle that - if (is.na(idvar) || idvar == '') { - dataset <- dataset %>% - mutate(IDVARVAL = idvar) - idvarval <- sym('IDVARVAL') - - } else { - idvarval <- as.symbol(idvar) - } + # Need QNAM as a variable + qval <- as.symbol(qnam) + + # DM won't have an IDVAR so handle that + if (is.na(idvar) || idvar == "") { + dataset <- dataset %>% + mutate(IDVARVAL = idvar) + idvarval <- sym("IDVARVAL") + } else { + idvarval <- as.symbol(idvar) + } - dup_sup <- dataset %>% - select(STUDYID, RDOMAIN = DOMAIN, USUBJID, !!idvarval, !!qval) %>% - rename(IDVARVAL = !!idvarval, QVAL = !!qval) %>% - filter(!is.na(QVAL)) %>% - mutate( - IDVAR = idvar, - QNAM = qnam, - QLABEL = qlabel, - QORIG = qorig, - QEVAL = qeval - ) + dup_sup <- dataset %>% + select(STUDYID, RDOMAIN = DOMAIN, USUBJID, !!idvarval, !!qval) %>% + rename(IDVARVAL = !!idvarval, QVAL = !!qval) %>% + filter(!is.na(QVAL)) %>% + mutate( + IDVAR = idvar, + QNAM = qnam, + QLABEL = qlabel, + QORIG = qorig, + QEVAL = qeval + ) - out <- dup_sup %>% - distinct(STUDYID, RDOMAIN, - USUBJID, IDVARVAL, QNAM, .keep_all = TRUE) %>% - select(STUDYID, RDOMAIN, USUBJID, IDVAR, - IDVARVAL, QNAM, QLABEL, QVAL, - QORIG, QEVAL) + out <- dup_sup %>% + distinct(STUDYID, RDOMAIN, + USUBJID, IDVARVAL, QNAM, + .keep_all = TRUE + ) %>% + select( + STUDYID, RDOMAIN, USUBJID, IDVAR, + IDVARVAL, QNAM, QLABEL, QVAL, + QORIG, QEVAL + ) - test_out <- dup_sup %>% - distinct() - if(nrow(out) != nrow(test_out)){ - stop("The combination of STUDYID, RDOMAIN, USUBJID, IDVARVAL, QNAM is ambiguous. Consider modifying the IDVAR", - call. = FALSE) - } + test_out <- dup_sup %>% + distinct() + if (nrow(out) != nrow(test_out)) { + stop("The combination of STUDYID, RDOMAIN, USUBJID, IDVARVAL, QNAM is ambiguous. Consider modifying the IDVAR", + call. = FALSE + ) + } - blank_test <- out %>% - pull(QVAL) - if(any(blank_test == "")){ - message(paste0("Empty QVAL rows removed for QNAM = ", unique(out$QNAM))) - out <- out %>% - filter(QVAL != "") - } - out + blank_test <- out %>% + pull(QVAL) + if (any(blank_test == "")) { + message(paste0("Empty QVAL rows removed for QNAM = ", unique(out$QNAM))) + out <- out %>% + filter(QVAL != "") + } + out } - #' Make Supplemental Qualifier #' #' @param dataset dataset the supp will be pulled from @@ -87,38 +90,41 @@ build_qnam <- function(dataset, qnam, qlabel, idvar, qeval, qorig) { #' spec <- metacore %>% select_dataset("AE") #' ae <- combine_supp(sdtm_ae, sdtm_suppae) #' make_supp_qual(ae, spec) %>% as_tibble() -make_supp_qual <- function(dataset, metacore, dataset_name = deprecated()){ - if (is_present(dataset_name)) { - lifecycle::deprecate_warn( - when = "0.2.0", - what = "make_supp_qual(dataset_name)", - details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. +make_supp_qual <- function(dataset, metacore, dataset_name = deprecated()) { + if (is_present(dataset_name)) { + lifecycle::deprecate_warn( + when = "0.2.0", + what = "make_supp_qual(dataset_name)", + details = cli_inform(c("i" = col_red("The {.arg dataset_name} argument will be removed in a future release. Please use {.fn metacore::select_dataset} to subset the {.obj metacore} object to obtain metadata for a single dataset."))) - ) - metacore <- make_lone_dataset(metacore, dataset_name) - } - verify_DatasetMeta(metacore) + ) + metacore <- make_lone_dataset(metacore, dataset_name) + } + verify_DatasetMeta(metacore) - supp_vars <- metacore$ds_vars %>% - filter(supp_flag) - if(nrow(supp_vars) == 0){ - stop("No supplemental variables specified in metacore object. Please check your specifications", - call. = FALSE) - } + supp_vars <- metacore$ds_vars %>% + filter(supp_flag) + if (nrow(supp_vars) == 0) { + stop("No supplemental variables specified in metacore object. Please check your specifications", + call. = FALSE + ) + } - supp_meta <- supp_vars %>% - select(dataset, variable) %>% - left_join(metacore$var_spec, by = "variable") %>% - left_join(metacore$value_spec, by = c("dataset", "variable")) %>% - left_join(metacore$supp, by = c("dataset", "variable")) %>% - select(qnam = variable, qlabel = label, - qorig = origin, qeval = qeval, - idvar = idvar) %>% - distinct() #Protection against bad specs - #TODO Addin in checks/coercion for when combining cols of different types - pmap_dfr(supp_meta, build_qnam, dataset=dataset) %>% - arrange(USUBJID, QNAM, IDVARVAL) + supp_meta <- supp_vars %>% + select(dataset, variable) %>% + left_join(metacore$var_spec, by = "variable") %>% + left_join(metacore$value_spec, by = c("dataset", "variable")) %>% + left_join(metacore$supp, by = c("dataset", "variable")) %>% + select( + qnam = variable, qlabel = label, + qorig = origin, qeval = qeval, + idvar = idvar + ) %>% + distinct() # Protection against bad specs + # TODO Addin in checks/coercion for when combining cols of different types + pmap_dfr(supp_meta, build_qnam, dataset = dataset) %>% + arrange(USUBJID, QNAM, IDVARVAL) } #' Combine the Domain and Supplemental Qualifier @@ -132,62 +138,66 @@ make_supp_qual <- function(dataset, metacore, dataset_name = deprecated()){ #' @examples #' library(safetyData) #' library(tibble) -#' combine_supp(sdtm_ae, sdtm_suppae) %>% as_tibble() -combine_supp <- function(dataset, supp){ - if(!is.data.frame(dataset) | !is.data.frame(supp)){ - stop("You must supply a domain and supplemental dataset", call. = FALSE) - } +#' combine_supp(sdtm_ae, sdtm_suppae) %>% as_tibble() +combine_supp <- function(dataset, supp) { + if (!is.data.frame(dataset) | !is.data.frame(supp)) { + stop("You must supply a domain and supplemental dataset", call. = FALSE) + } if (nrow(supp) == 0) { warning("Zero rows in supp, returning original dataset unchanged") return(dataset) } - supp_cols <- c("STUDYID", "RDOMAIN", "USUBJID", "IDVAR", "IDVARVAL", - "QNAM", "QLABEL", "QVAL", "QORIG") - maybe <- c("QEVAL") - ext_supp_col <- names(supp) %>% discard(~. %in% c(supp_cols, maybe)) - mis_supp_col <- supp_cols %>% discard(~. %in% names(supp)) - if(length(ext_supp_col) > 0 | length(mis_supp_col) > 0){ - mess <- "Supplemental datasets need to comply with CDISC standards\n" - ext <- if_else(length(ext_supp_col) > 0, - paste0("The following columns need to be removed:\n", paste0(ext_supp_col, collapse = "\n")), - "") - mis <- if_else(length(mis_supp_col) > 0, - paste0("The following columns are missing:\n", paste0(mis_supp_col, collapse = "\n")), - "") - stop(paste0(mess, ext, mis)) - } - all_qnam <- unique(supp$QNAM) - existing_qnam <- intersect(all_qnam, names(dataset)) - if (length(existing_qnam) > 0) { - stop( - "The following column(s) would be created by combine_supp(), but are already in the original dataset:\n ", - paste(existing_qnam, sep = ", ") - ) - } - - # In order to prevent issues when there are multiple IDVARS we need to merge - # each IDVAR into the domain seperately (otherwise there is problems when the - # two IDVARS don't overlap) - - supp_wides_prep <- - supp %>% - select(-any_of(c("QLABEL", "QORIG", "QEVAL"))) %>% #Removing columns not for the main dataset - rename(DOMAIN = RDOMAIN) %>% - group_by(IDVAR, QNAM) %>% #For when there are multiple IDs - group_split() + supp_cols <- c( + "STUDYID", "RDOMAIN", "USUBJID", "IDVAR", "IDVARVAL", + "QNAM", "QLABEL", "QVAL", "QORIG" + ) + maybe <- c("QEVAL") + ext_supp_col <- names(supp) %>% discard(~ . %in% c(supp_cols, maybe)) + mis_supp_col <- supp_cols %>% discard(~ . %in% names(supp)) + if (length(ext_supp_col) > 0 | length(mis_supp_col) > 0) { + mess <- "Supplemental datasets need to comply with CDISC standards\n" + ext <- if_else(length(ext_supp_col) > 0, + paste0("The following columns need to be removed:\n", paste0(ext_supp_col, collapse = "\n")), + "" + ) + mis <- if_else(length(mis_supp_col) > 0, + paste0("The following columns are missing:\n", paste0(mis_supp_col, collapse = "\n")), + "" + ) + stop(paste0(mess, ext, mis)) + } + all_qnam <- unique(supp$QNAM) + existing_qnam <- intersect(all_qnam, names(dataset)) + if (length(existing_qnam) > 0) { + stop( + "The following column(s) would be created by combine_supp(), but are already in the original dataset:\n ", + paste(existing_qnam, sep = ", ") + ) + } - supp_wides <- purrr::pmap(.l = list(supp = supp_wides_prep), .f = combine_supp_make_wide) - ret <- reduce(.x = append(list(dataset), supp_wides), .f = combine_supp_join) - ret$IDVARVAL <- NULL + # In order to prevent issues when there are multiple IDVARS we need to merge + # each IDVAR into the domain seperately (otherwise there is problems when the + # two IDVARS don't overlap) - labels_to_add <- unique(supp[, c("QNAM", "QLABEL")]) - for (current_idx in seq_len(nrow(labels_to_add))) { - current_col <- labels_to_add$QNAM[current_idx] - current_label <- labels_to_add$QLABEL[current_idx] - attr(ret[[current_col]], "label") <- current_label - } + supp_wides_prep <- + supp %>% + select(-any_of(c("QLABEL", "QORIG", "QEVAL"))) %>% # Removing columns not for the main dataset + rename(DOMAIN = RDOMAIN) %>% + group_by(IDVAR, QNAM) %>% # For when there are multiple IDs + group_split() + + supp_wides <- purrr::pmap(.l = list(supp = supp_wides_prep), .f = combine_supp_make_wide) + ret <- reduce(.x = append(list(dataset), supp_wides), .f = combine_supp_join) + ret$IDVARVAL <- NULL + + labels_to_add <- unique(supp[, c("QNAM", "QLABEL")]) + for (current_idx in seq_len(nrow(labels_to_add))) { + current_col <- labels_to_add$QNAM[current_idx] + current_label <- labels_to_add$QLABEL[current_idx] + attr(ret[[current_col]], "label") <- current_label + } - ret + ret } # Create a wide version of `supp` for merging into the source dataset. @@ -248,14 +258,15 @@ combine_supp_join <- function(dataset, supp) { if (expected_na_difference != actual_na_difference) { stop( "An unexpected number of rows were replaced while merging QNAM ", current_qnam, " and IDVAR ", current_idvar, - "\n Please verify that your SUPP domain is valid SDTM with only one matched row per key column set") + "\n Please verify that your SUPP domain is valid SDTM with only one matched row per key column set" + ) } } else { # Verify that nothing will be missed missing <- anti_join(supp_prep, ret, by = by) # Add message for when there are rows in the supp that didn't get merged - if(nrow(missing) > 0) { + if (nrow(missing) > 0) { missing_txt <- capture.output( missing %>% @@ -263,9 +274,13 @@ combine_supp_join <- function(dataset, supp) { print() ) %>% paste0(collapse = "\n") - stop(paste0("Not all rows of the Supp were merged. The following rows are missing:\n", - missing_txt), - call. = FALSE) + stop( + paste0( + "Not all rows of the Supp were merged. The following rows are missing:\n", + missing_txt + ), + call. = FALSE + ) } # join the data @@ -281,49 +296,55 @@ combine_supp_join <- function(dataset, supp) { #' #' @return list of datasets #' @noRd -combine_supp_by_idvar <- function(dataset, supp){ - # Get the IDVAR value to allow for renaming of IDVARVAL - id_var <- unique(supp$IDVAR) - - wide_x <- supp %>% - pivot_wider( - names_from = QNAM, - values_from = QVAL) %>% - select(-IDVAR) - - if(!is.na(id_var) && id_var != ""){ - id_var_sym <- sym(id_var) - - by <- c("STUDYID", "DOMAIN", "USUBJID", "IDVARVAL") - wide_x <- wide_x %>% - mutate(IDVARVAL = as.character(IDVARVAL) %>% - str_trim()) - # Make a dummy IDVARVAL variable to merge on, won't effect the dataset - dataset_chr <- dataset %>% - mutate(IDVARVAL = as.character(!!id_var_sym) %>% - str_trim()) +combine_supp_by_idvar <- function(dataset, supp) { + # Get the IDVAR value to allow for renaming of IDVARVAL + id_var <- unique(supp$IDVAR) - out <- left_join(dataset_chr, wide_x, - by = by) %>% - select(-IDVARVAL) - missing <- anti_join(wide_x,dataset_chr, by = by) + wide_x <- supp %>% + pivot_wider( + names_from = QNAM, + values_from = QVAL + ) %>% + select(-IDVAR) - # Add message for when there are rows in the supp that didn't get merged - if(nrow(missing) > 0) { - missing_txt <- capture.output(missing %>% - select(USUBJID, !!sym(id_var)) %>% - print()) %>% - paste0(collapse = "\n") - stop(paste0("Not all rows of the Supp were merged. The following rows are missing:\n", - missing_txt), - call. = FALSE) - } + if (!is.na(id_var) && id_var != "") { + id_var_sym <- sym(id_var) + + by <- c("STUDYID", "DOMAIN", "USUBJID", "IDVARVAL") + wide_x <- wide_x %>% + mutate(IDVARVAL = as.character(IDVARVAL) %>% + str_trim()) + # Make a dummy IDVARVAL variable to merge on, won't effect the dataset + dataset_chr <- dataset %>% + mutate(IDVARVAL = as.character(!!id_var_sym) %>% + str_trim()) + + out <- left_join(dataset_chr, wide_x, + by = by + ) %>% + select(-IDVARVAL) + missing <- anti_join(wide_x, dataset_chr, by = by) - } else { - wide_x <- wide_x %>% - select(-IDVARVAL) - out <- left_join(dataset, wide_x, - by = c("STUDYID", "DOMAIN", "USUBJID")) - } - out + # Add message for when there are rows in the supp that didn't get merged + if (nrow(missing) > 0) { + missing_txt <- capture.output(missing %>% + select(USUBJID, !!sym(id_var)) %>% + print()) %>% + paste0(collapse = "\n") + stop( + paste0( + "Not all rows of the Supp were merged. The following rows are missing:\n", + missing_txt + ), + call. = FALSE + ) + } + } else { + wide_x <- wide_x %>% + select(-IDVARVAL) + out <- left_join(dataset, wide_x, + by = c("STUDYID", "DOMAIN", "USUBJID") + ) + } + out } diff --git a/R/utils.R b/R/utils.R index cf251ed..f78c30c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -32,10 +32,10 @@ metatools_example <- function(file = NULL) { #' @return metacore object #' @noRd make_lone_dataset <- function(metacore, dataset_name) { - lifecycle::deprecate_soft( - what = "make_lone_dataset()", - when = "0.2.0" - ) + lifecycle::deprecate_soft( + what = "make_lone_dataset()", + when = "0.2.0" + ) if (!(nrow(metacore$ds_spec) == 1 | !is.null(dataset_name))) { stop("Requires either a subsetted metacore object or a dataset name", call. = FALSE) } diff --git a/README.Rmd b/README.Rmd index f5812fd..5c87bb1 100644 --- a/README.Rmd +++ b/README.Rmd @@ -51,14 +51,14 @@ metacore <- metacore %>% select_dataset("ADSL") ds_list <- list(DM = read_xpt(metatools_example("dm.xpt"))) # Pull in columns from DM to be in ADSL -adsl <- build_from_derived(metacore, ds_list, predecessor_only = FALSE) -adsl +adsl <- build_from_derived(metacore, ds_list, predecessor_only = FALSE) +adsl # Now we can make some new columns and converting columns into factors adsl %>% - select(USUBJID, AGE, ETHNIC) %>% - create_cat_var(metacore, AGE, AGEGR1, AGEGR1N) %>% #Add an AGEGR1 and AGEGR1N column - convert_var_to_fct(metacore, ETHNIC) # Change ETHNIC to as factor + select(USUBJID, AGE, ETHNIC) %>% + create_cat_var(metacore, AGE, AGEGR1, AGEGR1N) %>% # Add an AGEGR1 and AGEGR1N column + convert_var_to_fct(metacore, ETHNIC) # Change ETHNIC to as factor ``` Metatools can also be used to run checks @@ -68,8 +68,7 @@ data <- read_xpt(metatools_example("adsl.xpt")) # Checks can be run on a single column check_ct_col(data, metacore, TRT01PN) # Checks column only contains control terminology -# Or across all the columns -check_ct_data(data, metacore) %>% # Checks control terminology for all columns -check_variables(metacore) # Check all variables in the metadata are in the dataset and there aren't any extra columns - +# Or across all the columns +check_ct_data(data, metacore) %>% # Checks control terminology for all columns + check_variables(metacore) # Check all variables in the metadata are in the dataset and there aren't any extra columns ``` diff --git a/tests/spelling.R b/tests/spelling.R index 6713838..13f77d9 100644 --- a/tests/spelling.R +++ b/tests/spelling.R @@ -1,3 +1,6 @@ -if(requireNamespace('spelling', quietly = TRUE)) - spelling::spell_check_test(vignettes = TRUE, error = FALSE, - skip_on_cran = TRUE) +if (requireNamespace("spelling", quietly = TRUE)) { + spelling::spell_check_test( + vignettes = TRUE, error = FALSE, + skip_on_cran = TRUE + ) +} diff --git a/tests/testthat/test-build.R b/tests/testthat/test-build.R index fc89c0a..d0903ca 100644 --- a/tests/testthat/test-build.R +++ b/tests/testthat/test-build.R @@ -1,11 +1,11 @@ # Suppress cli output during testing -options(cli.default_handler = function(...) { }) +options(cli.default_handler = function(...) {}) load(metacore::metacore_example("pilot_ADaM.rda")) spec <- metacore %>% select_dataset("ADSL", quiet = TRUE) test_that("drop_unspec_vars", { data <- haven::read_xpt(metatools_example("adsl.xpt")) %>% - mutate(AGEGR2 = 'DUMMY', AGEGR2N = 99, foo = "Hello", foo2 = "world") + mutate(AGEGR2 = "DUMMY", AGEGR2N = 99, foo = "Hello", foo2 = "world") man_vars <- metacore$ds_vars %>% filter(dataset == "ADSL") %>% @@ -15,8 +15,8 @@ test_that("drop_unspec_vars", { drop_unspec_vars(data, spec) %>% expect_equal(man_dat) expect_message(drop_unspec_vars(data, spec), - label = "The following variable(s) were dropped:\n foo\n foo2") - + label = "The following variable(s) were dropped:\n foo\n foo2" + ) }) @@ -50,138 +50,152 @@ test_that("build_from_derived", { sort() expect_warning( - build_from_derived(spec, ds_list, - predecessor_only = FALSE, - keep = TRUE - ) %>% - names() %>% - sort() %>% - expect_equal(man_vars), - label = paste0("Setting 'keep' = TRUE has been superseded, and will be", - " unavailable in future releases. Please consider setting", - " 'keep' equal to 'ALL' or 'PREREQUISITE'.") + build_from_derived(spec, ds_list, + predecessor_only = FALSE, + keep = TRUE + ) %>% + names() %>% + sort() %>% + expect_equal(man_vars), + label = paste0( + "Setting 'keep' = TRUE has been superseded, and will be", + " unavailable in future releases. Please consider setting", + " 'keep' equal to 'ALL' or 'PREREQUISITE'." + ) ) # Pulling through from more than one dataset spec2 <- metacore %>% select_dataset("ADAE", quiet = TRUE) adae_auto <- build_from_derived(spec2, - ds_list = list("AE" = safetyData::sdtm_ae, - "ADSL" = safetyData::adam_adsl), - predecessor_only = FALSE, - keep = FALSE + ds_list = list( + "AE" = safetyData::sdtm_ae, + "ADSL" = safetyData::adam_adsl + ), + predecessor_only = FALSE, + keep = FALSE ) ae_part_vars <- spec2$derivations %>% - filter(str_detect(derivation,"AE\\.[[:alnum:]]*$")) %>% - pull(derivation) %>% - str_remove("^AE\\.") %>% - c("STUDYID", "USUBJID", .) + filter(str_detect(derivation, "AE\\.[[:alnum:]]*$")) %>% + pull(derivation) %>% + str_remove("^AE\\.") %>% + c("STUDYID", "USUBJID", .) ae_part <- select(safetyData::sdtm_ae, all_of(ae_part_vars)) adsl_part_vars <- spec2$derivations %>% - filter(str_detect(derivation,"ADSL\\.[[:alnum:]]*$")) %>% - pull(derivation) %>% - str_remove("^ADSL\\.") + filter(str_detect(derivation, "ADSL\\.[[:alnum:]]*$")) %>% + pull(derivation) %>% + str_remove("^ADSL\\.") adsl_part <- - select(safetyData::adam_adsl, all_of(adsl_part_vars)) |> - rename(TRTA = TRT01A, TRTAN = TRT01AN) + select(safetyData::adam_adsl, all_of(adsl_part_vars)) |> + rename(TRTA = TRT01A, TRTAN = TRT01AN) adae_man <- full_join(adsl_part, ae_part, by = c("STUDYID", "USUBJID"), multiple = "all") %>% - select(all_of(names(adae_auto)), everything()) - expect_equal(adae_auto,adae_man ) + select(all_of(names(adae_auto)), everything()) + expect_equal(adae_auto, adae_man) # Pulling through from one dataset when spec has more than one adae_auto_adsl_only <- build_from_derived(spec2, - ds_list = list("ADSL" = safetyData::adam_adsl), - predecessor_only = FALSE, - keep = FALSE + ds_list = list("ADSL" = safetyData::adam_adsl), + predecessor_only = FALSE, + keep = FALSE ) |> - order_cols(spec2) + order_cols(spec2) adsl_man <- order_cols(adsl_part, spec2) expect_equal(adae_auto_adsl_only, adsl_man) - adsl = safetyData::adam_adsl - ae = safetyData::sdtm_ae + adsl <- safetyData::adam_adsl + ae <- safetyData::sdtm_ae adae_auto_unnamed <- build_from_derived(spec2, - ds_list = list(ae, adsl), - predecessor_only = FALSE, - keep = FALSE + ds_list = list(ae, adsl), + predecessor_only = FALSE, + keep = FALSE ) - expect_equal(adae_auto,adae_man) + expect_equal(adae_auto, adae_man) expect_warning(build_from_derived(spec2, - ds_list = list(safetyData::sdtm_ae, adsl), - predecessor_only = FALSE, - keep = FALSE + ds_list = list(safetyData::sdtm_ae, adsl), + predecessor_only = FALSE, + keep = FALSE )) # Pulling through all columns from original dataset adae_full <- build_from_derived(spec2, - ds_list = list("AE" = safetyData::sdtm_ae, - "ADSL" = safetyData::adam_adsl), - predecessor_only = FALSE, - keep = "ALL" + ds_list = list( + "AE" = safetyData::sdtm_ae, + "ADSL" = safetyData::adam_adsl + ), + predecessor_only = FALSE, + keep = "ALL" ) full_adsl_part <- safetyData::adam_adsl %>% - mutate(TRTA = TRT01A, TRTAN = TRT01AN) + mutate(TRTA = TRT01A, TRTAN = TRT01AN) adae_all_man <- full_join(full_adsl_part, safetyData::sdtm_ae, by = c("STUDYID", "USUBJID"), multiple = "all") - expect_equal(adae_full,adae_all_man) + expect_equal(adae_full, adae_all_man) # Pulling through columns required for future derivations spec3 <- metacore %>% select_dataset("ADAE", quiet = TRUE) adae_prereq <- build_from_derived(spec3, - ds_list = list("AE" = safetyData::sdtm_ae, - "ADSL" = safetyData::adam_adsl), - predecessor_only = FALSE, - keep = "PREREQUISITE" + ds_list = list( + "AE" = safetyData::sdtm_ae, + "ADSL" = safetyData::adam_adsl + ), + predecessor_only = FALSE, + keep = "PREREQUISITE" ) adae_auto <- build_from_derived(spec3, - ds_list = list("AE" = safetyData::sdtm_ae, - "ADSL" = safetyData::adam_adsl), - predecessor_only = FALSE, - keep = "PREREQUISITE" + ds_list = list( + "AE" = safetyData::sdtm_ae, + "ADSL" = safetyData::adam_adsl + ), + predecessor_only = FALSE, + keep = "PREREQUISITE" ) adae_all <- build_from_derived(spec3, - ds_list = list("AE" = safetyData::sdtm_ae, - "ADSL" = safetyData::adam_adsl), - predecessor_only = FALSE, - keep = "ALL" + ds_list = list( + "AE" = safetyData::sdtm_ae, + "ADSL" = safetyData::adam_adsl + ), + predecessor_only = FALSE, + keep = "ALL" ) adae_prereq_man <- adae_all %>% - select(c(names(adae_auto))) + select(c(names(adae_auto))) expect_equal(adae_prereq, adae_prereq_man) - }) - test_that("add_variables", { - load(metacore::metacore_example("pilot_ADaM.rda")) - spec <- metacore %>% select_dataset("ADSL", quiet = TRUE) - data <- haven::read_xpt(metatools_example("adsl.xpt")) %>% - mutate(AGEGR2 = "DUMMY", AGEGR2N = 99) - data_mis <- data %>% - select(-TRTSDT, -TRT01P, -TRT01PN) - #Check data when there is missing - fx_miss <- add_variables(data_mis, spec) %>% - select(TRTSDT, TRT01P, TRT01PN) - man_miss <- data %>% - mutate(TRTSDT = as.Date(NA_integer_), - TRT01P = NA_character_, - TRT01PN = NA_integer_) %>% - select(TRTSDT, TRT01P, TRT01PN) - expect_equal(fx_miss, man_miss) - #Check data when there isn't any missing - expect_equal(add_variables(data, spec), - data) + load(metacore::metacore_example("pilot_ADaM.rda")) + spec <- metacore %>% select_dataset("ADSL", quiet = TRUE) + data <- haven::read_xpt(metatools_example("adsl.xpt")) %>% + mutate(AGEGR2 = "DUMMY", AGEGR2N = 99) + data_mis <- data %>% + select(-TRTSDT, -TRT01P, -TRT01PN) + # Check data when there is missing + fx_miss <- add_variables(data_mis, spec) %>% + select(TRTSDT, TRT01P, TRT01PN) + man_miss <- data %>% + mutate( + TRTSDT = as.Date(NA_integer_), + TRT01P = NA_character_, + TRT01PN = NA_integer_ + ) %>% + select(TRTSDT, TRT01P, TRT01PN) + expect_equal(fx_miss, man_miss) + # Check data when there isn't any missing + expect_equal( + add_variables(data, spec), + data + ) }) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 90a335f..ef394c2 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -1,5 +1,5 @@ # Suppress cli output during testing -options(cli.default_handler = function(...) { }) +options(cli.default_handler = function(...) {}) # Load data to use across tests load(metacore::metacore_example("pilot_ADaM.rda")) @@ -7,125 +7,135 @@ spec <- metacore %>% select_dataset("ADSL", quiet = TRUE) data <- haven::read_xpt(metatools_example("adsl.xpt")) mod_ds_vars <- spec$ds_vars %>% - mutate(core = if_else(variable %in% c("TRT01PN", "COMP8FL"), "Required", core)) + mutate(core = if_else(variable %in% c("TRT01PN", "COMP8FL"), "Required", core)) spec_mod <- metacore::metacore(spec$ds_spec, mod_ds_vars, spec$var_spec, spec$value_spec, spec$derivations, spec$codelist) %>% - suppressWarnings() + suppressWarnings() spec_mod <- select_dataset(spec_mod, "ADSL", quiet = TRUE) test_that("get_bad_ct works correctly", { - - # test na_acceptable - expect_equal(get_bad_ct(data, spec, "DCSREAS"), character(0)) - expect_equal(get_bad_ct(data, spec, "DCSREAS", TRUE), character(0)) - expect_equal(get_bad_ct(data, spec, "DCSREAS", FALSE), "") - - expect_equal(get_bad_ct(data, spec_mod, "COMP8FL"), "") - expect_equal(get_bad_ct(data, spec_mod, "COMP8FL", TRUE), character(0)) - expect_equal(get_bad_ct(data, spec_mod, "COMP8FL", FALSE), "") - - data_na <- data %>% - mutate(COMP8FL = if_else(dplyr::row_number() == 1, NA_character_, COMP8FL)) - expect_equal(get_bad_ct(data_na, spec_mod, "COMP8FL"), c(NA_character_, "")) - + # test na_acceptable + expect_equal(get_bad_ct(data, spec, "DCSREAS"), character(0)) + expect_equal(get_bad_ct(data, spec, "DCSREAS", TRUE), character(0)) + expect_equal(get_bad_ct(data, spec, "DCSREAS", FALSE), "") + + expect_equal(get_bad_ct(data, spec_mod, "COMP8FL"), "") + expect_equal(get_bad_ct(data, spec_mod, "COMP8FL", TRUE), character(0)) + expect_equal(get_bad_ct(data, spec_mod, "COMP8FL", FALSE), "") + + data_na <- data %>% + mutate(COMP8FL = if_else(dplyr::row_number() == 1, NA_character_, COMP8FL)) + expect_equal(get_bad_ct(data_na, spec_mod, "COMP8FL"), c(NA_character_, "")) }) test_that("check_ct_col works correctly", { - # Check it works with a character col - expect_equal(check_ct_col(data, spec, ARM), data) - # Check it works with a numeric col - expect_equal(check_ct_col(data, spec, TRT01PN), data) - # Check it works when passes a string - expect_equal(check_ct_col(data, spec, "TRT01PN"), data) - - # Test permitted Values - spec2 <- metacore::spec_to_metacore(metacore::metacore_example("p21_mock.xlsx"), quiet = TRUE) - dm <- select_dataset(spec2, "DM", quiet = TRUE) - expect_equal(check_ct_col(data, dm, ARM), data) - - # Test external dictionaries - data2 <- tibble::tibble(AELLT = "Hello") - ae <- select_dataset(spec2, "AE", quiet = TRUE) - expect_error(check_ct_col(data2, ae, AELLT), - "We currently don't have the ability to check against external libraries") - - # Test a column that isn't in the dataset - expect_error(check_ct_col(data, ae, AELLT), - "AELLT not found in dataset. Please check and try again") - - # Test NA acceptable - expect_error(check_ct_col(data, dm, COMP8FL, FALSE)) - - expect_equal(check_ct_col(data, dm, ARM, TRUE), data) - data_w_miss <- data %>% - mutate(TRT01PN = if_else(dplyr::row_number() == 3, NA_real_, TRT01PN)) - expect_error(check_ct_col(data_w_miss, spec, TRT01PN, FALSE)) - expect_equal(get_bad_ct(data_w_miss, spec, TRT01PN, FALSE), NA_real_) - expect_equal(check_ct_col(data_w_miss, spec, TRT01PN, TRUE), data_w_miss) - ### Test with a required column ### - # Required without missing - expect_equal(check_ct_col(data, spec_mod, TRT01PN), data) - # Required with missing - expect_error(check_ct_col(data, spec_mod, COMP8FL)) - expect_equal(get_bad_ct(data, spec_mod, COMP8FL), "") - expect_equal(check_ct_col(data, spec_mod, COMP8FL, TRUE), data) + # Check it works with a character col + expect_equal(check_ct_col(data, spec, ARM), data) + # Check it works with a numeric col + expect_equal(check_ct_col(data, spec, TRT01PN), data) + # Check it works when passes a string + expect_equal(check_ct_col(data, spec, "TRT01PN"), data) + + # Test permitted Values + spec2 <- metacore::spec_to_metacore(metacore::metacore_example("p21_mock.xlsx"), quiet = TRUE) + dm <- select_dataset(spec2, "DM", quiet = TRUE) + expect_equal(check_ct_col(data, dm, ARM), data) + + # Test external dictionaries + data2 <- tibble::tibble(AELLT = "Hello") + ae <- select_dataset(spec2, "AE", quiet = TRUE) + expect_error( + check_ct_col(data2, ae, AELLT), + "We currently don't have the ability to check against external libraries" + ) + + # Test a column that isn't in the dataset + expect_error( + check_ct_col(data, ae, AELLT), + "AELLT not found in dataset. Please check and try again" + ) + + # Test NA acceptable + expect_error(check_ct_col(data, dm, COMP8FL, FALSE)) + + expect_equal(check_ct_col(data, dm, ARM, TRUE), data) + data_w_miss <- data %>% + mutate(TRT01PN = if_else(dplyr::row_number() == 3, NA_real_, TRT01PN)) + expect_error(check_ct_col(data_w_miss, spec, TRT01PN, FALSE)) + expect_equal(get_bad_ct(data_w_miss, spec, TRT01PN, FALSE), NA_real_) + expect_equal(check_ct_col(data_w_miss, spec, TRT01PN, TRUE), data_w_miss) + ### Test with a required column ### + # Required without missing + expect_equal(check_ct_col(data, spec_mod, TRT01PN), data) + # Required with missing + expect_error(check_ct_col(data, spec_mod, COMP8FL)) + expect_equal(get_bad_ct(data, spec_mod, COMP8FL), "") + expect_equal(check_ct_col(data, spec_mod, COMP8FL, TRUE), data) }) test_that("check_ct_data works correctly", { - # Checking error for multiple words in multiple columns - data_multi_word <- data %>% - mutate(TRT01P = case_when(dplyr::row_number() == 2 ~ "Hello", - dplyr::row_number() == 3 ~ "World", - TRUE ~ TRT01P), - TRT01A = TRT01P) - expect_error(check_ct_data(data_multi_word, spec)) - - expect_error(check_ct_data(data, spec, FALSE)) - expect_equal(check_ct_data(data, spec, omit_vars = c("AGEGR2", "AGEGR2N")), data) - expect_equal(check_ct_data(data, spec, TRUE, omit_vars = c("AGEGR2", "AGEGR2N")), data) - expect_error(check_ct_data(data, spec_mod)) - expect_equal(check_ct_data(data, spec_mod, TRUE, omit_vars = c("AGEGR2", "AGEGR2N")), data) - - # Check character vector input for na_acceptable: - expect_error(check_ct_data(data, spec, na_acceptable = c("DISCONFL", "DSRAEFL"))) - expect_error(check_ct_data(data, spec, 1)) - - # Check omit_vars: - expect_error(check_ct_data(data, spec, omit_vars = c("A", "B"))) - expect_error(check_ct_data(data, spec, FALSE, omit_vars = c("DISCONFL", "DSRAEFL"))) - expect_equal( - check_ct_data( - data, - spec_mod, - na_acceptable = NULL, - omit_vars = c("AGEGR2", "AGEGR2N", "COMP8FL")), - data - ) + # Checking error for multiple words in multiple columns + data_multi_word <- data %>% + mutate( + TRT01P = case_when( + dplyr::row_number() == 2 ~ "Hello", + dplyr::row_number() == 3 ~ "World", + TRUE ~ TRT01P + ), + TRT01A = TRT01P + ) + expect_error(check_ct_data(data_multi_word, spec)) + + expect_error(check_ct_data(data, spec, FALSE)) + expect_equal(check_ct_data(data, spec, omit_vars = c("AGEGR2", "AGEGR2N")), data) + expect_equal(check_ct_data(data, spec, TRUE, omit_vars = c("AGEGR2", "AGEGR2N")), data) + expect_error(check_ct_data(data, spec_mod)) + expect_equal(check_ct_data(data, spec_mod, TRUE, omit_vars = c("AGEGR2", "AGEGR2N")), data) + + # Check character vector input for na_acceptable: + expect_error(check_ct_data(data, spec, na_acceptable = c("DISCONFL", "DSRAEFL"))) + expect_error(check_ct_data(data, spec, 1)) + + # Check omit_vars: + expect_error(check_ct_data(data, spec, omit_vars = c("A", "B"))) + expect_error(check_ct_data(data, spec, FALSE, omit_vars = c("DISCONFL", "DSRAEFL"))) + expect_equal( + check_ct_data( + data, + spec_mod, + na_acceptable = NULL, + omit_vars = c("AGEGR2", "AGEGR2N", "COMP8FL") + ), + data + ) }) test_that("variable_check works correctly", { - expect_equal(check_variables(data, spec), data) - data_miss <- data %>% select(-1) - expect_error(check_variables(data_miss, spec)) - data_extra <- data %>% mutate(foo = "hello") - expect_error(check_variables(data_extra, spec)) - data_mis_ex <- data_extra %>% select(-1) - expect_error(check_variables(data_mis_ex, spec)) + expect_equal(check_variables(data, spec), data) + data_miss <- data %>% select(-1) + expect_error(check_variables(data_miss, spec)) + data_extra <- data %>% mutate(foo = "hello") + expect_error(check_variables(data_extra, spec)) + data_mis_ex <- data_extra %>% select(-1) + expect_error(check_variables(data_mis_ex, spec)) }) test_that("check_unique_keys works as expected", { - #check requirement for subsetted metacore object or a dataset name - expect_error(check_unique_keys(data, metacore)) - #check missing variable keys error - adae <- select_dataset(metacore, "ADAE", quiet = TRUE) - expect_error(check_unique_keys(data, adae)) - #check works correctly when records are unique - adsl <- select_dataset(metacore, "ADSL", quiet = TRUE) - expect_message(check_unique_keys(data, adsl)) - #check works correctly when records are not unique - test <- build_from_derived(adae, - ds_list = list("AE" = safetyData::sdtm_ae, - "ADSL" = safetyData::adam_adsl), - predecessor_only = FALSE, - keep = FALSE) - expect_error(check_unique_keys(test, adae)) + # check requirement for subsetted metacore object or a dataset name + expect_error(check_unique_keys(data, metacore)) + # check missing variable keys error + adae <- select_dataset(metacore, "ADAE", quiet = TRUE) + expect_error(check_unique_keys(data, adae)) + # check works correctly when records are unique + adsl <- select_dataset(metacore, "ADSL", quiet = TRUE) + expect_message(check_unique_keys(data, adsl)) + # check works correctly when records are not unique + test <- build_from_derived(adae, + ds_list = list( + "AE" = safetyData::sdtm_ae, + "ADSL" = safetyData::adam_adsl + ), + predecessor_only = FALSE, + keep = FALSE + ) + expect_error(check_unique_keys(test, adae)) }) diff --git a/tests/testthat/test-codelist.R b/tests/testthat/test-codelist.R index 0301591..fb8c41c 100644 --- a/tests/testthat/test-codelist.R +++ b/tests/testthat/test-codelist.R @@ -1,5 +1,5 @@ # Suppress cli output during testing -options(cli.default_handler = function(...) { }) +options(cli.default_handler = function(...) {}) spec <- metacore::spec_to_metacore(metacore::metacore_example("p21_mock.xlsx"), quiet = TRUE) dm_spec <- select_dataset(spec, "DM", quiet = TRUE) @@ -26,8 +26,8 @@ test_that("create_subgrps", { expect_equal( - create_subgrps(c(1:10, NA), c("<2", "2-5", ">5")), - c("<2", "2-5", "2-5", "2-5", "2-5", ">5", ">5", ">5", ">5", ">5", NA) + create_subgrps(c(1:10, NA), c("<2", "2-5", ">5")), + c("<2", "2-5", "2-5", "2-5", "2-5", ">5", ">5", ">5", ">5", ">5", NA) ) }) @@ -53,81 +53,81 @@ test_that("create_var_from_codelist", { ) # Test numeric num_out <- dm %>% - filter(ARMCD != "Scrnfail") %>% - mutate(TRT01P = ARM) %>% - select(TRT01P, ARMCD) %>% - create_var_from_codelist(adsl_spec, TRT01P, TRT01PN) %>% - head() %>% - pull(TRT01PN) - expect_equal(num_out, c(0, 0, 81, 54, 81,0)) + filter(ARMCD != "Scrnfail") %>% + mutate(TRT01P = ARM) %>% + select(TRT01P, ARMCD) %>% + create_var_from_codelist(adsl_spec, TRT01P, TRT01PN) %>% + head() %>% + pull(TRT01PN) + expect_equal(num_out, c(0, 0, 81, 54, 81, 0)) # Test provide custom codelist - load(metacore::metacore_example('pilot_ADaM.rda')) + load(metacore::metacore_example("pilot_ADaM.rda")) adlb_spec <- metacore::select_dataset(metacore, "ADLBC", quiet = TRUE) data <- tibble::tibble( - PARAMCD = c("ALB", "ALP", "ALT", "DUMMY", "DUMMY2") + PARAMCD = c("ALB", "ALP", "ALT", "DUMMY", "DUMMY2") ) compare <- tibble::tibble( - PARAMCD = c("ALB", "ALP", "ALT", "DUMMY", "DUMMY2"), - PARAM = c("Albumin (g/L)", "Alkaline Phosphatase (U/L)", "Alanine Aminotransferase (U/L)", NA, NA) + PARAMCD = c("ALB", "ALP", "ALT", "DUMMY", "DUMMY2"), + PARAM = c("Albumin (g/L)", "Alkaline Phosphatase (U/L)", "Alanine Aminotransferase (U/L)", NA, NA) ) create_var_from_codelist( - data = data, - metacore = adlb_spec, - input_var = PARAMCD, - out_var = PARAM, - codelist = get_control_term(adlb_spec, PARAMCD), - decode_to_code = FALSE, - strict = FALSE + data = data, + metacore = adlb_spec, + input_var = PARAMCD, + out_var = PARAM, + codelist = get_control_term(adlb_spec, PARAMCD), + decode_to_code = FALSE, + strict = FALSE ) |> - select(PARAMCD, PARAM) |> - expect_equal(compare) + select(PARAMCD, PARAM) |> + expect_equal(compare) # Test warning where arg `strict == TRUE` create_var_from_codelist( - data = data, - metacore = adlb_spec, - input_var = PARAMCD, - out_var = PARAM, - codelist = get_control_term(adlb_spec, PARAMCD), - decode_to_code = FALSE, - strict = TRUE + data = data, + metacore = adlb_spec, + input_var = PARAMCD, + out_var = PARAM, + codelist = get_control_term(adlb_spec, PARAMCD), + decode_to_code = FALSE, + strict = TRUE ) |> - expect_warning() + expect_warning() # Test numeric variable used as input_var (strict == FALSE) data2 <- tibble::tibble( - PARAMN = c(18, 19, 20, 99) + PARAMN = c(18, 19, 20, 99) ) compare2 <- tibble::tibble( - PARAMN = c(18, 19, 20, 99), - PARAM = c("Sodium (mmol/L)", "Potassium (mmol/L)", "Chloride (mmol/L)", NA) + PARAMN = c(18, 19, 20, 99), + PARAM = c("Sodium (mmol/L)", "Potassium (mmol/L)", "Chloride (mmol/L)", NA) ) create_var_from_codelist( - data = data2, - metacore = adlb_spec, - input_var = PARAMN, - out_var = PARAM, - codelist = get_control_term(adlb_spec, PARAMN), - decode_to_code = FALSE, - strict = FALSE + data = data2, + metacore = adlb_spec, + input_var = PARAMN, + out_var = PARAM, + codelist = get_control_term(adlb_spec, PARAMN), + decode_to_code = FALSE, + strict = FALSE ) |> - select(PARAMN, PARAM) |> - expect_equal(compare2) + select(PARAMN, PARAM) |> + expect_equal(compare2) # Test numeric variable used as input_var (strict == TRUE) create_var_from_codelist( - data = data2, - metacore = adlb_spec, - input_var = PARAMN, - out_var = PARAM, - codelist = get_control_term(adlb_spec, PARAMN), - decode_to_code = FALSE, - strict = TRUE + data = data2, + metacore = adlb_spec, + input_var = PARAMN, + out_var = PARAM, + codelist = get_control_term(adlb_spec, PARAMN), + decode_to_code = FALSE, + strict = TRUE ) |> - expect_warning() + expect_warning() # Test for Variable not in specs expect_error(create_var_from_codelist(data, spec, VAR2, FOO)) @@ -142,11 +142,11 @@ test_that("create_cat_var", { ">80", 92, ) - man_dat_labs <- tibble:: tribble( - ~AGEGR2, ~n, - "18-64 years", 42, - "65-80 years", 172, - ">80 years", 92, + man_dat_labs <- tibble::tribble( + ~AGEGR2, ~n, + "18-64 years", 42, + "65-80 years", 172, + ">80 years", 92, ) # Grouping col only auto_dat <- create_cat_var(dm, adsl_spec, AGE, AGEGR1) %>% @@ -167,68 +167,68 @@ test_that("create_cat_var", { # Grouping column and numeric decode, build from decode == TRUE decode_num_dat <- create_cat_var(dm, adsl_spec, AGE, AGEGR2, AGEGR2N, TRUE) decode_num_dat %>% - group_by(AGEGR2) %>% - dplyr::summarise(n = dplyr::n()) %>% - expect_equal(man_dat_labs) + group_by(AGEGR2) %>% + dplyr::summarise(n = dplyr::n()) %>% + expect_equal(man_dat_labs) decode_num_dat %>% - pull(AGEGR2N) %>% - unique() %>% - expect_equal(c(1:3)) + pull(AGEGR2N) %>% + unique() %>% + expect_equal(c(1:3)) # Test error 'unable to decipher group definition' bad_ct <- adsl_spec$codelist |> - filter(name == "AGEGR1") |> - pull(codes) |> - purrr::pluck(1)|> - tibble::add_row(code = "DUMMY", decode = "DUMMY") + filter(name == "AGEGR1") |> + pull(codes) |> + purrr::pluck(1) |> + tibble::add_row(code = "DUMMY", decode = "DUMMY") - codelist <- adsl_spec$codelist |> filter(name == 'AGEGR1') + codelist <- adsl_spec$codelist |> filter(name == "AGEGR1") codelist$codes <- list(bad_ct) spec2 <- suppressWarnings(metacore::metacore( - adsl_spec$ds_spec, - adsl_spec$ds_vars, - adsl_spec$var_spec, - adsl_spec$value_spec, - adsl_spec$derivations, - codelist = codelist, - supp = adsl_spec$supp + adsl_spec$ds_spec, + adsl_spec$ds_vars, + adsl_spec$var_spec, + adsl_spec$value_spec, + adsl_spec$derivations, + codelist = codelist, + supp = adsl_spec$supp )) %>% - select_dataset("ADSL", quiet = TRUE) + select_dataset("ADSL", quiet = TRUE) create_cat_var(dm, spec2, AGE, AGEGR1, AGEGR1N, TRUE) |> - expect_error("Unable to decipher the following group definition: DUMMY. Please check your controlled terminology.") + expect_error("Unable to decipher the following group definition: DUMMY. Please check your controlled terminology.") # Test error 'group definitions are not exclusive' bad_ct <- adsl_spec$codelist |> - filter(name == "AGEGR1") |> - pull(codes) |> - purrr::pluck(1)|> - tibble::add_row(code = "18-64", decode = "18-64 years") + filter(name == "AGEGR1") |> + pull(codes) |> + purrr::pluck(1) |> + tibble::add_row(code = "18-64", decode = "18-64 years") - codelist <- adsl_spec$codelist |> filter(name == 'AGEGR1') + codelist <- adsl_spec$codelist |> filter(name == "AGEGR1") codelist$codes <- list(bad_ct) spec2 <- suppressWarnings(metacore::metacore( - adsl_spec$ds_spec, - adsl_spec$ds_vars, - adsl_spec$var_spec, - adsl_spec$value_spec, - adsl_spec$derivations, - codelist = codelist, - supp = adsl_spec$supp + adsl_spec$ds_spec, + adsl_spec$ds_vars, + adsl_spec$var_spec, + adsl_spec$value_spec, + adsl_spec$derivations, + codelist = codelist, + supp = adsl_spec$supp )) %>% - select_dataset("ADSL", quiet = TRUE) + select_dataset("ADSL", quiet = TRUE) create_cat_var(dm, spec2, AGE, AGEGR1, AGEGR1N, create_from_decode = TRUE) |> - expect_error("Group definitions are not exclusive. Please check your controlled terminology") + expect_error("Group definitions are not exclusive. Please check your controlled terminology") # Test error 'value exists that is not defined in controlled terminology dm2 <- dm |> - tibble::add_row(AGE = 15) |> - tibble::add_row(AGE = 16) + tibble::add_row(AGE = 15) |> + tibble::add_row(AGE = 16) x <- create_cat_var(dm2, adsl_spec, AGE, AGEGR2, create_from_decode = TRUE) |> - expect_warning() + expect_warning() # Test errors expect_error(create_cat_var(dm, spec, AGE, ARM)) diff --git a/tests/testthat/test-labels.R b/tests/testthat/test-labels.R index 3597819..c70336c 100644 --- a/tests/testthat/test-labels.R +++ b/tests/testthat/test-labels.R @@ -1,134 +1,134 @@ # Suppress cli output during testing -options(cli.default_handler = function(...) { }) +options(cli.default_handler = function(...) {}) # Mock up metacore data starwars_short <- dplyr::starwars %>% select(1:5) var_spec <- tibble::tibble( - variable = names(starwars_short), - length = rep(1, 5), - label = c("Name", "Height", "Mass", "Hair Color", "Skin Color"), - type = c("text", "int", "int", "int", "text"), - format = rep(NA_character_, 5), - common = rep(FALSE, 5) + variable = names(starwars_short), + length = rep(1, 5), + label = c("Name", "Height", "Mass", "Hair Color", "Skin Color"), + type = c("text", "int", "int", "int", "text"), + format = rep(NA_character_, 5), + common = rep(FALSE, 5) ) ds_spec <- tibble::tibble( - dataset = "Starwars", - structure = c(""), - label = "Star Wars" + dataset = "Starwars", + structure = c(""), + label = "Star Wars" ) ds_vars <- tibble::tibble( - dataset = rep("Starwars", 5), - variable = names(starwars_short), - order = 1:5, - keep = rep(TRUE, 5), - key_seq = 1:5, - core = rep(NA_character_, 5), - supp_flag = rep(FALSE, 5) + dataset = rep("Starwars", 5), + variable = names(starwars_short), + order = 1:5, + keep = rep(TRUE, 5), + key_seq = 1:5, + core = rep(NA_character_, 5), + supp_flag = rep(FALSE, 5) ) value_spec <- tibble::tibble( - dataset = character(0), - variable = character(0), - type = character(0), - origin = character(0), - sig_dig = character(0), - code_id = character(0), - where = character(0), - derivation_id = character(0) + dataset = character(0), + variable = character(0), + type = character(0), + origin = character(0), + sig_dig = character(0), + code_id = character(0), + where = character(0), + derivation_id = character(0) ) derivations <- tibble::tibble( - derivation_id = character(0), - derivation = character(0) + derivation_id = character(0), + derivation = character(0) ) code_id <- tibble::tibble( - code_id = character(0), - name = character(0), - type = character(0), - codes = list() + code_id = character(0), + name = character(0), + type = character(0), + codes = list() ) # This is loud and I don't want it - just need the metacore object mc <- suppressWarnings( - suppressMessages( - metacore::metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, code_id) - ) + suppressMessages( + metacore::metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, code_id) + ) ) test_that("Check that add_labels applies labels properly", { - x <- mtcars %>% - add_labels( - mpg = "Miles Per Gallon", - cyl = "Cylinders" - ) - - expect_equal(attr(x$mpg, 'label'), "Miles Per Gallon") - expect_equal(attr(x$cyl, 'label'), "Cylinders") + x <- mtcars %>% + add_labels( + mpg = "Miles Per Gallon", + cyl = "Cylinders" + ) + + expect_equal(attr(x$mpg, "label"), "Miles Per Gallon") + expect_equal(attr(x$cyl, "label"), "Cylinders") }) test_that("Check that add_labels errors properly", { - expect_error(add_labels(TRUE, x = "label")) - expect_error(add_labels(mtcars, "label")) - expect_error(add_labels(mtcars, bad = "label")) - expect_error(add_labels(mtcars, mpg = 1)) + expect_error(add_labels(TRUE, x = "label")) + expect_error(add_labels(mtcars, "label")) + expect_error(add_labels(mtcars, bad = "label")) + expect_error(add_labels(mtcars, mpg = 1)) }) test_that("set_variable_labels applies labels properly", { + # Load in the metacore test object and example data + suppressMessages( + mc <- metacore::spec_to_metacore(metacore::metacore_example("p21_mock.xlsx"), quiet = TRUE) %>% + metacore::select_dataset("DM", quiet = TRUE) + ) + dm <- haven::read_xpt(metatools_example("dm.xpt")) - # Load in the metacore test object and example data - suppressMessages( - mc <- metacore::spec_to_metacore(metacore::metacore_example("p21_mock.xlsx"), quiet=TRUE) %>% - metacore::select_dataset("DM", quiet = TRUE) - ) - dm <- haven::read_xpt(metatools_example("dm.xpt")) - - # Set the variable labels - dm_labeled <- set_variable_labels(dm, mc) + # Set the variable labels + dm_labeled <- set_variable_labels(dm, mc) - # Pull out the data to check against - labs <- purrr::map_chr(names(dm_labeled), ~ attr(dm_labeled[[.]], 'label')) - attr(labs, 'label') <- 'Variable Label' # This is labelled in the metacore object + # Pull out the data to check against + labs <- purrr::map_chr(names(dm_labeled), ~ attr(dm_labeled[[.]], "label")) + attr(labs, "label") <- "Variable Label" # This is labelled in the metacore object - expect_equal(labs, mc$var_spec$label) + expect_equal(labs, mc$var_spec$label) }) test_that("set_variable_labels raises warnings properly", { - # This is metadata for the dplyr::starwars dataset - mc <- suppressWarnings( - suppressMessages( - metacore::metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, code_id) - ) - ) %>% select_dataset("Starwars", quiet = TRUE) - - starwars_short2 <- starwars_short - starwars_short2$new_var <- "" - - # Variables in data not in metadata - expect_warning(set_variable_labels(starwars_short2, mc)) - - mc <- suppressWarnings( - suppressMessages( - metacore::metacore(ds_spec, ds_vars[1:4, ], var_spec[1:4, ], value_spec, derivations, code_id) %>% - metacore::select_dataset("Starwars", quiet = TRUE) - ) - ) - expect_warning(set_variable_labels(starwars_short, mc)) - + # This is metadata for the dplyr::starwars dataset + mc <- suppressWarnings( + suppressMessages( + metacore::metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, code_id) + ) + ) %>% select_dataset("Starwars", quiet = TRUE) + + starwars_short2 <- starwars_short + starwars_short2$new_var <- "" + + # Variables in data not in metadata + expect_warning(set_variable_labels(starwars_short2, mc)) + + mc <- suppressWarnings( + suppressMessages( + metacore::metacore(ds_spec, ds_vars[1:4, ], var_spec[1:4, ], value_spec, derivations, code_id) %>% + metacore::select_dataset("Starwars", quiet = TRUE) + ) + ) + expect_warning(set_variable_labels(starwars_short, mc)) }) test_that("removal_labels works to remvoe all labels", { - data <- tibble::tibble(a = 1:5, - b = letters[1:5]) - data_lab <- data %>% - purrr::map2_dfr(c("apple", "pear"), function(x, y ){ - attr(x, "label") <- y - x - }) - remove_labels(data_lab) %>% - expect_equal(., data) - - expect_error(remove_labels(c(1:10))) + data <- tibble::tibble( + a = 1:5, + b = letters[1:5] + ) + data_lab <- data %>% + purrr::map2_dfr(c("apple", "pear"), function(x, y) { + attr(x, "label") <- y + x + }) + remove_labels(data_lab) %>% + expect_equal(., data) + + expect_error(remove_labels(c(1:10))) }) diff --git a/tests/testthat/test-sort.R b/tests/testthat/test-sort.R index 7e38088..91a21bc 100644 --- a/tests/testthat/test-sort.R +++ b/tests/testthat/test-sort.R @@ -1,5 +1,5 @@ # Suppress cli output during testing -options(cli.default_handler = function(...) { }) +options(cli.default_handler = function(...) {}) load(metacore::metacore_example("pilot_ADaM.rda")) spec <- metacore %>% select_dataset("ADSL", quiet = TRUE) @@ -9,19 +9,18 @@ test_that("sort_order", { select(AGE, SITEID, everything()) %>% order_cols(spec) %>% expect_equal(data) - # Check when too many columns - data %>% - select(AGE, everything(), -SITEID) %>% - order_cols(spec) %>% - expect_equal(select(data, -SITEID)) - - # Check when there are too few columns - data %>% - select(AGE, SITEID, everything()) %>% - mutate(foo = "game") %>% - order_cols(spec) %>% - expect_equal(mutate(data, foo = "game")) + # Check when too many columns + data %>% + select(AGE, everything(), -SITEID) %>% + order_cols(spec) %>% + expect_equal(select(data, -SITEID)) + # Check when there are too few columns + data %>% + select(AGE, SITEID, everything()) %>% + mutate(foo = "game") %>% + order_cols(spec) %>% + expect_equal(mutate(data, foo = "game")) }) test_that("sort_key", { @@ -34,10 +33,10 @@ test_that("sort_key", { test_that("order_cols warns about deprecated dataset_name parameter", { library(metacore) library(haven) - + load(metacore_example("pilot_ADaM.rda")) data <- read_xpt(metatools_example("adsl.xpt")) - + # Test that using dataset_name triggers deprecation warning lifecycle::expect_deprecated( order_cols(data, metacore, dataset_name = "ADSL") @@ -47,25 +46,25 @@ test_that("order_cols warns about deprecated dataset_name parameter", { test_that("order_cols still works with deprecated dataset_name parameter", { library(metacore) library(haven) - + load(metacore_example("pilot_ADaM.rda")) data <- read_xpt(metatools_example("adsl.xpt")) - + # Suppress warning to test functionality suppressWarnings({ result <- order_cols(data, metacore, dataset_name = "ADSL") }) - + expect_s3_class(result, "data.frame") }) test_that("sort_by_key warns about deprecated dataset_name parameter", { library(metacore) library(haven) - + load(metacore_example("pilot_ADaM.rda")) data <- read_xpt(metatools_example("adsl.xpt")) - + lifecycle::expect_deprecated( sort_by_key(data, metacore, dataset_name = "ADSL") ) @@ -74,13 +73,13 @@ test_that("sort_by_key warns about deprecated dataset_name parameter", { test_that("sort_by_key still works with deprecated dataset_name parameter", { library(metacore) library(haven) - + load(metacore_example("pilot_ADaM.rda")) data <- read_xpt(metatools_example("adsl.xpt")) - + suppressWarnings({ result <- sort_by_key(data, metacore, dataset_name = "ADSL") }) - + expect_s3_class(result, "data.frame") -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-supp.R b/tests/testthat/test-supp.R index 58e2263..fe95d01 100644 --- a/tests/testthat/test-supp.R +++ b/tests/testthat/test-supp.R @@ -1,197 +1,218 @@ # Suppress cli output during testing -options(cli.default_handler = function(...) { }) +options(cli.default_handler = function(...) {}) test_that("build_qnam", { - full_ae <- safetyData::sdtm_suppae %>% - select(-QORIG, -QEVAL, -QLABEL) %>% - pivot_wider(names_from = QNAM, values_from = QVAL) %>% - rename(AESEQ = IDVARVAL) %>% - select(-IDVAR) %>% - left_join(safetyData::sdtm_ae, . , by = c("STUDYID", "USUBJID", "AESEQ")) - - supp_fx <- build_qnam(full_ae, "AETRTEM", "TREATMENT EMERGENT FLAG", - "AESEQ", "CLINICAL STUDY SPONSOR", "DERIVED") %>% - select(STUDYID, RDOMAIN, USUBJID, IDVAR, IDVARVAL, QNAM, QLABEL, QVAL, QORIG, QEVAL)%>% - arrange(USUBJID, IDVARVAL) - ex_supp <- arrange(safetyData::sdtm_suppae, USUBJID, IDVARVAL) - # Test standard example - expect_equal(supp_fx, ex_supp) - # Test without IDVAR making ambiguous output - expect_error(build_qnam(full_ae, "AETRTEM", "TREATMENT EMERGENT FLAG", - "", "CLINICAL STUDY SPONSOR", "DERIVED"), - "The combination of STUDYID, RDOMAIN, USUBJID, IDVARVAL, QNAM is ambiguous. Consider modifying the IDVAR") - # Test without IDVAR - supp_sans_id <- full_ae %>% - group_by(USUBJID) %>% - arrange(AESEQ) %>% - dplyr::slice(1) %>% - build_qnam("AETRTEM", "TREATMENT EMERGENT FLAG", - "", "CLINICAL STUDY SPONSOR", "DERIVED") %>% - select(STUDYID, RDOMAIN, USUBJID, IDVAR, IDVARVAL, QNAM, QLABEL, QVAL, QORIG, QEVAL) %>% - arrange(USUBJID, IDVARVAL) - ex_supp_sans_id <- arrange(safetyData::sdtm_suppae, USUBJID, IDVARVAL) %>% - group_by(USUBJID) %>% - dplyr::slice(1) %>% - mutate(IDVAR = "", IDVARVAL = "") - expect_equal(supp_sans_id, ex_supp_sans_id) - + full_ae <- safetyData::sdtm_suppae %>% + select(-QORIG, -QEVAL, -QLABEL) %>% + pivot_wider(names_from = QNAM, values_from = QVAL) %>% + rename(AESEQ = IDVARVAL) %>% + select(-IDVAR) %>% + left_join(safetyData::sdtm_ae, ., by = c("STUDYID", "USUBJID", "AESEQ")) + + supp_fx <- build_qnam( + full_ae, "AETRTEM", "TREATMENT EMERGENT FLAG", + "AESEQ", "CLINICAL STUDY SPONSOR", "DERIVED" + ) %>% + select(STUDYID, RDOMAIN, USUBJID, IDVAR, IDVARVAL, QNAM, QLABEL, QVAL, QORIG, QEVAL) %>% + arrange(USUBJID, IDVARVAL) + ex_supp <- arrange(safetyData::sdtm_suppae, USUBJID, IDVARVAL) + # Test standard example + expect_equal(supp_fx, ex_supp) + # Test without IDVAR making ambiguous output + expect_error( + build_qnam( + full_ae, "AETRTEM", "TREATMENT EMERGENT FLAG", + "", "CLINICAL STUDY SPONSOR", "DERIVED" + ), + "The combination of STUDYID, RDOMAIN, USUBJID, IDVARVAL, QNAM is ambiguous. Consider modifying the IDVAR" + ) + # Test without IDVAR + supp_sans_id <- full_ae %>% + group_by(USUBJID) %>% + arrange(AESEQ) %>% + dplyr::slice(1) %>% + build_qnam( + "AETRTEM", "TREATMENT EMERGENT FLAG", + "", "CLINICAL STUDY SPONSOR", "DERIVED" + ) %>% + select(STUDYID, RDOMAIN, USUBJID, IDVAR, IDVARVAL, QNAM, QLABEL, QVAL, QORIG, QEVAL) %>% + arrange(USUBJID, IDVARVAL) + ex_supp_sans_id <- arrange(safetyData::sdtm_suppae, USUBJID, IDVARVAL) %>% + group_by(USUBJID) %>% + dplyr::slice(1) %>% + mutate(IDVAR = "", IDVARVAL = "") + expect_equal(supp_sans_id, ex_supp_sans_id) }) test_that("make_supp_qual", { - load(metacore::metacore_example("pilot_SDTM.rda")) - - spec<- metacore %>% - select_dataset("AE", quiet = TRUE) - - # Add the mock supp variables - ae <- combine_supp(safetyData::sdtm_ae, safetyData::sdtm_suppae) - - metacore_supp <- make_supp_qual(ae, spec) %>% - arrange(USUBJID, QNAM, IDVARVAL) %>% - as_tibble() - - man_supp <- ae %>% - select(STUDYID, USUBJID, RDOMAIN = DOMAIN, IDVARVAL = AESEQ, AETRTEM) %>% - tidyr::pivot_longer(AETRTEM, names_to = "QNAM", values_to = "QVAL") %>% - filter(!is.na(QVAL)) %>% - mutate(IDVAR = "AESEQ", - QORIG = "derived", - QEVAL = "CLINICAL STUDY SPONSOR", - QLABEL = "TREATMENT EMERGENT FLAG") %>% - arrange(USUBJID, QNAM, IDVARVAL) %>% - select(STUDYID, RDOMAIN, USUBJID, IDVAR, - IDVARVAL, QNAM , QLABEL,QVAL, QORIG, QEVAL) %>% - distinct() - - #Testing normal circumstances - expect_equal(metacore_supp, man_supp) - - # Add the supp without a idvar - dm_spec <- select_dataset(metacore, "DM", quiet = TRUE) - dm <- combine_supp(safetyData::sdtm_dm, safetyData::sdtm_suppdm) %>% - as_tibble() - dm_supp <- make_supp_qual(dm, dm_spec) - man_dm_supp <- safetyData::sdtm_suppdm %>% - as_tibble() %>% - mutate(IDVAR = as.character(IDVAR), - IDVARVAL = as.character(IDVARVAL), - QORIG = tolower(QORIG)) %>% - select(STUDYID, RDOMAIN, USUBJID, IDVAR, IDVARVAL, QNAM, QLABEL, QVAL, QORIG, QEVAL) - expect_equal(dm_supp, man_dm_supp) - - #Testing with blank rows - supp_with_miss <- dm %>% - dplyr::bind_rows(tibble::tibble(STUDYID = "CDISCPILOT01", - DOMAIN = "DM", - USUBJID = "01-701-9999", - SUBJID = 9999, - ITT = "")) - expect_message(make_supp_qual(supp_with_miss, dm_spec), - "Empty QVAL rows removed for QNAM = ITT") - - suppressMessages(make_supp_qual(supp_with_miss, dm_spec)) %>% - expect_equal(man_dm_supp) - - - # Testing with too many datasets - expect_error(make_supp_qual(ae, metacore)) - #Testing without supp columns specified - metacore_old <- metacore::spec_to_metacore(metacore::metacore_example("SDTM_spec_CDISC_pilot.xlsx"), quiet = TRUE) - ae_spec <- select_dataset(metacore_old, "AE", quiet = TRUE) - expect_error(make_supp_qual(ae, ae_spec), - "No supplemental variables specified in metacore object. Please check your specifications") + load(metacore::metacore_example("pilot_SDTM.rda")) + + spec <- metacore %>% + select_dataset("AE", quiet = TRUE) + + # Add the mock supp variables + ae <- combine_supp(safetyData::sdtm_ae, safetyData::sdtm_suppae) + + metacore_supp <- make_supp_qual(ae, spec) %>% + arrange(USUBJID, QNAM, IDVARVAL) %>% + as_tibble() + + man_supp <- ae %>% + select(STUDYID, USUBJID, RDOMAIN = DOMAIN, IDVARVAL = AESEQ, AETRTEM) %>% + tidyr::pivot_longer(AETRTEM, names_to = "QNAM", values_to = "QVAL") %>% + filter(!is.na(QVAL)) %>% + mutate( + IDVAR = "AESEQ", + QORIG = "derived", + QEVAL = "CLINICAL STUDY SPONSOR", + QLABEL = "TREATMENT EMERGENT FLAG" + ) %>% + arrange(USUBJID, QNAM, IDVARVAL) %>% + select( + STUDYID, RDOMAIN, USUBJID, IDVAR, + IDVARVAL, QNAM, QLABEL, QVAL, QORIG, QEVAL + ) %>% + distinct() + + # Testing normal circumstances + expect_equal(metacore_supp, man_supp) + + # Add the supp without a idvar + dm_spec <- select_dataset(metacore, "DM", quiet = TRUE) + dm <- combine_supp(safetyData::sdtm_dm, safetyData::sdtm_suppdm) %>% + as_tibble() + dm_supp <- make_supp_qual(dm, dm_spec) + man_dm_supp <- safetyData::sdtm_suppdm %>% + as_tibble() %>% + mutate( + IDVAR = as.character(IDVAR), + IDVARVAL = as.character(IDVARVAL), + QORIG = tolower(QORIG) + ) %>% + select(STUDYID, RDOMAIN, USUBJID, IDVAR, IDVARVAL, QNAM, QLABEL, QVAL, QORIG, QEVAL) + expect_equal(dm_supp, man_dm_supp) + + # Testing with blank rows + supp_with_miss <- dm %>% + dplyr::bind_rows(tibble::tibble( + STUDYID = "CDISCPILOT01", + DOMAIN = "DM", + USUBJID = "01-701-9999", + SUBJID = 9999, + ITT = "" + )) + expect_message( + make_supp_qual(supp_with_miss, dm_spec), + "Empty QVAL rows removed for QNAM = ITT" + ) + + suppressMessages(make_supp_qual(supp_with_miss, dm_spec)) %>% + expect_equal(man_dm_supp) + + + # Testing with too many datasets + expect_error(make_supp_qual(ae, metacore)) + # Testing without supp columns specified + metacore_old <- metacore::spec_to_metacore(metacore::metacore_example("SDTM_spec_CDISC_pilot.xlsx"), quiet = TRUE) + ae_spec <- select_dataset(metacore_old, "AE", quiet = TRUE) + expect_error( + make_supp_qual(ae, ae_spec), + "No supplemental variables specified in metacore object. Please check your specifications" + ) }) test_that("combine_supp", { - ### 1 IDVAR and 1 QNAM - combo_ae <- combine_supp(safetyData::sdtm_ae, safetyData::sdtm_suppae) %>% - select(USUBJID, AESEQ, AETRTEM) %>% - distinct() %>% - arrange(USUBJID, AESEQ) - supp_check <- safetyData::sdtm_suppae %>% - select(USUBJID, AESEQ = IDVARVAL, AETRTEM = QVAL) %>% - arrange(USUBJID, AESEQ) - attr(supp_check$AETRTEM, "label") <- 'TREATMENT EMERGENT FLAG' - expect_equal(combo_ae, supp_check) - - ### No IDVAR and multiple QNAM - out_test <- safetyData::sdtm_suppdm %>% - filter(USUBJID %in% c("01-701-1015")) %>% - select(USUBJID, QNAM, QVAL) %>% - pivot_wider(names_from = QNAM, values_from = QVAL) %>% - as.data.frame() - attr(out_test$COMPLT16, "label") <- 'Completers of Week 16 Population Flag' - attr(out_test$COMPLT24, "label") <- 'Completers of Week 24 Population Flag' - attr(out_test$COMPLT8, "label") <- 'Completers of Week 8 Population Flag' - attr(out_test$EFFICACY, "label") <- 'Efficacy Population Flag' - attr(out_test$ITT, "label") <- 'Intent to Treat Population Flag' - attr(out_test$SAFETY, "label") <- 'Safety Population Flag' - - full_dm <- combine_supp(safetyData::sdtm_dm, safetyData::sdtm_suppdm) %>% - select(USUBJID, COMPLT16:SAFETY) - expect_equal(filter(full_dm, USUBJID == "01-701-1015"), out_test) - # Test SUBJID that wasn't in the SUPP that all supp values are NA - full_dm %>% - filter(USUBJID == "01-701-1057") %>% - select(-USUBJID) %>% - tidyr::pivot_longer(everything())%>% - dplyr::summarise(test = all(is.na(value))) %>% - expect_equal(tibble::tibble(test = TRUE)) - - ### Where there are only value for a small number of subjects - mostly_miss <- combine_supp(safetyData::sdtm_ds, safetyData::sdtm_suppds) - original <- safetyData::sdtm_suppds %>% + ### 1 IDVAR and 1 QNAM + combo_ae <- combine_supp(safetyData::sdtm_ae, safetyData::sdtm_suppae) %>% + select(USUBJID, AESEQ, AETRTEM) %>% + distinct() %>% + arrange(USUBJID, AESEQ) + supp_check <- safetyData::sdtm_suppae %>% + select(USUBJID, AESEQ = IDVARVAL, AETRTEM = QVAL) %>% + arrange(USUBJID, AESEQ) + attr(supp_check$AETRTEM, "label") <- "TREATMENT EMERGENT FLAG" + expect_equal(combo_ae, supp_check) + + ### No IDVAR and multiple QNAM + out_test <- safetyData::sdtm_suppdm %>% + filter(USUBJID %in% c("01-701-1015")) %>% + select(USUBJID, QNAM, QVAL) %>% + pivot_wider(names_from = QNAM, values_from = QVAL) %>% + as.data.frame() + attr(out_test$COMPLT16, "label") <- "Completers of Week 16 Population Flag" + attr(out_test$COMPLT24, "label") <- "Completers of Week 24 Population Flag" + attr(out_test$COMPLT8, "label") <- "Completers of Week 8 Population Flag" + attr(out_test$EFFICACY, "label") <- "Efficacy Population Flag" + attr(out_test$ITT, "label") <- "Intent to Treat Population Flag" + attr(out_test$SAFETY, "label") <- "Safety Population Flag" + + full_dm <- combine_supp(safetyData::sdtm_dm, safetyData::sdtm_suppdm) %>% + select(USUBJID, COMPLT16:SAFETY) + expect_equal(filter(full_dm, USUBJID == "01-701-1015"), out_test) + # Test SUBJID that wasn't in the SUPP that all supp values are NA + full_dm %>% + filter(USUBJID == "01-701-1057") %>% + select(-USUBJID) %>% + tidyr::pivot_longer(everything()) %>% + dplyr::summarise(test = all(is.na(value))) %>% + expect_equal(tibble::tibble(test = TRUE)) + + ### Where there are only value for a small number of subjects + mostly_miss <- combine_supp(safetyData::sdtm_ds, safetyData::sdtm_suppds) + original <- safetyData::sdtm_suppds %>% + arrange(USUBJID) %>% + pull(QVAL) + attr(original, "label") <- "PROTOCOL ENTRY CRITERIA NOT MET" + expect_equal( + mostly_miss %>% + filter(!is.na(ENTCRIT)) %>% arrange(USUBJID) %>% - pull(QVAL) - attr(original, "label") <- 'PROTOCOL ENTRY CRITERIA NOT MET' - expect_equal(mostly_miss %>% - filter(!is.na(ENTCRIT)) %>% - arrange(USUBJID) %>% - pull(ENTCRIT), - original) - - ### Multiple IDVARS and multiple QNAMS - # Add some mock supp variables - ae <- safetyData::sdtm_ae %>% - mutate( - SUPPVAR1 = letters[1:nrow(safetyData::sdtm_ae)], - SUPPVAR2 = rep(letters, 36)[1:nrow(safetyData::sdtm_ae)], - SUPPVAR3 = USUBJID, - IDVAR = as.numeric(str_extract(USUBJID, "\\d{3}$")) - ) - attr(ae$SUPPVAR1, "label") <- "Supp Test 1" - attr(ae$SUPPVAR2, "label") <- "Supp Test 2" - attr(ae$SUPPVAR3, "label") <- "Supp Test 3" - ### Mock up a metadata necessary to make the SUPP - supp_meta <- tibble::tribble( - ~qnam, ~qlabel, ~idvar, ~qeval, ~qorig, - "SUPPVAR1", "Supp Test 1", "AESEQ", "Investigator", "CRF", - "SUPPVAR2", "Supp Test 2", "AESEQ", "Investigator", "CRF", - "SUPPVAR3", "Supp Test 3", "IDVAR", "Investigator", "CRF", - ) - - ### Wrap and map - suppae <- pmap_dfr(supp_meta, build_qnam, dataset=ae) %>% - arrange(USUBJID, QNAM, IDVARVAL) - - dataset = ae %>% - select(-starts_with("SUPP")) - supp = suppae - - multi_out <- combine_supp(dataset, suppae) - expect_equal(multi_out$SUPPVAR1, ae$SUPPVAR1) - expect_equal(multi_out$SUPPVAR2, ae$SUPPVAR2) - expect_equal(multi_out$SUPPVAR3, ae$SUPPVAR3) + pull(ENTCRIT), + original + ) + + ### Multiple IDVARS and multiple QNAMS + # Add some mock supp variables + ae <- safetyData::sdtm_ae %>% + mutate( + SUPPVAR1 = letters[1:nrow(safetyData::sdtm_ae)], + SUPPVAR2 = rep(letters, 36)[1:nrow(safetyData::sdtm_ae)], + SUPPVAR3 = USUBJID, + IDVAR = as.numeric(str_extract(USUBJID, "\\d{3}$")) + ) + attr(ae$SUPPVAR1, "label") <- "Supp Test 1" + attr(ae$SUPPVAR2, "label") <- "Supp Test 2" + attr(ae$SUPPVAR3, "label") <- "Supp Test 3" + ### Mock up a metadata necessary to make the SUPP + supp_meta <- tibble::tribble( + ~qnam, ~qlabel, ~idvar, ~qeval, ~qorig, + "SUPPVAR1", "Supp Test 1", "AESEQ", "Investigator", "CRF", + "SUPPVAR2", "Supp Test 2", "AESEQ", "Investigator", "CRF", + "SUPPVAR3", "Supp Test 3", "IDVAR", "Investigator", "CRF", + ) + + ### Wrap and map + suppae <- pmap_dfr(supp_meta, build_qnam, dataset = ae) %>% + arrange(USUBJID, QNAM, IDVARVAL) + + dataset <- ae %>% + select(-starts_with("SUPP")) + supp <- suppae + + multi_out <- combine_supp(dataset, suppae) + expect_equal(multi_out$SUPPVAR1, ae$SUPPVAR1) + expect_equal(multi_out$SUPPVAR2, ae$SUPPVAR2) + expect_equal(multi_out$SUPPVAR3, ae$SUPPVAR3) }) test_that("combine_supp works with different IDVARVAL classes", { - skip_if_not_installed("pharmaversesdtm") - expect_equal( - combine_supp(pharmaversesdtm::ae, pharmaversesdtm::suppae) %>% - pull(AESEQ), - pharmaversesdtm::ae %>% pull(AESEQ) - ) + skip_if_not_installed("pharmaversesdtm") + expect_equal( + combine_supp(pharmaversesdtm::ae, pharmaversesdtm::suppae) %>% + pull(AESEQ), + pharmaversesdtm::ae %>% pull(AESEQ) + ) }) test_that("combine_supp works with without QEVAL", { @@ -200,36 +221,38 @@ test_that("combine_supp works with without QEVAL", { }) test_that("supp data that does not match the main data will raise a warning", { - sdtm_suppae_extra <- safetyData::sdtm_suppae - sdtm_suppae_extra$IDVARVAL[1] <- 99 - expect_error( - combine_supp(safetyData::sdtm_ae, sdtm_suppae_extra) - ) + sdtm_suppae_extra <- safetyData::sdtm_suppae + sdtm_suppae_extra$IDVARVAL[1] <- 99 + expect_error( + combine_supp(safetyData::sdtm_ae, sdtm_suppae_extra) + ) }) test_that("Floating point correction works", { - fp1 = 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 - sdtm_ae_fp <- safetyData::sdtm_ae %>% - mutate(AESEQ = case_when(AESEQ == 1 ~ fp1, - TRUE ~ as.double(AESEQ))) - # correction - combo_ae <-combine_supp(sdtm_ae_fp, safetyData::sdtm_suppae) %>% - select(USUBJID, AESEQ, AETRTEM) %>% - distinct() %>% - arrange(USUBJID, AESEQ) - supp_check <- safetyData::sdtm_suppae %>% - select(USUBJID, AESEQ = IDVARVAL, AETRTEM = QVAL) %>% - arrange(USUBJID, AESEQ) - attr(supp_check$AETRTEM, "label") <- 'TREATMENT EMERGENT FLAG' - expect_equal(combo_ae, supp_check) + fp1 <- 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + sdtm_ae_fp <- safetyData::sdtm_ae %>% + mutate(AESEQ = case_when( + AESEQ == 1 ~ fp1, + TRUE ~ as.double(AESEQ) + )) + # correction + combo_ae <- combine_supp(sdtm_ae_fp, safetyData::sdtm_suppae) %>% + select(USUBJID, AESEQ, AETRTEM) %>% + distinct() %>% + arrange(USUBJID, AESEQ) + supp_check <- safetyData::sdtm_suppae %>% + select(USUBJID, AESEQ = IDVARVAL, AETRTEM = QVAL) %>% + arrange(USUBJID, AESEQ) + attr(supp_check$AETRTEM, "label") <- "TREATMENT EMERGENT FLAG" + expect_equal(combo_ae, supp_check) }) test_that("zero-row supp returns data unchanged with a warning (#45)", { - expect_warning( - result <- combine_supp(safetyData::sdtm_ae, safetyData::sdtm_suppae[0,]), - regexp = "Zero rows in supp, returning original dataset unchanged" - ) - expect_equal(result, safetyData::sdtm_ae) + expect_warning( + result <- combine_supp(safetyData::sdtm_ae, safetyData::sdtm_suppae[0, ]), + regexp = "Zero rows in supp, returning original dataset unchanged" + ) + expect_equal(result, safetyData::sdtm_ae) }) test_that("multiple different IDVAR map to the same QNAM works", { @@ -241,7 +264,7 @@ test_that("multiple different IDVAR map to the same QNAM works", { simple_suppae$IDVARVAL[2] <- "2012-09-02" expect_equal( combine_supp(simple_ae, supp = simple_suppae)$AETRTEM, - structure(c("Y", NA, NA, NA, NA, NA, "Y"), label = 'TREATMENT EMERGENT FLAG') + structure(c("Y", NA, NA, NA, NA, NA, "Y"), label = "TREATMENT EMERGENT FLAG") ) # Replace the value in error @@ -276,20 +299,20 @@ test_that("combine_supp() does not create an IDVARVAL column (#78)", { test_that("make_supp_qual handles deprecated dataset_name parameter", { load(metacore::metacore_example("pilot_SDTM.rda")) ae <- combine_supp(safetyData::sdtm_ae, safetyData::sdtm_suppae) - + # Test that dataset_name is deprecated and shows guidance suppressWarnings({ result <- make_supp_qual(ae, metacore, dataset_name = "AE") }) - + expect_s3_class(result, "data.frame") }) test_that("combine_supp handles QNAM not in dataset columns", { simple_ae <- safetyData::sdtm_ae[1:5, ] simple_suppae <- safetyData::sdtm_suppae[1, ] - simple_suppae$QNAM <- "NEWCOL" # A new column to add - + simple_suppae$QNAM <- "NEWCOL" # A new column to add + # Should successfully add the new column result <- combine_supp(simple_ae, simple_suppae) expect_true("NEWCOL" %in% names(result)) @@ -297,9 +320,9 @@ test_that("combine_supp handles QNAM not in dataset columns", { test_that("combine_supp errors when QNAM already exists in dataset", { simple_ae <- safetyData::sdtm_ae[1:5, ] - simple_ae$AETRTEM <- "existing" # Add column that matches QNAM + simple_ae$AETRTEM <- "existing" # Add column that matches QNAM simple_suppae <- safetyData::sdtm_suppae[1, ] - + expect_error( combine_supp(simple_ae, simple_suppae), "already in the original dataset" @@ -309,8 +332,8 @@ test_that("combine_supp errors when QNAM already exists in dataset", { test_that("combine_supp handles IDVAR not in dataset", { simple_ae <- safetyData::sdtm_ae[1:5, ] simple_suppae <- safetyData::sdtm_suppae[1, ] - simple_suppae$IDVAR <- "FAKEIDVAR" # IDVAR that doesn't exist - + simple_suppae$IDVAR <- "FAKEIDVAR" # IDVAR that doesn't exist + expect_error( combine_supp(simple_ae, simple_suppae), "replacement has 0 rows" @@ -321,7 +344,7 @@ test_that("combine_supp_by_idvar detects conflicting replacements across IDVARs" simple_ae <- safetyData::sdtm_ae %>% filter(USUBJID %in% c("01-701-1015", "01-701-1023")) %>% mutate(NEWID = dplyr::row_number()) - + # Create supp with same QNAM but different IDVARs that would cause conflicts suppae_conflict <- bind_rows( data.frame( @@ -341,17 +364,17 @@ test_that("combine_supp_by_idvar detects conflicting replacements across IDVARs" STUDYID = "CDISCPILOT01", RDOMAIN = "AE", USUBJID = "01-701-1015", - IDVAR = "NEWID", + IDVAR = "NEWID", IDVARVAL = "1", QNAM = "TESTVAR", QLABEL = "Test Variable", - QVAL = "ValueB", # Different value for same subject/QNAM + QVAL = "ValueB", # Different value for same subject/QNAM QORIG = "CRF", QEVAL = "", stringsAsFactors = FALSE ) ) - + expect_error( combine_supp(simple_ae, suppae_conflict), "unexpected number of rows" diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 5bf7ac2..eb3653e 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,5 +1,5 @@ # Suppress cli output during testing -options(cli.default_handler = function(...) { }) +options(cli.default_handler = function(...) {}) test_that("metatools_example", { expect_equal(metatools_example(), c("adsl.xpt", "dm.xpt")) @@ -15,7 +15,7 @@ test_that("metatools_example", { }) test_that("make_lone_dataset", { - load(metacore::metacore_example("pilot_ADaM.rda")) - # Test deprecated function. Deprecated warning suppressed. - suppressWarnings(expect_error(make_lone_dataset(metacore, NULL))) + load(metacore::metacore_example("pilot_ADaM.rda")) + # Test deprecated function. Deprecated warning suppressed. + suppressWarnings(expect_error(make_lone_dataset(metacore, NULL))) })