add checks in function; add unit tests

ChloeYou · ChloeYou · commit 74aed7a12762 · 2022-05-31T16:05:44.000-07:00
diff --git a/R/get_test_data.R b/R/get_test_data.R
@@ -23,9 +23,12 @@
 
 get_test_data <- function(recipe, x){
   # TO-DO: SOME CHECKS OF THE DATASET
-  ## CHECK geo_value, time_value exists
+  if (any(!(c('geo_value','time_value') %in% colnames(x)))) {
+    rlang::abort("`geo_value`, `time_value` does not exist in data")
+  }
   ## CHECK if it is epi_df?
 
+
   # initialize vector to hold max lags for each variable
   max_lags <- c()
   for(i in c(1:length(recipe$steps))){
@@ -34,9 +37,14 @@ get_test_data <- function(recipe, x){
     }
   }
 
+  # CHECK: stop if there are fewer distinct time values than the max lag
+  if (dplyr::n_distinct(x$time_value)< max(max_lags)) {
+    stop("insufficient training data")
+  }
+
   test_data <- x %>%
     dplyr::filter(
-      dplyr::across(
+      dplyr::if_any(
         .cols = recipe$term_info$variable[which(recipe$var_info$role == 'raw')],
         .fns = ~ !is.na(.x)
       )
diff --git a/tests/testthat/test-get_test_data.R b/tests/testthat/test-get_test_data.R
@@ -0,0 +1,37 @@
+test_that("return expected number of rows", {
+  # 29 rows per geo_value: presumably the max lag (28) plus one -- confirm.
+  rec <- epi_recipe(case_death_rate_subset) %>%
+    step_epi_ahead(death_rate, ahead = 7) %>%
+    step_epi_lag(death_rate, lag = c(0, 7, 14, 21, 28)) %>%
+    step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
+    step_naomit(all_predictors()) %>%
+    step_naomit(all_outcomes(), skip = TRUE)
+  n_geos <- dplyr::n_distinct(case_death_rate_subset$geo_value)
+
+  out <- get_test_data(recipe = rec, x = case_death_rate_subset)
+  expect_equal(nrow(out), n_geos * 29)
+})
+
+# Pin the error message; a bare expect_error() would pass on any failure.
+test_that("expect insufficient training data error", {
+  r <- epi_recipe(case_death_rate_subset) %>%
+    step_epi_ahead(death_rate, ahead = 7) %>%
+    step_epi_lag(death_rate, lag = c(0, 367)) %>%
+    step_naomit(all_predictors()) %>%
+    step_naomit(all_outcomes(), skip = TRUE)
+  expect_error(get_test_data(recipe = r, x = case_death_rate_subset),
+               regexp = "insufficient training data")
+})
+
+test_that("expect error that geo_value or time_value does not exist", {
+  r <-  epi_recipe(case_death_rate_subset) %>%
+    step_epi_ahead(death_rate, ahead = 7) %>%
+    step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
+    step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
+    step_naomit(all_predictors()) %>%
+    step_naomit(all_outcomes(), skip = TRUE)
+  # Drop geo_value, then pin the message so only the column check can pass.
+  wrong_epi_df <- case_death_rate_subset %>% dplyr::select(-geo_value)
+  expect_error(get_test_data(recipe = r, x = wrong_epi_df),
+               regexp = "does not exist in data")
+})