|
1 | 1 | suppressPackageStartupMessages(source(here::here("R", "load_all.R"))) |
2 | 2 |
|
| 3 | +test_that("Yeo-Johnson transformation inverts correctly", { |
| 4 | + expect_true( |
| 5 | + map_lgl(seq(-5, 5, 0.1), function(lambda) { |
| 6 | + map_lgl(seq(0, 10, 0.1), \(x) abs(yj_inverse(yj_transform(x, lambda), lambda) - x) < 0.00001) %>% all() |
| 7 | + }) %>% |
| 8 | + all() |
| 9 | + ) |
| 10 | +}) |
3 | 11 |
|
4 | | -# Real data test |
5 | | -Sys.setenv(TAR_PROJECT = "flu_hosp_explore") |
| 12 | +test_that("Yeo-Johnson steps and layers invert each other", { |
| 13 | + jhu <- cases_deaths_subset %>% |
| 14 | + filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>% |
| 15 | + select(geo_value, time_value, cases) |
| 16 | + filtered_data <- jhu |
6 | 17 |
|
| 18 | + # Get some lambda values |
| 19 | + r <- epi_recipe(filtered_data) %>% |
| 20 | + step_epi_YeoJohnson(cases) %>% |
| 21 | + step_epi_lag(cases, lag = 0) %>% |
| 22 | + step_epi_ahead(cases, ahead = 0, role = "outcome") %>% |
| 23 | + step_epi_naomit() |
| 24 | + tr <- r %>% prep(filtered_data) |
7 | 25 |
|
8 | | -# Transform with Yeo-Johnson |
9 | | -data <- tar_read(joined_archive_data) %>% |
10 | | - epix_as_of(as.Date("2023-11-08")) |
11 | | -state_geo_values <- data %>% filter(source == "nhsn") %>% pull(geo_value) %>% unique() |
12 | | -filtered_data <- data %>% |
13 | | - filter(geo_value %in% state_geo_values) %>% |
14 | | - select(geo_value, source, time_value, hhs) |
15 | | -r <- epi_recipe(filtered_data) %>% |
16 | | - step_epi_YeoJohnson(hhs) %>% |
17 | | - prep(filtered_data) |
18 | | -r |
19 | | -# Inspect the lambda values (a few states have default lambda = 0.25, because |
20 | | -# they have issues) |
21 | | -r$steps[[1]]$lambdas %>% print(n = 55) |
22 | | -out1 <- r %>% bake(filtered_data) |
| 26 | + # Check general lambda values tibble structure |
| 27 | + expect_true("lambda_cases" %in% names(tr$steps[[1]]$lambdas)) |
| 28 | + expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_cases)) |
| 29 | + # Still works on a tibble |
| 30 | + expect_equal( |
| 31 | + tr %>% bake(filtered_data %>% as_tibble()), |
| 32 | + tr %>% bake(filtered_data) |
| 33 | + ) |
23 | 34 |
|
24 | | -# Transform with manual whitening (quarter root scaling) |
25 | | -# learned_params <- calculate_whitening_params(filtered_data, "hhs", scale_method = "none", center_method = "none", nonlin_method = "quart_root") |
26 | | -out2 <- filtered_data %>% |
27 | | - mutate(hhs = (hhs + 0.01)^(1 / 4)) |
| 35 | + # Make sure that the inverse transformation works |
| 36 | + f <- frosting() %>% |
| 37 | + layer_predict() %>% |
| 38 | + layer_epi_YeoJohnson(.pred) |
| 39 | + wf <- epi_workflow(r, linear_reg()) %>% |
| 40 | + fit(filtered_data) %>% |
| 41 | + add_frosting(f) |
| 42 | + out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value) |
| 43 | + out2 <- forecast(wf) %>% rename(cases = .pred) |
| 44 | + expect_equal(out1, out2) |
28 | 45 |
|
29 | | -out1 %>% |
30 | | - left_join(out2, by = c("geo_value", "source", "time_value")) %>% |
31 | | - mutate(hhs_diff = hhs.x - hhs.y) %>% |
32 | | - ggplot(aes(time_value, hhs_diff)) + |
33 | | - geom_line() + |
34 | | - facet_wrap(~geo_value, scales = "free_y") + |
35 | | - theme_minimal() + |
36 | | - labs(title = "Yeo-Johnson transformation", x = "Time", y = "HHS") |
| 46 | + # Make sure it works when there are multiple predictors and outcomes |
| 47 | + jhu_multi <- epidatasets::covid_case_death_rates_extended %>% |
| 48 | + filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>% |
| 49 | + select(geo_value, time_value, case_rate, death_rate) |
| 50 | + filtered_data <- jhu_multi |
| 51 | + r <- epi_recipe(filtered_data) %>% |
| 52 | + step_epi_YeoJohnson(case_rate, death_rate) %>% |
| 53 | + step_epi_lag(case_rate, death_rate, lag = 0) %>% |
| 54 | + step_epi_ahead(case_rate, death_rate, ahead = 0, role = "outcome") %>% |
| 55 | + step_epi_naomit() |
| 56 | + tr <- r %>% prep(filtered_data) |
37 | 57 |
|
38 | | -# Plot the real data before and after transformation |
39 | | -geo_filter <- "ca" |
40 | | -filtered_data %>% |
41 | | - filter(geo_value == geo_filter, source == "nhsn") %>% |
42 | | - mutate(hhs = log(hhs)) %>% |
43 | | - ggplot(aes(time_value, hhs)) + |
44 | | - geom_line(color = "blue") + |
45 | | - geom_line(data = out1 %>% filter(geo_value == geo_filter, source == "nhsn") %>% mutate(hhs = log(hhs)), aes(time_value, hhs), color = "green") + |
46 | | - geom_line(data = out2 %>% filter(geo_value == geo_filter, source == "nhsn") %>% mutate(hhs = log(hhs)), aes(time_value, hhs), color = "red") + |
47 | | - theme_minimal() + |
48 | | - labs(title = "Yeo-Johnson transformation", x = "Time", y = "HHS") |
| 58 | + # Check general lambda values tibble structure |
| 59 | + expect_true("lambda_case_rate" %in% names(tr$steps[[1]]$lambdas)) |
| 60 | + expect_true("lambda_death_rate" %in% names(tr$steps[[1]]$lambdas)) |
| 61 | + expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_case_rate)) |
| 62 | + expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_death_rate)) |
49 | 63 |
|
| 64 | + # TODO: Make sure that the inverse transformation works |
| 65 | + f <- frosting() %>% |
| 66 | + layer_predict() %>% |
| 67 | + layer_epi_YeoJohnson(.pred_ahead_0_case_rate) |
| 68 | + wf <- epi_workflow(r, linear_reg()) %>% |
| 69 | + fit(filtered_data) %>% |
| 70 | + add_frosting(f) |
| 71 | + out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value) |
| 72 | + # debugonce(slather.layer_epi_YeoJohnson) |
| 73 | + out2 <- forecast(wf) %>% rename(case_rate = .pred) |
| 74 | + expect_equal(out1, out2) |
| 75 | +}) |
50 | 76 |
|
51 | | -# TODO: Test this. |
52 | | -## Layer Yeo-Johnson2 |
53 | | -postproc <- frosting() %>% |
54 | | - layer_epi_YeoJohnson() |
| 77 | +test_that("Yeo-Johnson steps and layers invert each other when other_keys are present", { |
| 78 | + jhu <- cases_deaths_subset %>% |
| 79 | + filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>% |
| 80 | + select(geo_value, time_value, cases) |
| 81 | + filtered_data <- jhu |
55 | 82 |
|
56 | | -wf <- epi_workflow(r) %>% |
57 | | - fit(data) %>% |
58 | | - add_frosting(postproc) |
| 83 | + # Get some lambda values |
| 84 | + r <- epi_recipe(filtered_data) %>% |
| 85 | + step_epi_YeoJohnson(cases) %>% |
| 86 | + step_epi_lag(cases, lag = 0) %>% |
| 87 | + step_epi_ahead(cases, ahead = 0, role = "outcome") %>% |
| 88 | + step_epi_naomit() |
| 89 | + tr <- r %>% prep(filtered_data) |
| 90 | + # Check for fixed lambda values |
| 91 | + expect_true(all(near(tr$steps[[1]]$lambdas$lambda_cases, c(0.856, 0.207), tol = 0.001))) |
59 | 92 |
|
60 | | - |
61 | | -# Test inverse transformation |
62 | | -map_lgl(seq(-5, 5, 0.1), function(lambda) { |
63 | | - map_lgl(seq(0, 10, 0.1), \(x) abs(yj_inverse(yj_transform(x, lambda), lambda) - x) < 0.00001) %>% all() |
64 | | -}) %>% |
65 | | - all() |
| 93 | + # Make sure that the inverse transformation works |
| 94 | + f <- frosting() %>% |
| 95 | + layer_predict() %>% |
| 96 | + layer_epi_YeoJohnson(.pred) |
| 97 | + wf <- epi_workflow(r, linear_reg()) %>% |
| 98 | + fit(filtered_data) %>% |
| 99 | + add_frosting(f) |
| 100 | + out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value) |
| 101 | + out2 <- forecast(wf) %>% rename(cases = .pred) |
| 102 | + expect_equal(out1, out2) |
| 103 | +}) |
0 commit comments