@@ -58,9 +58,9 @@ medical insurance claims and the number of new confirmed COVID-19 cases per
5858
5959``` {r grab-epi-data}
6060# Select the `percent_cli` column from the data archive
61- doctor_visits <- archive_cases_dv_subset$DT %>%
62- select(geo_value, time_value, version, percent_cli) %>%
63- drop_na(percent_cli) %>%
61+ doctor_visits <- archive_cases_dv_subset$DT |>
62+ select(geo_value, time_value, version, percent_cli) |>
63+ tidyr::drop_na(percent_cli) |>
6464 as_epi_archive(compactify = TRUE)
6565```
6666
@@ -77,8 +77,8 @@ doctor_visits <- pub_covidcast(
7777 geo_values = "ca,fl,ny,tx",
7878 time_values = epirange(20200601, 20211201),
7979 issues = epirange(20200601, 20211201)
80- ) %>%
81- rename(version = issue, percent_cli = value) %>%
80+ ) |>
81+ rename(version = issue, percent_cli = value) |>
8282 as_epi_archive(compactify = TRUE)
8383```
8484
@@ -99,20 +99,20 @@ percent_cli_data <- bind_rows(
9999 # Snapshotted data for the version-faithful forecasts
100100 map(
101101 forecast_dates,
102- ~ doctor_visits %>%
103- epix_as_of(.x) %>%
102+ ~ doctor_visits |>
103+ epix_as_of(.x) |>
104104 mutate(version = .x)
105- ) %>%
106- bind_rows() %>%
105+ ) |>
106+ bind_rows() |>
107107 mutate(version_faithful = TRUE),
108108 # Latest data for the version-faithless forecasts
109- doctor_visits %>%
110- epix_as_of(doctor_visits$versions_end) %>%
109+ doctor_visits |>
110+ epix_as_of(doctor_visits$versions_end) |>
111111 mutate(version_faithful = FALSE)
112112)
113113
114114p0 <-
115- ggplot(data = percent_cli_data %>% filter(geo_value == geo_choose)) +
115+ ggplot(data = percent_cli_data |> filter(geo_value == geo_choose)) +
116116 geom_vline(aes(color = factor(version), xintercept = version), lty = 2) +
117117 geom_line(
118118 aes(x = time_value, y = percent_cli, color = factor(version)),
@@ -154,9 +154,9 @@ of the red time-series to its left.
154154In fact, if we take a snapshot and get the last `time_value`:
155155
156156``` {r}
157- doctor_visits %>%
158- epix_as_of(as.Date("2020-08-01")) %>%
159- pull(time_value) %>%
157+ doctor_visits |>
158+ epix_as_of(as.Date("2020-08-01")) |>
159+ pull(time_value) |>
160160 max()
161161```
162162
@@ -185,14 +185,14 @@ One way to do this is by setting the `.version` argument for `epix_slide()`:
185185
186186``` {r single_version, warning = FALSE}
187187forecast_date <- as.Date("2021-04-06")
188- forecasts <- doctor_visits %>%
188+ forecasts <- doctor_visits |>
189189 epix_slide(
190190 ~ arx_forecaster(
191191 .x,
192192 outcome = "percent_cli",
193193 predictors = "percent_cli",
194194 args_list = arx_args_list()
195- )$predictions %>%
195+ )$predictions |>
196196 pivot_quantiles_wider(.pred_distn),
197197 .versions = forecast_date
198198 )
@@ -202,12 +202,12 @@ As truth data, we'll compare with the `epix_as_of()` to generate a snapshot of
202202the archive at the last date[^1].
203203
204204``` {r compare_single_with_result}
205- forecasts %>%
205+ forecasts |>
206206 inner_join(
207- doctor_visits %>%
207+ doctor_visits |>
208208 epix_as_of(doctor_visits$versions_end),
209209 by = c("geo_value", "target_date" = "time_value")
210- ) %>%
210+ ) |>
211211 select(geo_value, forecast_date, .pred, `0.05`, `0.95`, percent_cli)
212212```
213213
@@ -227,9 +227,9 @@ This has the effect of simulating a data set that receives the final version
227227updates every day.
228228
229229``` {r}
230- archive_cases_dv_subset_faux <- doctor_visits %>%
231- epix_as_of(doctor_visits$versions_end) %>%
232- mutate(version = time_value) %>%
230+ archive_cases_dv_subset_faux <- doctor_visits |>
231+ epix_as_of(doctor_visits$versions_end) |>
232+ mutate(version = time_value) |>
233233 as_epi_archive()
234234```
235235
@@ -251,10 +251,10 @@ forecast_wrapper <- function(
251251 lags = c(0:7, 14, 21),
252252 adjust_latency = "extend_ahead"
253253 )
254- )$predictions %>%
254+ )$predictions |>
255255 pivot_quantiles_wider(.pred_distn)
256256 }
257- ) %>%
257+ ) |>
258258 bind_rows()
259259}
260260```
@@ -276,20 +276,20 @@ forecast_dates <- seq(
276276)
277277aheads <- c(1, 7, 14, 21, 28)
278278
279- version_faithless <- archive_cases_dv_subset_faux %>%
279+ version_faithless <- archive_cases_dv_subset_faux |>
280280 epix_slide(
281281 ~ forecast_wrapper(.x, aheads, "percent_cli", "percent_cli"),
282282 .before = 120,
283283 .versions = forecast_dates
284- ) %>%
284+ ) |>
285285 mutate(version_faithful = FALSE)
286286
287- version_faithful <- doctor_visits %>%
287+ version_faithful <- doctor_visits |>
288288 epix_slide(
289289 ~ forecast_wrapper(.x, aheads, "percent_cli", "percent_cli"),
290290 .before = 120,
291291 .versions = forecast_dates
292- ) %>%
292+ ) |>
293293 mutate(version_faithful = TRUE)
294294
295295forecasts <-
@@ -316,8 +316,8 @@ ny), we'll just display the results for two states, California (CA) and Florida
316316
317317``` {r plot_ca_forecasts, warning = FALSE}
318318geo_choose <- "ca"
319- forecasts_filtered <- forecasts %>%
320- filter(geo_value == geo_choose) %>%
319+ forecasts_filtered <- forecasts |>
320+ filter(geo_value == geo_choose) |>
321321 mutate(time_value = version)
322322
323323p1 <- # first plotting the forecasts as bands, lines and points
@@ -326,10 +326,10 @@ p1 <- # first plotting the forecasts as bands, lines and points
326326 geom_line(aes(y = .pred, color = factor(time_value)), linetype = 2L) +
327327 geom_point(aes(y = .pred, color = factor(time_value)), size = 0.75) +
328328 # the forecast date
329- geom_vline(data = percent_cli_data %>% filter(geo_value == geo_choose) %>% select(-version_faithful), aes(color = factor(version), xintercept = version), lty = 2) +
329+ geom_vline(data = percent_cli_data |> filter(geo_value == geo_choose) |> select(-version_faithful), aes(color = factor(version), xintercept = version), lty = 2) +
330330 # the underlying data
331331 geom_line(
332- data = percent_cli_data %>% filter(geo_value == geo_choose),
332+ data = percent_cli_data |> filter(geo_value == geo_choose),
333333 aes(x = time_value, y = percent_cli, color = factor(version)),
334334 inherit.aes = FALSE, na.rm = TRUE
335335 ) +
@@ -342,8 +342,8 @@ p1 <- # first plotting the forecasts as bands, lines and points
342342
343343``` {r plot_fl_forecasts, warning = FALSE}
344344geo_choose <- "fl"
345- forecasts_filtered <- forecasts %>%
346- filter(geo_value == geo_choose) %>%
345+ forecasts_filtered <- forecasts |>
346+ filter(geo_value == geo_choose) |>
347347 mutate(time_value = version)
348348
349349p2 <-
@@ -352,11 +352,11 @@ p2 <-
352352 geom_line(aes(y = .pred, color = factor(time_value)), linetype = 2L) +
353353 geom_point(aes(y = .pred, color = factor(time_value)), size = 0.75) +
354354 geom_vline(
355- data = percent_cli_data %>% filter(geo_value == geo_choose) %>% select(-version_faithful),
355+ data = percent_cli_data |> filter(geo_value == geo_choose) |> select(-version_faithful),
356356 aes(color = factor(version), xintercept = version), lty = 2
357357 ) +
358358 geom_line(
359- data = percent_cli_data %>% filter(geo_value == geo_choose),
359+ data = percent_cli_data |> filter(geo_value == geo_choose),
360360 aes(x = time_value, y = percent_cli, color = factor(version)),
361361 inherit.aes = FALSE, na.rm = TRUE
362362 ) +
398398
399399
400400[^1]: For forecasting a single day like this, we could have actually just used
401- `doctor_visits %>% epix_as_of(forecast_date)` to get the relevant snapshot, and then fed that into `arx_forecaster()` as we did in the [landing
401+ `doctor_visits |> epix_as_of(forecast_date)` to get the relevant snapshot, and then fed that into `arx_forecaster()` as we did in the [landing
402402page](../index.html#motivating-example).
403403
404404
0 commit comments