Skip to content

Commit dba2e30

Browse files
authored
Create extended covid rates data (#8)
* add extended version of covid_case_death_rates * use syntax for new version of epiprocess
1 parent 6ab1437 commit dba2e30

8 files changed

+155
-14
lines changed

R/epipredict-data.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#' This data source of confirmed COVID-19 cases and deaths is based on reports
44
#' made available by the Center for Systems Science and Engineering at Johns
55
#' Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata
6-
#' API. This example data is a snapshot as of March 20, 2024, and
6+
#' API. This example data is a snapshot as of May 31, 2022, and
77
#' ranges from December 31, 2020 to December 31, 2021. It
88
#' includes all states.
99
#'

R/epiprocess-data.R

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,42 @@
208208
#' * Furthermore, the data has been limited to a very small number of rows,
209209
#' formatted into an `epi_df`, and the signal names slightly altered.
210210
"jhu_confirmed_cumulative_num"
211+
212+
#' JHU daily COVID-19 case and death rates from all states
213+
#'
214+
#' This data source of confirmed COVID-19 cases and deaths is based on reports
215+
#' made available by the Center for Systems Science and Engineering at Johns
216+
#' Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata
217+
#' API. This example data is a snapshot as of May 31, 2022, and
218+
#' ranges from March 1, 2020 to December 31, 2021. It
219+
#' includes all states.
220+
#'
221+
#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 37576 rows and 4 columns.
222+
#' @section Data dictionary:
223+
#' The data has columns:
224+
#' \describe{
225+
#' \item{geo_value}{the geographic value associated with each row
226+
#' of measurements.}
227+
#' \item{time_value}{the time value associated with each row of measurements.}
228+
#' \item{case_rate}{7-day average signal of number of new
229+
#' confirmed COVID-19 cases per 100,000 population, daily}
230+
#' \item{death_rate}{7-day average signal of number of new confirmed
231+
#' deaths due to COVID-19 per 100,000 population, daily}
232+
#' }
233+
#' @source This object contains a modified part of the
234+
#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
235+
#' as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
236+
#' This data set is licensed under the terms of the
237+
#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
238+
#' by the Johns Hopkins University on behalf of its Center for Systems Science
239+
#' and Engineering. Copyright Johns Hopkins University 2020.
240+
#'
241+
#' Modifications:
242+
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
243+
#' These signals are taken directly from the JHU CSSE
244+
#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
245+
#' without changes. The 7-day average signals are computed by Delphi by
246+
#' calculating moving averages of the preceding 7 days, so the signal for
247+
#' June 7 is the average of the underlying data for June 1 through 7,
248+
#' inclusive.
249+
"covid_case_death_rates_extended"

R/sysdata.rda

92.1 KB
Binary file not shown.

data-raw/case_death_rate_archive_tbl.R

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,26 +47,17 @@ case_death_rate_archive_tbl <- epix_merge(
4747
# Calculate 7-day averages for case and death rates.
4848
case_death_rate_archive_tbl <- case_death_rate_archive_tbl %>%
4949
epix_slide(
50-
before = 365000L, ref_time_values = fc_time_values,
50+
.before = 365000L, .versions = fc_time_values,
5151
function(x, gk, rtv) {
5252
x %>%
5353
group_by(geo_value) %>%
54-
epi_slide_mean(case_rate, before = 6L) %>%
54+
epi_slide_mean(case_rate, .align = "right", .window_size = 7L) %>%
5555
rename(case_rate_7d_av = slide_value_case_rate) %>%
56-
epi_slide_mean(death_rate, before = 6L) %>%
56+
epi_slide_mean(death_rate, .align = "right", .window_size = 7L) %>%
5757
ungroup() %>%
5858
rename(death_rate_7d_av = slide_value_death_rate)
5959
}
6060
) %>%
61-
rename(
62-
version = time_value,
63-
time_value = slide_value_time_value,
64-
geo_value = slide_value_geo_value,
65-
case_rate = slide_value_case_rate,
66-
death_rate = slide_value_death_rate,
67-
case_rate_7d_av = slide_value_case_rate_7d_av,
68-
death_rate_7d_av = slide_value_death_rate_7d_av
69-
) %>%
7061
as_epi_archive(compactify = TRUE)
7162
# Convert DT component back to tibble.
7263
case_death_rate_archive_tbl <- case_death_rate_archive_tbl$DT %>%
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
library(dplyr)
2+
library(epidatr)
3+
4+
source(here::here("data-raw/_helper.R"))
5+
6+
d <- as.Date("2022-05-31")
7+
8+
x <- pub_covidcast(
9+
source = "jhu-csse",
10+
signals = "confirmed_7dav_incidence_prop",
11+
time_type = "day",
12+
geo_type = "state",
13+
time_values = epirange(20200301, 20201231 - 1),
14+
geo_values = "*",
15+
as_of = d
16+
) %>%
17+
select(geo_value, time_value, case_rate = value)
18+
19+
y <- pub_covidcast(
20+
source = "jhu-csse",
21+
signals = "deaths_7dav_incidence_prop",
22+
time_type = "day",
23+
geo_type = "state",
24+
time_values = epirange(20200301, 20201231 - 1),
25+
geo_values = "*",
26+
as_of = d
27+
) %>%
28+
select(geo_value, time_value, death_rate = value)
29+
30+
covid_case_death_rates_extension_tbl <- x %>%
31+
full_join(y, by = c("geo_value", "time_value")) %>%
32+
as_tibble()
33+
34+
# We're trying to do:
35+
# usethis::use_data(covid_case_death_rates_extension_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
36+
# but `usethis::use_data` can only store multiple objects if they're added in
37+
# the same call. This workaround is from
38+
# https://github.com/r-lib/usethis/issues/1512
39+
save_to_sysdata(covid_case_death_rates_extension_tbl, "covid_case_death_rates_extension_tbl")
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
delayedAssign("covid_case_death_rates_extended", local({
2+
if (requireNamespace("epiprocess", quietly = TRUE)) {
3+
d <- as.Date("2022-05-31")
4+
epiprocess::as_epi_df(
5+
dplyr::bind_rows(
6+
epidatasets:::covid_case_death_rates_extension_tbl,
7+
epidatasets:::covid_case_death_rates_tbl
8+
)
9+
, as_of = d)
10+
} else {
11+
warning("Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)")
12+
dplyr::bind_rows(
13+
epidatasets:::covid_case_death_rates_extension_tbl,
14+
epidatasets:::covid_case_death_rates_tbl
15+
)
16+
}
17+
}))

man/covid_case_death_rates.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/covid_case_death_rates_extended.Rd

Lines changed: 55 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)