Skip to content

Commit 926cda3

Browse files
committed
merge main
Merge branch 'main' into use-delphidocs # Conflicts: # DESCRIPTION
2 parents cbb0152 + 3bb5a82 commit 926cda3

File tree

70 files changed

+2184
-531
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+2184
-531
lines changed

DESCRIPTION

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,52 @@ Type: Package
22
Package: epidatasets
33
Title: Epidemiological Data for Delphi Tooling Examples
44
Version: 0.0.1
5-
Authors@R:
6-
person(c("Daniel", "J."), "McDonald", , "daniel@stat.ubc.ca", role = c("cre", "aut"))
5+
Authors@R: c(
6+
person(c("Daniel", "J."), "McDonald", , "daniel@stat.ubc.ca", role = "aut"),
7+
person("Nat", "DeFries", , "ndefries@andrew.cmu.edu", role = c("cre", "aut")),
8+
person("Johns Hopkins University Center for Systems Science and Engineering", role = "dtc",
9+
comment = "Owner of COVID-19 cases and deaths data from the COVID-19 Data Repository"),
10+
person("Johns Hopkins University", role = "cph",
11+
comment = "Copyright holder of COVID-19 cases and deaths data from the COVID-19 Data Repository"),
12+
person("Carnegie Mellon University Delphi Group", role = "dtc",
13+
comment = "Owner of masking, social-distancing, and CLI data from the COVID-19 Trends and Impacts Survey. Owner of claims-based CLI data from the Delphi Epidata API"),
14+
person("The COVID-19 Canada Open Data Working Group", role = "dtc",
15+
comment = "Owner of Canadian COVID-19 case rates from the Covid19Canada data repository"),
16+
person("Statistics Canada", role = "dtc",
17+
comment = "Owner of Canadian graduate employment income data from the Statistics Canada website"),
18+
person("Google", role = "dtc",
19+
comment = "Collaborator on CLI data from the Google symptom surveys")
20+
)
721
Description: This package contains data sets used to compile vignettes and
822
other documentation in Delphi R Packages. The goal is to avoid calls
9-
to the Delphi Epidata API, and deposit some examples here for easy
23+
to the Delphi Epidata API, and to deposit some examples here for easy
1024
offline use.
1125
License: MIT + file LICENSE
12-
URL: https://cmu-delphi.github.io/epidatasets/
26+
URL: https://github.com/cmu-delphi/epidatasets,
27+
https://cmu-delphi.github.io/epidatasets/
1328
Depends:
1429
R (>= 2.10)
1530
Suggests:
1631
covidcast,
32+
data.table,
1733
dplyr,
1834
epidatr,
19-
epipredict,
2035
here,
36+
httr,
37+
jsonlite,
2138
lubridate,
2239
magrittr,
2340
purrr,
2441
readr
42+
Enhances:
43+
epiprocess (>= 0.9.0),
44+
tibble
2545
Remotes:
2646
cmu-delphi/delphidocs,
2747
cmu-delphi/epidatr,
28-
cmu-delphi/epipredict,
2948
cmu-delphi/epiprocess
3049
Config/Needs/website: cmu-delphi/delphidocs
3150
Encoding: UTF-8
3251
LazyData: true
33-
RoxygenNote: 7.2.3
52+
Roxygen: list(markdown = TRUE)
53+
RoxygenNote: 7.3.2

LICENSE

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
YEAR: 2023
2-
COPYRIGHT HOLDER: epidatasets authors
1+
This contains a collection of data from different sources under different
2+
licenses; please see the documentation for each object for license information.

LICENSE.md

Lines changed: 0 additions & 21 deletions
This file was deleted.

R/epipredict-data.R

Lines changed: 312 additions & 24 deletions
Large diffs are not rendered by default.

R/epiprocess-data.R

Lines changed: 184 additions & 85 deletions
Large diffs are not rendered by default.

R/sysdata.rda

6.47 MB
Binary file not shown.

data-raw/_helper.R

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
library(here)
2+
3+
internal_data_path <- here("R/sysdata.rda")
4+
5+
# Add (or overwrite) one named object in the package's internal data file.
#
# The file at `internal_data_path` is read in full when it exists, `obj` is
# bound under `obj_name` alongside whatever was already stored, and the whole
# collection is written back to the same path.
#
# Args:
#   obj:      the object to store.
#   obj_name: the name under which `obj` is stored in the data file.
save_to_sysdata <- function(obj, obj_name) {
  # Scratch environment with no parent, so it holds only what was loaded from
  # the file plus the new object.
  contents <- new.env(hash = FALSE, parent = emptyenv())

  # Pull in the existing internal data, if there is any yet.
  already_present <- file.exists(internal_data_path)
  if (already_present) {
    load(internal_data_path, envir = contents)
  }

  # Insert the new object, replacing any earlier object of the same name.
  contents[[obj_name]] <- obj

  # Write every object in the scratch environment back to the data file.
  stored_names <- names(contents)
  save(
    list = stored_names,
    file = internal_data_path,
    envir = contents,
    compress = "xz",
    # For backwards compatibility with older R versions (<3.5)
    version = 2
  )
}

data-raw/_run_all.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Run every data-generating script in `data-raw/`.
#
# Scripts whose file name starts with "_" are helpers (they do not generate
# data) and are skipped.

library(here)

# Directory holding the data-generation scripts. This is deliberately NOT named
# `internal_data_path`: the data scripts source `_helper.R`, which assigns a
# global of that name (the sysdata file path), so reusing the name here would
# have it silently overwritten by the first sourced script.
data_raw_dir <- here("data-raw")

# The pattern is anchored with `$` so that only files ending in ".R" are
# picked up. An unanchored ".*[.]R" also matches names such as "notes.Rmd" or
# "cache.RData", which must not be passed to `source()`.
script_names <- list.files(
  data_raw_dir,
  pattern = "[.]R$",
  full.names = FALSE
)

# Drop helper scripts, then resolve all paths up front so the loop does not
# depend on any global that a sourced script might reassign.
script_names <- script_names[!startsWith(script_names, "_")]
script_paths <- vapply(
  script_names,
  function(script_name) here(file.path("data-raw", script_name)),
  character(1),
  USE.NAMES = FALSE
)

for (script_path in script_paths) {
  message("running ", script_path, " ...")
  source(script_path)
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Build `archive_cases_dv_subset_all_states_tbl` and store it in the package's
# internal data file.
#
# Two state-level daily signals are fetched with every issue in
# epirange(20200601, 20211201), turned into `epi_archive`s, merged, and the
# merged archive's `DT` component is saved as a tibble.

library(dplyr)
library(epidatr)
library(epiprocess)

source(here::here("data-raw/_helper.R"))

# DC and the territories are dropped from both signals.
excluded_geos <- c("as", "gu", "dc", "mp", "pr", "vi")

# `smoothed_adj_cli` from the "doctor-visits" source, stored as `percent_cli`.
dv_subset <- pub_covidcast(
  source = "doctor-visits",
  signals = "smoothed_adj_cli",
  time_type = "day",
  geo_type = "state",
  time_values = epirange(20200601, 20211201),
  geo_values = "*",
  issues = epirange(20200601, 20211201)
) %>%
  select(geo_value, time_value, version = issue, percent_cli = value) %>%
  filter(!(geo_value %in% excluded_geos)) %>%
  # compactify = FALSE is used for both per-signal archives (this one and
  # `case_rate_subset`) to avoid some testthat test failures on tests that
  # were based on a non-compactified version. Note that the merge further
  # down does compactify.
  as_epi_archive(compactify = FALSE)

# `confirmed_7dav_incidence_prop` from the "jhu-csse" source, stored as
# `case_rate_7d_av`.
case_rate_subset <- pub_covidcast(
  source = "jhu-csse",
  signals = "confirmed_7dav_incidence_prop",
  time_type = "day",
  geo_type = "state",
  time_values = epirange(20200601, 20211201),
  geo_values = "*",
  issues = epirange(20200601, 20211201)
) %>%
  select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
  filter(!(geo_value %in% excluded_geos)) %>%
  as_epi_archive(compactify = FALSE)

# Use `epiprocess::epix_merge` to avoid having to reimplement `sync`ing
# behavior. After merging, convert DT component back to tibble.
archive_cases_dv_subset_all_states_tbl <- epix_merge(
  dv_subset, case_rate_subset,
  sync = "locf",
  compactify = TRUE
)$DT %>%
  as_tibble()

# We're trying to do:
#   usethis::use_data(
#     archive_cases_dv_subset_all_states_tbl,
#     internal = TRUE, overwrite = TRUE, compress = "xz"
#   )
# but `usethis::use_data` can only store multiple objects if they're added in
# the same call. This workaround is from
# https://github.com/r-lib/usethis/issues/1512
save_to_sysdata(
  archive_cases_dv_subset_all_states_tbl,
  "archive_cases_dv_subset_all_states_tbl"
)
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,46 @@
1-
dv_subset <- covidcast(
2-
data_source = "doctor-visits",
1+
library(dplyr)
2+
library(epidatr)
3+
library(epiprocess)
4+
5+
source(here::here("data-raw/_helper.R"))
6+
7+
dv_subset <- pub_covidcast(
8+
source = "doctor-visits",
39
signals = "smoothed_adj_cli",
410
time_type = "day",
511
geo_type = "state",
612
time_values = epirange(20200601, 20211201),
713
geo_values = "ca,fl,ny,tx",
814
issues = epirange(20200601, 20211201)
915
) %>%
10-
fetch() %>%
1116
select(geo_value, time_value, version = issue, percent_cli = value) %>%
1217
# We're using compactify=FALSE here and below to avoid some testthat test
1318
# failures on tests that were based on a non-compactified version.
1419
as_epi_archive(compactify = FALSE)
1520

16-
case_rate_subset <- covidcast(
17-
data_source = "jhu-csse",
21+
case_rate_subset <- pub_covidcast(
22+
source = "jhu-csse",
1823
signals = "confirmed_7dav_incidence_prop",
1924
time_type = "day",
2025
geo_type = "state",
2126
time_values = epirange(20200601, 20211201),
2227
geo_values = "ca,fl,ny,tx",
2328
issues = epirange(20200601, 20211201)
2429
) %>%
25-
fetch() %>%
2630
select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
2731
as_epi_archive(compactify = FALSE)
2832

29-
archive_cases_dv_subset = epix_merge(
33+
# Use `epiprocess::epix_merge` to avoid having to reimplement `sync`ing
34+
# behavior. After merging, convert DT component back to tibble.
35+
archive_cases_dv_subset_tbl = epix_merge(
3036
dv_subset, case_rate_subset,
3137
sync = "locf",
32-
compactify = FALSE)
33-
34-
# If we directly store an epi_archive R6 object as data, it will store its class
35-
# implementation there as well. To prevent mismatches between these stored
36-
# implementations and the latest class definition, don't store them as R6
37-
# objects; store the DT and construct the R6 object on request.
38-
archive_cases_dv_subset_dt <- archive_cases_dv_subset$DT
38+
compactify = FALSE)$DT %>%
39+
as_tibble()
3940

40-
usethis::use_data(archive_cases_dv_subset_dt, overwrite = TRUE)
41+
# We're trying to do:
42+
# usethis::use_data(archive_cases_dv_subset_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
43+
# but `usethis::use_data` can only store multiple objects if they're added in
44+
# the same call. This workaround is from
45+
# https://github.com/r-lib/usethis/issues/1512
46+
save_to_sysdata(archive_cases_dv_subset_tbl, "archive_cases_dv_subset_tbl")

0 commit comments

Comments
 (0)