Skip to content

Commit b8d5ac6

Browse files
authored
Merge pull request #104 from cmu-delphi/94-create-an-epi_df-constructor
Created an `epi_df` constructor
2 parents fdf0a49 + edb07db commit b8d5ac6

File tree

4 files changed

+156
-41
lines changed

4 files changed

+156
-41
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export(growth_rate)
4040
export(is_epi_archive)
4141
export(is_epi_df)
4242
export(mutate)
43+
export(new_epi_df)
4344
export(relocate)
4445
export(rename)
4546
export(slice)

R/epi_df.R

Lines changed: 81 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,85 @@
8484
#' @name epi_df
8585
NULL
8686

87+
88+
#' Creates an `epi_df` object
89+
#'
90+
#' Creates a new `epi_df` object. By default, builds an empty tibble with the
91+
#' correct metadata for an `epi_df` object (ie. `geo_type`, `time_type`, and `as_of`).
92+
#' Refer to the below info. about the arguments for more details.
93+
#'
94+
#' @param x A data.frame, [tibble::tibble], or [tsibble::tsibble] to be converted
95+
#' @param geo_type Type for the geo values. If missing, then the function will
96+
#' attempt to infer it from the geo values present; if this fails, then it
97+
#' will be set to "custom".
98+
#' @param time_type Type for the time values. If missing, then the function will
99+
#' attempt to infer it from the time values present; if this fails, then it
100+
#' will be set to "custom".
101+
#' @param as_of Time value representing the time at which the given data were
102+
#' available. For example, if `as_of` is January 31, 2022, then the `epi_df`
103+
#' object that is created would represent the most up-to-date version of the
104+
#' data available as of January 31, 2022. If the `as_of` argument is missing,
105+
#' then the current day-time will be used.
106+
#' @param additional_metadata List of additional metadata to attach to the
107+
#' `epi_df` object. The metadata will have `geo_type`, `time_type`, and
108+
#' `as_of` fields; named entries from the passed list or will be included as
109+
#' well.
110+
#' @param ... Additional arguments passed to methods.
111+
#' @return An `epi_df` object.
112+
#'
113+
#' @export
114+
new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
115+
additional_metadata = list(), ...) {
116+
# Check that we have a data frame
117+
if (!is.data.frame(x)) {
118+
Abort("`x` must be a data frame.")
119+
}
120+
121+
# If geo type is missing, then try to guess it
122+
if (missing(geo_type)) {
123+
geo_type = guess_geo_type(x$geo_value)
124+
}
125+
126+
# If time type is missing, then try to guess it
127+
if (missing(time_type)) {
128+
time_type = guess_time_type(x$time_value)
129+
}
130+
131+
# If as_of is missing, then try to guess it
132+
if (missing(as_of)) {
133+
# First check the metadata for an as_of field
134+
if ("metadata" %in% names(attributes(x)) &&
135+
"as_of" %in% names(attributes(x)$metadata)) {
136+
as_of = attributes(x)$metadata$as_of
137+
}
138+
139+
# Next check for as_of, issue, or version columns
140+
else if ("as_of" %in% names(x)) as_of = max(x$as_of)
141+
else if ("issue" %in% names(x)) as_of = max(x$issue)
142+
else if ("version" %in% names(x)) as_of = max(x$version)
143+
144+
# If we got here then we failed
145+
else as_of = Sys.time() # Use the current day-time
146+
}
147+
148+
# Define metadata fields
149+
metadata = list()
150+
metadata$geo_type = geo_type
151+
metadata$time_type = time_type
152+
metadata$as_of = as_of
153+
metadata = c(metadata, additional_metadata)
154+
155+
# Reorder columns (geo_value, time_value, ...)
156+
if(sum(dim(x)) != 0){
157+
x = dplyr::relocate(x, .data$geo_value, .data$time_value)
158+
}
159+
160+
# Apply epi_df class, attach metadata, and return
161+
class(x) = c("epi_df", class(x))
162+
attributes(x)$metadata = metadata
163+
return(x)
164+
}
165+
87166
#' Convert to `epi_df` format
88167
#'
89168
#' Converts a data frame or tibble into an `epi_df` object. See the [getting
@@ -142,47 +221,8 @@ as_epi_df.tbl_df = function(x, geo_type, time_type, as_of,
142221
Abort("`x` must contain a `time_value` column.")
143222
}
144223

145-
# If geo type is missing, then try to guess it
146-
if (missing(geo_type)) {
147-
geo_type = guess_geo_type(x$geo_value)
148-
}
149-
150-
# If time type is missing, then try to guess it
151-
if (missing(time_type)) {
152-
time_type = guess_time_type(x$time_value)
153-
}
154-
155-
# If as_of is missing, then try to guess it
156-
if (missing(as_of)) {
157-
# First check the metadata for an as_of field
158-
if ("metadata" %in% names(attributes(x)) &&
159-
"as_of" %in% names(attributes(x)$metadata)) {
160-
as_of = attributes(x)$metadata$as_of
161-
}
162-
163-
# Next check for as_of, issue, or version columns
164-
else if ("as_of" %in% names(x)) as_of = max(x$as_of)
165-
else if ("issue" %in% names(x)) as_of = max(x$issue)
166-
else if ("version" %in% names(x)) as_of = max(x$version)
167-
168-
# If we got here then we failed
169-
else as_of = Sys.time() # Use the current day-time
170-
}
171-
172-
# Define metadata fields
173-
metadata = list()
174-
metadata$geo_type = geo_type
175-
metadata$time_type = time_type
176-
metadata$as_of = as_of
177-
metadata = c(metadata, additional_metadata)
178-
179-
# Reorder columns (geo_value, time_value, ...)
180-
x = dplyr::relocate(x, .data$geo_value, .data$time_value)
181-
182-
# Apply epi_df class, attach metadata, and return
183-
class(x) = c("epi_df", class(x))
184-
attributes(x)$metadata = metadata
185-
return(x)
224+
new_epi_df(x, geo_type, time_type, as_of,
225+
additional_metadata = list(), ...)
186226
}
187227

188228
#' @method as_epi_df data.frame

man/new_epi_df.Rd

Lines changed: 47 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-epi_df.R

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
test_that("new_epi_df works as intended", {
2+
3+
# Empty tibble
4+
wmsg = capture_warnings(a <- new_epi_df())
5+
expect_match(wmsg[1],
6+
"Unknown or uninitialised column: `geo_value`.")
7+
expect_match(wmsg[2],
8+
"Unknown or uninitialised column: `time_value`.")
9+
expect_true(is_epi_df(a))
10+
expect_identical(attributes(a)$metadata$geo_type, "custom")
11+
expect_identical(attributes(a)$metadata$time_type, "custom")
12+
expect_true(lubridate::is.POSIXt(attributes(a)$metadata$as_of))
13+
14+
# Simple non-empty tibble with geo_value and time_value cols
15+
tib <- tibble::tibble(
16+
x = 1:10, y = 1:10,
17+
time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2),
18+
geo_value = rep(c("ca", "hi"), each = 5)
19+
)
20+
21+
epi_tib = new_epi_df(tib)
22+
expect_true(is_epi_df(epi_tib))
23+
expect_length(epi_tib, 4L)
24+
expect_identical(attributes(epi_tib)$metadata$geo_type, "state")
25+
expect_identical(attributes(epi_tib)$metadata$time_type, "day")
26+
expect_true(lubridate::is.POSIXt(attributes(epi_tib)$metadata$as_of))
27+
})

0 commit comments

Comments
 (0)