|
1 | | -#' Fetch only the latest issue for each observation in a data frame |
2 | | -#' |
3 | | -#' Since `covidcast_signal()` can, with the right options, return multiple |
4 | | -#' issues for a single observation in a single geo, we may want only the most |
5 | | -#' recent for plotting, mapping, or other purposes. |
6 | | -#' |
7 | | -#' @param df A `covidcast_signal` data frame |
8 | | -#' @return The same `covidcast_signal` data frame, but with only the latest |
9 | | -#' issue of every observation |
| 1 | +#' Fetch the latest or earliest issue for each observation |
| 2 | +#' |
| 3 | +#' The data returned from `covidcast_signal()` or `covidcast_signals()` can, if |
| 4 | +#' called with the `issues` argument, contain multiple issues for a single |
| 5 | +#' observation in a single location. These functions filter the data frame to |
| 6 | +#' contain only the earliest issue or only the latest issue. |
| 7 | +#' |
| 8 | +#' @param df A `covidcast_signal` or `covidcast_signal_long` data frame, such as |
| 9 | +#' returned from `covidcast_signal()` or the "long" format of |
| 10 | +#' `aggregate_signals()`. |
| 11 | +#' @return A data frame in the same form, but with only the earliest or latest |
| 12 | +#' issue of every observation. Note that these functions sort the data frame |
| 13 | +#' as part of their filtering, so the output data frame rows may be in a |
| 14 | +#' different order. |
10 | 15 | #' @importFrom rlang .data |
11 | | -#' @keywords internal |
| 16 | +#' @export |
12 | 17 | latest_issue <- function(df) { |
13 | | - # Save the attributes, since grouping overwrites them |
14 | | - attrs <- attributes(df) |
15 | | - attrs <- attrs[!(names(attrs) %in% c("row.names", "names"))] |
16 | | - |
17 | | - df <- df %>% |
18 | | - dplyr::arrange(dplyr::desc(.data$issue)) %>% |
19 | | - dplyr::distinct(.data$geo_value, .data$time_value, |
20 | | - .keep_all = TRUE) |
21 | | - |
22 | | - attributes(df) <- c(attributes(df), attrs) |
23 | | - |
24 | | - return(df) |
| 18 | + return(first_or_last_issue(df, TRUE)) |
25 | 19 | } |
26 | 20 |
|
27 | | -#' Fetch only the earliest issue for each observation in a data frame |
28 | | -#' |
29 | | -#' Since `covidcast_signal()` can, with the right options, return multiple |
30 | | -#' issues for a single observation in a single geo, we may want only the most |
31 | | -#' recent for plotting, mapping, or other purposes. |
32 | | -#' |
33 | | -#' @param df A `covidcast_signal` data frame |
34 | | -#' @return The same `covidcast_signal` data frame, but with only the earliest |
35 | | -#' issue of every observation |
36 | | -#' @importFrom rlang .data |
37 | | -#' @keywords internal |
| 21 | +#' @rdname latest_issue |
| 22 | +#' @export |
38 | 23 | earliest_issue <- function(df) { |
39 | | - # Save the attributes, since grouping overwrites them |
| 24 | + return(first_or_last_issue(df, FALSE)) |
| 25 | +} |
| 26 | + |
| 27 | +# Helper to do either first or last issue. |
| 28 | +first_or_last_issue <- function(df, latest) { |
| 29 | + if (!inherits(df, c("covidcast_signal", "covidcast_signal_long"))) { |
| 30 | + stop("`df` must be a `covidcast_signal` or `covidcast_signal_long` data frame") |
| 31 | + } |
| 32 | + |
| 33 | + # Save the attributes, such as metadata, since dplyr drops them |
40 | 34 | attrs <- attributes(df) |
41 | 35 | attrs <- attrs[!(names(attrs) %in% c("row.names", "names"))] |
42 | 36 |
|
| 37 | + issue_sort <- function(df) { |
| 38 | + if (latest) { |
| 39 | + dplyr::arrange(df, dplyr::desc(.data$issue)) |
| 40 | + } else { |
| 41 | + dplyr::arrange(df, .data$issue) |
| 42 | + } |
| 43 | + } |
| 44 | + |
43 | 45 | df <- df %>% |
44 | | - dplyr::arrange(.data$issue) %>% |
45 | | - dplyr::distinct(.data$geo_value, .data$time_value, |
46 | | - .keep_all = TRUE) |
| 46 | + issue_sort() %>% |
| 47 | + dplyr::distinct(.data$data_source, .data$signal, .data$geo_value, |
| 48 | + .data$time_value, .keep_all = TRUE) |
47 | 49 |
|
48 | 50 | attributes(df) <- c(attributes(df), attrs) |
49 | 51 |
|
|
0 commit comments