#' includes `NA`'s)
#' 3. `max_lag`: the amount of time until the final (new) version (same caveat
#' for `drop_nas=FALSE`, though it is far less likely to matter)
- #' 4. `spread`: the difference between the smallest and largest values (this
+ #' 4. `min_value`: the minimum value across revisions
+ #' 5. `max_value`: the maximum value across revisions
+ #' 6. `median_value`: the median value across revisions
+ #' 7. `spread`: the difference between the smallest and largest values (this
#' always excludes `NA` values)
- #' 5. `rel_spread`: `spread` divided by the largest value (so it will
+ #' 8. `rel_spread`: `spread` divided by the largest value (so it will
#' always be less than 1). Note that this need not be the final value. It will
#' be `NA` whenever `spread` is 0.
- #' 6. `time_near_latest`: This gives the lag when the value is within
+ #' 9. `time_near_latest`: This gives the lag when the value is within
#' `within_latest` (default 20%) of the value at the latest time. For example,
#' consider the series (0, 20, 99, 150, 102, 100); then `time_near_latest` is
#' the 5th index, since even though 99 is within 20%, it is outside the window
#' afterwards at 150.
#' @param epi_arch an epi_archive to be analyzed
- #' @param ... <[`tidyselect`][dplyr_tidy_select]>, used to choose the column to summarize. If empty, it
- #' chooses the first. Currently only implemented for one column at a time.
+ #' @param ... <[`tidyselect`][dplyr_tidy_select]>, used to choose the column to
+ #' summarize. If empty, it chooses the first. Currently only implemented for
+ #' one column at a time.
#' @param drop_nas bool, drop any `NA` values from the archive? After dropping
#' `NA`'s, compactify is run again to make sure there are no duplicate values
#' from occasions when the signal is revised to `NA`, and then back to its
#' immediately-preceding value.
#' @param print_inform bool, determines whether to print summary information, or
#' only return the full summary tibble
+ #' @param min_waiting_period `difftime`, integer or `NULL`. Sets a cutoff: any
+ #' time_values not earlier than `min_waiting_period` before `versions_end` are
+ #' removed. `min_waiting_period` should characterize the typical time during
+ #' which revisions occur. The default of 60 days corresponds to a typical
+ #' final value for case counts as reported in the context of insurance. To
+ #' avoid this filtering, set it to `NULL` or to 0.
#' @param within_latest double between 0 and 1. Determines the threshold
#' used for the `time_to`
#' @param quick_revision difftime or integer (integer is treated as days), for
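
As a rough check on the `time_near_latest` rule documented above, the following is a minimal standalone sketch of the idea: return the first lag from which every later value stays within `prop` of the latest value. This is not the package's internal `time_within_x_latest()` helper; the function name and the default `lags = seq_along(values)` are assumptions made for illustration only.

# Illustrative sketch only; not the implementation used in this commit.
first_lag_near_latest <- function(values, lags = seq_along(values), prop = 0.2) {
  latest <- values[length(values)]
  # TRUE where the value is within prop (e.g. 20%) of the latest value
  within <- abs(values - latest) <= prop * abs(latest)
  # earliest position from which all later values remain inside the band
  stays_within <- rev(cumprod(rev(within))) == 1
  lags[which(stays_within)[1]]
}
first_lag_near_latest(c(0, 20, 99, 150, 102, 100))
#> [1] 5

This reproduces the example in the documentation: 99 is within 20% of 100, but the series later leaves the band (150), so the answer is the 5th index.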
@@ -60,6 +70,7 @@ revision_summary <- function(epi_arch,
  ...,
  drop_nas = TRUE,
  print_inform = TRUE,
+ min_waiting_period = as.difftime(60, units = "days"),
  within_latest = 0.2,
  quick_revision = as.difftime(3, units = "days"),
  few_revisions = 3,
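
A hedged sketch of how the updated signature might be called once this lands; `archive_cases_dv_subset` and `percent_cli` are stand-ins here for whatever `epi_archive` and column you want to analyze, not a prescribed example:

# Stand-in archive and column names; only min_waiting_period is new in this commit.
revision_summary(
  archive_cases_dv_subset,
  percent_cli,
  drop_nas = TRUE,
  min_waiting_period = as.difftime(60, units = "days"),
  within_latest = 0.2
)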
@@ -92,6 +103,11 @@ revision_summary <- function(epi_arch,
  revision_behavior <-
    epi_arch$DT %>%
    select(c(geo_value, time_value, all_of(keys), version, !!arg))
+ if (!is.null(min_waiting_period)) {
+   revision_behavior <- revision_behavior %>%
+     filter(abs(time_value - as.Date(epi_arch$versions_end)) >= min_waiting_period)
+ }
+
  if (drop_nas) {
    # if we're dropping NA's, we should recompactify
    revision_behavior <-
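
To make the new filter concrete, here is a tiny sketch with made-up dates showing which `time_value`s survive the default 60-day waiting period: only observations at least `min_waiting_period` before `versions_end` are kept.

# Hypothetical dates, just to show which rows the filter retains.
versions_end <- as.Date("2024-06-01")
time_value <- as.Date(c("2024-03-01", "2024-05-15"))
abs(time_value - versions_end) >= as.difftime(60, units = "days")
#> [1]  TRUE FALSE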
@@ -113,18 +129,22 @@ revision_summary <- function(epi_arch,
      n_revisions = dplyr::n() - 1,
      min_lag = min(lag), # nolint: object_usage_linter
      max_lag = max(lag), # nolint: object_usage_linter
-     spread = spread_vec(pick(!!arg)),
-     rel_spread = spread / max_no_na(pick(!!arg)), # nolint: object_usage_linter
+     min_value = f_no_na(min, pick(!!arg)),
+     max_value = f_no_na(max, pick(!!arg)),
+     median_value = f_no_na(median, pick(!!arg)),
      time_to = time_within_x_latest(lag, pick(!!arg), prop = within_latest), # nolint: object_usage_linter
      .groups = "drop"
    ) %>%
    mutate(
+     spread = max_value - min_value, # nolint: object_usage_linter
+     rel_spread = spread / max_value, # nolint: object_usage_linter
      # TODO the units here may be a problem
      min_lag = as.difftime(min_lag, units = "days"), # nolint: object_usage_linter
      max_lag = as.difftime(max_lag, units = "days"), # nolint: object_usage_linter
      time_near_latest = as.difftime(time_to, units = "days") # nolint: object_usage_linter
    ) %>%
-   select(-time_to)
+   select(-time_to) %>%
+   relocate(time_value, geo_value, all_of(keys), n_revisions, min_lag, max_lag, time_near_latest, spread, rel_spread, min_value, max_value, median_value)
  if (print_inform) {
    cli_inform("Min lag (time to first version):")
    difftime_summary(revision_behavior$min_lag) %>% print()
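
With per-group `min_value` and `max_value` already computed, `spread` and `rel_spread` reduce to plain arithmetic in the `mutate()`, which is why the dedicated `spread_vec()` helper removed below is no longer needed. A toy illustration with made-up values:

# Made-up values showing the equivalence with the old range-and-diff approach.
vals <- c(10, 12, NA, 15)
min_value <- min(vals, na.rm = TRUE)  # 10
max_value <- max(vals, na.rm = TRUE)  # 15
spread <- max_value - min_value       # 5, same as diff(range(vals, na.rm = TRUE))
rel_spread <- spread / max_value      # 0.333...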
@@ -203,31 +223,17 @@ get_last_run <- function(bool_vec, values_from) {
  values_from[[length(bool_vec) - tail(runs$lengths, n = 1) + 1]]
}

- #' the default behavior returns a warning on empty lists, which we do not want,
- #' and there is no super clean way of preventing this
+ #' use when the default behavior returns a warning on empty lists, which we do
+ #' not want, and there is no super clean way of preventing this
#' @keywords internal
- max_no_na <- function(x) {
+ f_no_na <- function(f, x) {
  x <- x[!is.na(x)]
  if (length(x) == 0) {
    return(Inf)
  } else {
-   return(max(x))
+   return(f(x))
  }
}
- #' the default behavior returns a warning on empty lists, which we do not want
- #' @keywords internal
- spread_vec <- function(x) {
-   x <- x[!is.na(x)]
-   if (length(x) == 0) {
-     return(-Inf)
-   } else {
-     res <- x %>%
-       range(na.rm = TRUE) %>%
-       diff(na.rm = TRUE)
-     return(res)
-   }
- }
-


#' simple util for printing a fraction and its percent
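
A quick sketch of how the generalized `f_no_na()` helper behaves on toy vectors (inside `revision_summary()` it is passed `pick(!!arg)` rather than a bare vector):

# Toy inputs; f_no_na() drops NAs, then applies f, falling back to Inf when empty.
f_no_na(max, c(1, NA, 3))     # 3
f_no_na(median, c(1, NA, 3))  # 2
f_no_na(min, c(NA, NA))       # Inf, the empty-input fallback shown above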