@@ -473,52 +473,30 @@ update_is_locf <- function(arranged_updates_df, ukey_names, abs_tol) {
473473 ekt_names <- ukey_names [ukey_names != " version" ]
474474 val_names <- all_names [! all_names %in% ukey_names ]
475475
476- Reduce(`&` , lapply(updates_col_refs [ekt_names ], is_locf , abs_tol , TRUE )) &
477- Reduce(`&` , lapply(updates_col_refs [val_names ], is_locf , abs_tol , FALSE ))
478- }
479-
480- # ' Checks to see if a value in a vector is LOCF
481- # ' @description LOCF meaning last observation carried forward (to later
482- # ' versions). Lags the vector by 1, then compares with itself. If `is_key` is
483- # ' `TRUE`, only values that are exactly the same between the lagged and
484- # ' original are considered LOCF. If `is_key` is `FALSE` and `vec` is a vector
485- # ' of numbers ([`base::is.numeric`]), then approximate equality will be used,
486- # ' checking whether the absolute difference between each pair of entries is
487- # ' `<= abs_tol`; if `vec` is something else, then exact equality is used
488- # ' instead.
489- # '
490- # ' @details
491- # '
492- # ' We include epikey-time columns in LOCF comparisons as part of an optimization
493- # ' to avoid slower grouped operations while still ensuring that the first
494- # ' observation for each time series will not be marked as LOCF. We test these
495- # ' key columns for exact equality to prevent chopping off consecutive
496- # ' time_values during flat periods when `abs_tol` is high.
497- # '
498- # ' We use exact equality for non-`is.numeric` double/integer columns such as
499- # ' dates, datetimes, difftimes, `tsibble::yearmonth`s, etc., as these may be
500- # ' used as part of re-indexing or grouping procedures, and we don't want to
501- # ' change the number of groups for those operations when we remove LOCF data
502- # ' during compactification.
503- # '
504- # ' @importFrom dplyr lag if_else
505- # ' @importFrom rlang is_bare_numeric
506- # ' @importFrom vctrs vec_equal
507- # ' @keywords internal
508- is_locf <- function (vec , abs_tol , is_key ) { # nolint: object_usage_linter
509- lag_vec <- lag(vec )
510- if (is.vector(vec , mode = " numeric" ) && ! is_key ) {
511- # (integer or double vector, no class (& no dims); maybe names, which we'll
512- # ignore like `vec_equal`); not a key column
513- res <- unname(if_else(
514- ! is.na(vec ) & ! is.na(lag_vec ),
515- abs(vec - lag_vec ) < = abs_tol ,
516- is.na(vec ) & is.na(lag_vec )
517- ))
518- return (res )
476+ n_updates <- nrow(arranged_updates_df )
477+ if (n_updates == 0L ) {
478+ logical (0L )
479+ } else if (n_updates == 1L ) {
480+ FALSE # sole observation is not LOCF
519481 } else {
520- res <- vec_equal(vec , lag_vec , na_equal = TRUE )
521- return (res )
482+ ekts_tbl <- new_tibble(updates_col_refs [ekt_names ])
483+ vals_tbl <- new_tibble(updates_col_refs [val_names ])
484+ # n_updates >= 2L so we can use `:` naturally (this is the reason for
485+ # separating out n_updates == 1L from this case):
486+ inds1 <- 2L : n_updates
487+ inds2 <- 1L : (n_updates - 1L )
488+ c(
489+ FALSE , # first observation is not LOCF
490+ approx_equal0(ekts_tbl ,
491+ inds1 = inds1 , ekts_tbl , inds2 = inds2 ,
492+ # check ekt cols without tolerance:
493+ abs_tol = 0 , na_equal = TRUE
494+ ) &
495+ approx_equal0(vals_tbl ,
496+ inds1 = inds1 , vals_tbl , inds2 = inds2 ,
497+ abs_tol = abs_tol , na_equal = TRUE
498+ )
499+ )
522500 }
523501}
524502
0 commit comments