diff --git a/DESCRIPTION b/DESCRIPTION index 996dfbf..07a598e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: gfoRmulaICE Type: Package Title: Parametric Iterative Conditional Expectation G-Formula -Version: 0.1.0 +Version: 1.1.0 Authors@R: c(person("Zhaoxi", "Cheng", role = c("aut", "cre"), email = "zcheng@hsph.harvard.edu"), person("Jing", "Li", role = c("aut"), diff --git a/R/ICE.R b/R/ICE.R index b1e92a5..4188172 100644 --- a/R/ICE.R +++ b/R/ICE.R @@ -428,8 +428,7 @@ ice <- function(data, time_points, id, time_name, clean_kwarg_str <- preprocess_dynamic$clean_kwarg_str kwarg_list <- preprocess_dynamic$kwarg_list - - # print(clean_kwarg_str) + ## get argument list args <- c(as.list(environment()), eval(parse(text = clean_kwarg_str))) kwargs_len <- length(eval(parse(text = clean_kwarg_str))) @@ -583,11 +582,13 @@ ice <- function(data, time_points, id, time_name, interv_var_single <- list() interv_time_single <- list() + for (treat in 1:length(interv_idx)) { treat_idx <- interv_idx[treat] treat_name <- paste0("intervention", interv_list_origin[treat_idx]) interv <- as.list(arg_interv[[which(names(arg_interv) == treat_name)]])[[1]] + interv_var <- interv_all_names[treat] des <- as.list(int_descript[i]) if (length(arg_interv[[which(names(arg_interv) == treat_name)]]) == 2) { @@ -596,6 +597,7 @@ ice <- function(data, time_points, id, time_name, times <- 0:(K-1) } + interv_single <- c(interv_single, list(interv)) interv_var_single <- c(interv_var_single, list(interv_var)) interv_time_single <- c(interv_time_single, list(times)) @@ -1727,6 +1729,9 @@ construct_interv_value <- function(data, timevar, int_value, int_time, int_var){ for (i in 1:nint) { this_int_value <- int_value[[i]] + if (is.data.frame(this_int_value)) { + this_int_value <- this_int_value[, "interv_values"] + } not_int_idx <- which(!data[, timevar] %in% int_time[[i]]) if (length(not_int_idx) > 0) { diff --git a/R/ICE_pool.R b/R/ICE_pool.R index e685800..777ec48 100644 --- a/R/ICE_pool.R +++ b/R/ICE_pool.R @@ -431,7 +431,18 @@ ice_pool <- function(data, K, id, time_name, outcome_name, competing_formula <- as.formula(paste0(competing_varname, "~", paste0(competing_covar_nc, collapse = "+"))) - competing_fit <- speedglm(competing_formula, data = data, family = binomial()) + competing_fit <- tryCatch(speedglm(competing_formula, data = data, family = binomial()), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(competing_fit)[1] != "speedglm") { + + if (names(competing_fit) == "error" | (names(competing_fit) == "warn" & str_detect(competing_fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + competing_fit <- glm(competing_formula, data = data, family = binomial()) + } + } ## add in this competing fit @@ -491,7 +502,13 @@ ice_pool <- function(data, K, id, time_name, outcome_name, intervention_f <- interventions[[1]][[treat]] interv_it <- intervention_f - interv_data[, paste0("interv_it_", treatment_varname, "_", treat)] <- interv_it + if (is.data.frame(interv_it)) { + interv_data[, paste0("interv_it_", treatment_varname, "_", treat)] <- interv_it[, "interv_values"] + interv_data[, paste0(treatment_varname, "_is_nc")] <- interv_it[, "is_nc"] + } else { + interv_data[, paste0("interv_it_", treatment_varname, "_", treat)] <- interv_it + } + my.arrayofA <- paste0("interv_it_", treatment_varname, "_", treat) ## check if this treatment variable is lagged @@ -540,7 +557,6 @@ ice_pool <- function(data, K, id, time_name, outcome_name, data <- interv_data - ## 3. reshape data and fit outcome model dffullwide <- reshape(data, idvar = id, timevar = time0, direction = "wide", sep = "_") @@ -552,7 +568,18 @@ ice_pool <- function(data, K, id, time_name, outcome_name, ## if outcome model and hazard model share the same input argument, how do you separate model fit here? ## this fit will be used for prediction of outcome in the first iteration and only for hazard estimation. ## but for global option, if the user specifies time in the model, then it must be different than the outcome model. - yfitog = speedglm(formula_full, family = binomial(), data = data) #This is from the data generation mechanism + yfitog = tryCatch(speedglm(formula_full, family = binomial(), data = data), #This is from the data generation mechanism + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(yfitog)[1] != "speedglm") { + + if (names(yfitog) == "error" | (names(yfitog) == "warn" & str_detect(yfitog$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + yfitog <- glm(formula_full, family = binomial(), data = data) + } + } paramtmp = (yfitog)$coef @@ -574,7 +601,19 @@ ice_pool <- function(data, K, id, time_name, outcome_name, if (!is.null(competing_varname) & (total_effect == TRUE) & hazard_based) { formula_full_comp <- as.formula(paste0(competing_varname,"~", paste0(c(competing_covar), collapse = "+"))) - yfitog_comp = speedglm(formula_full_comp, family = binomial(), data = data) + yfitog_comp = tryCatch(speedglm(formula_full_comp, family = binomial(), data = data), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(yfitog_comp)[1] != "speedglm") { + + if (names(yfitog_comp) == "error" | (names(yfitog_comp) == "warn" & str_detect(yfitog_comp$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + yfitog_comp <- glm(formula_full_comp, family = binomial(), data = data) + } + + } paramcomp = (yfitog_comp)$coef ## add in this competing fit @@ -604,7 +643,18 @@ ice_pool <- function(data, K, id, time_name, outcome_name, if (global_hazard) { formula_haz_global <- as.formula(paste0(outcome_varname,"~", paste0(c(haz_global_covar), collapse = "+"))) - haz_fit_global <- speedglm(formula_haz_global, family = binomial(), data = data) + haz_fit_global <- tryCatch(speedglm(formula_haz_global, family = binomial(), data = data), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(haz_fit_global)[1] != "speedglm") { + + if (names(haz_fit_global) == "error" | (names(haz_fit_global) == "warn" & str_detect(haz_fit_global$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + haz_fit_global <- glm(formula_haz_global, family = binomial(), data = data) + } + } paramhaz <- haz_fit_global$coef this_haz_fit <- list(haz_fit_global) @@ -649,7 +699,18 @@ ice_pool <- function(data, K, id, time_name, outcome_name, haz_formula_t <- as.formula(paste0(outcome_varname, "_", i, "~", paste0(haz_covar_t, collapse = "+"))) - tmp_fit <- speedglm(haz_formula_t, family = binomial(), data = tmpdata) + tmp_fit <- tryCatch(speedglm(haz_formula_t, family = binomial(), data = tmpdata), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(tmp_fit)[1] != "speedglm") { + + if (names(tmp_fit) == "error" | (names(tmp_fit) == "warn" & str_detect(tmp_fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + tmp_fit <- glm(haz_formula_t, family = binomial(), data = tmpdata) + } + } haz_pred_times[[i]] <- tmp_fit @@ -798,7 +859,17 @@ ice_pool <- function(data, K, id, time_name, outcome_name, fit_formula <- as.formula(paste0("y", t, "pred", "~", paste0(c(covar_iter, interact_covar), collapse = "+"))) - fit_temp = speedglm(fit_formula, family = quasibinomial(), data = data_fit) + fit_temp = tryCatch(speedglm(fit_formula, family = quasibinomial(), data = data_fit), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(fit_temp)[1] != "speedglm") { + if (names(fit_temp) == "error" | (names(fit_temp) == "warn" & str_detect(fit_temp$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + fit_temp <- glm(fit_formula, family = quasibinomial(), data = data_fit) + } + } this_outcome_fit <- list(fit_temp) this_outcome_summary <- list(get_summary(fit_temp)) diff --git a/R/ICE_stratify.R b/R/ICE_stratify.R index 52fbb7e..ba78753 100644 --- a/R/ICE_stratify.R +++ b/R/ICE_stratify.R @@ -167,7 +167,7 @@ ice_strat <- function(data, K, id, time_name, outcome_name, hazard_model = NULL, interventions, intervention_names, intervention_times = NULL, compute_nc_risk = TRUE, hazard_based, weighted = FALSE, - treat_model, obs_treatment_names, + treat_model, obs_treatment_names, intervention_description, verbose = TRUE) { ## 0. some pre-processing @@ -416,7 +416,17 @@ ice_strat <- function(data, K, id, time_name, outcome_name, if (total_effect == FALSE) { competing_formula <- as.formula(paste0(competing_varname, "~", paste0(competing_covar_nc, collapse = "+"))) - competing_fit <- speedglm(competing_formula, data = data, family = binomial()) + competing_fit <- tryCatch(speedglm(competing_formula, data = data, family = binomial()), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(competing_fit)[1] != "speedglm") { + if (names(competing_fit) == "error" | (names(competing_fit) == "warn" & str_detect(competing_fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + competing_fit <- glm(competing_formula, data = data, family = binomial()) + } + } ## add in this competing fit @@ -479,8 +489,14 @@ ice_strat <- function(data, K, id, time_name, outcome_name, if (any(str_detect(as.character(substitute(interventions[[treat]])), "grace_period"))) { my.arrayofA <- paste0("interv_it_", treatment_varname) } else { + interv_it <- intervention_f - interv_data[, paste0("interv_it_", treatment_varname, "_", treat)] <- interv_it + if (is.data.frame(interv_it)) { + interv_data[, paste0("interv_it_", treatment_varname, "_", treat)] <- interv_it[, "interv_values"] + interv_data[, paste0(treatment_varname, "_is_nc")] <- interv_it[, "is_nc"] + } else { + interv_data[, paste0("interv_it_", treatment_varname, "_", treat)] <- interv_it + } my.arrayofA <- paste0("interv_it_", treatment_varname, "_", treat) } @@ -614,12 +630,34 @@ ice_strat <- function(data, K, id, time_name, outcome_name, if (null_censor) { data$pred_c = 0 } else { - cfit = speedglm(cformula, family = binomial(), data = data) + cfit = tryCatch(speedglm(cformula, family = binomial(), data = data), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(cfit)[1] != "speedglm") { + if (names(cfit) == "error" | (names(cfit) == "warn" & str_detect(cfit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + cfit <- glm(cformula, family = binomial(), data = data) + } + } + data$pred_c = predict(cfit, newdata = data, type="response") } if (!is.null(competing_varname) & total_effect == FALSE) { - dfit = speedglm(dformula, family = binomial(), data = data) + dfit = tryCatch(speedglm(dformula, family = binomial(), data = data), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(dfit)[1] != "speedglm") { + if (names(dfit) == "error" | (names(dfit) == "warn" & str_detect(dfit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + dfit <- glm(dformula, family = binomial(), data = data) + } + } + data$pred_d = predict(dfit, newdata = data, type="response") data$pred_d = (1- data$pred_d) * (1-data$pred_c) @@ -652,7 +690,17 @@ ice_strat <- function(data, K, id, time_name, outcome_name, data <- data %>% dplyr::select(-c("pred_obs_all")) } else { - afit = speedglm(aformula, family = binomial(), data = data) + afit = tryCatch(speedglm(aformula, family = binomial(), data = data), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(afit)[1] != "speedglm") { + if (names(afit) == "error" | (names(afit) == "warn" & str_detect(afit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + afit <- glm(aformula, family = binomial(), data = data) + } + } @@ -667,6 +715,10 @@ ice_strat <- function(data, K, id, time_name, outcome_name, } else { data[, paste0("pred_obs_", treatment_i)] = pred_obsa } + + if (paste0(treatment_i, "_is_nc") %in% colnames(data)) { + data[, paste0("pred_obs_", treatment_i)] <- ifelse(data[, paste0(treatment_i, "_is_nc")], 1, data[, paste0("pred_obs_", treatment_i)]) + } } @@ -712,7 +764,17 @@ ice_strat <- function(data, K, id, time_name, outcome_name, ## 4. reshape data and fit outcome model tmpdata = as.data.frame(dffullwide) formula_full <- as.formula(paste0(outcome_varname,"~", paste0(c(outcome_covar), collapse = "+"))) - yfitog = speedglm(formula_full, family = binomial(), data = data) #This is from the data generation mechanism + yfitog = tryCatch(speedglm(formula_full, family = binomial(), data = data), #This is from the data generation mechanism + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(yfitog)[1] != "speedglm") { + if (names(yfitog) == "error" | (names(yfitog) == "warn" & str_detect(yfitog$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + yfitog <- glm(formula_full, family = binomial(), data = data) + } + } paramtmp = (yfitog)$coef @@ -868,7 +930,17 @@ ice_strat <- function(data, K, id, time_name, outcome_name, outcome_formula <- as.formula(paste0(outcome_varname, "_", i, "~", paste0(covar_outcome_t, collapse = "+"))) - tmp_fit <- speedglm(outcome_formula, family = binomial(), data = pred_data) + tmp_fit <- tryCatch(speedglm(outcome_formula, family = binomial(), data = pred_data), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(tmp_fit)[1] != "speedglm") { + if (names(tmp_fit) == "error" | (names(tmp_fit) == "warn" & str_detect(tmp_fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + tmp_fit <- glm(outcome_formula, family = binomial(), data = pred_data) + } + } outcome_pred_times[[i]] <- tmp_fit @@ -966,7 +1038,17 @@ ice_strat <- function(data, K, id, time_name, outcome_name, comp_formula <- as.formula(paste0(competing_varname, "_", i-1, "~", paste0(covar_competing_t, collapse = "+"))) - tmp_fit <- speedglm(comp_formula, family = binomial(), data = pred_data) + tmp_fit <- tryCatch(speedglm(comp_formula, family = binomial(), data = pred_data), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(tmp_fit)[1] != "speedglm") { + if (names(tmp_fit) == "error" | (names(tmp_fit) == "warn" & str_detect(tmp_fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + tmp_fit <- glm(comp_formula, family = binomial(), data = pred_data) + } + } comp_pred_times[[i]] <- tmp_fit @@ -1149,19 +1231,63 @@ ice_strat <- function(data, K, id, time_name, outcome_name, if (weighted & hazard_based == FALSE) { fitdata$w <- 1/fitdata[, paste0("pi", q)] + if (q == t - 1) { - fit <- speedglm(temp_formula, family = quasibinomial(), weights = fitdata$w, data = fitdata) + fit <- tryCatch(speedglm(temp_formula, family = quasibinomial(), weights = fitdata$w, data = fitdata), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(fit)[1] != "speedglm") { + if (names(fit) == "error" | (names(fit) == "warn" & str_detect(fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + fit <- glm(temp_formula, family = quasibinomial(), weights = fitdata$w, data = fitdata) + } + } + } else { - fit <- speedglm(temp_formula, family = quasibinomial(), weights = fitdata$w, data = fitdata) + fit <- tryCatch(speedglm(temp_formula, family = quasibinomial(), weights = fitdata$w, data = fitdata), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(fit)[1] != "speedglm") { + if (names(fit) == "error" | (names(fit) == "warn" & str_detect(fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + fit <- glm(temp_formula, family = quasibinomial(), weights = fitdata$w, data = fitdata) + } + } + } } else { if (q == t - 1) { - fit <- speedglm(temp_formula, family = binomial(), data = fitdata) + fit <- tryCatch(speedglm(temp_formula, family = binomial(), data = fitdata), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(fit)[1] != "speedglm") { + if (names(fit) == "error" | (names(fit) == "warn" & str_detect(fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + fit <- glm(temp_formula, family = binomial(), data = fitdata) + } + } + } else { - fit <- speedglm(temp_formula, family = quasibinomial(), data = fitdata) + fit <- tryCatch(speedglm(temp_formula, family = quasibinomial(), data = fitdata), + warning = function(w) {list(warn = conditionMessage(w))}, + error = function(e) {list(error = conditionMessage(e))} + ) + + if (class(fit)[1] != "speedglm") { + if (names(fit) == "error" | (names(fit) == "warn" & str_detect(fit$warn, "without convergence"))) { + message("Error or non-convergence occurs in speedglm. Replacing by glm.") + fit <- glm(temp_formula, family = quasibinomial(), data = fitdata) + } + } } } diff --git a/R/helper.R b/R/helper.R index 7304e4f..8917d0f 100644 --- a/R/helper.R +++ b/R/helper.R @@ -623,12 +623,17 @@ dynamic <- function(condition, strategy_before, strategy_after, absorb = FALSE, first <- absorb - - strategy_before_values <- strategy_before - strategy_after_values <- strategy_after - - interv_values <- get_dynamic_interv_values(condition, strategy_before_values, - strategy_after_values, first, id, time, data) + is_nc_strategy_before <- str_detect("natural_course", as.character(substitute(strategy_before))) + is_nc_strategy_after <- str_detect("natural_course", as.character(substitute(strategy_after))) + + strategy_before_values <- strategy_before + strategy_after_values <- strategy_after + + interv_values <- get_dynamic_interv_values(condition, strategy_before_values, + strategy_after_values, + is_nc_strategy_before, + is_nc_strategy_after, + first, id, time, data) return(interv_values) } @@ -638,6 +643,8 @@ dynamic <- function(condition, strategy_before, strategy_after, absorb = FALSE, #' @param condition a character string that specifies a logical expression, upon which is met, the strategy specified in \code{strategy_after} is followed. #' @param strategy_before_values a function or vector of intervened values in the same length as the number of rows in the observed data \code{data} that specifies the strategy followed after the condition in \code{condition} is met. #' @param strategy_after_values a function or vector of intervened values in the same length as the number of rows in the observed data \code{data} that specifies the strategy followed before the condition in \code{condition} is met. +#' @param is_nc_strategy_before a logical indicating whether the strategy specified in \code{strategy_before} is the natural course. +#' @param is_nc_strategy_after a logical indicating whether the strategy specified in \code{strategy_after} is the natural course. #' @param first a logical indicating whether the strategy specified in \code{strategy_after} starts upon the first time when the condition specified in \code{condition} is met. #' @param id a character string indicating the ID variable name in \code{data}. #' @param time a character string indicating the time variable name in \code{data}. @@ -645,16 +652,20 @@ dynamic <- function(condition, strategy_before, strategy_after, absorb = FALSE, #' #' @return a vector containing the intervened value in the same size as the number of rows in \code{data}. #' @noRd -get_dynamic_interv_values <- function(condition, strategy_before_values, strategy_after_values, first, id, time, data) { +get_dynamic_interv_values <- function(condition, strategy_before_values, strategy_after_values, + is_nc_strategy_before, is_nc_strategy_after, + first, id, time, data) { + + data <- as.data.frame(data) unique_times <- unique(data[, time]) if (first) { - init_info <- data %>% group_by(id) %>% - mutate(first_init = unique_times[which(eval(parse(text = condition)))[1]]) %>% - ungroup() - - init_info[, "is_init"] <- init_info[, time] >= init_info[, "first_init"] + init_info <- data %>% group_by(id) %>% + mutate(first_init = unique_times[which(eval(parse(text = condition)))[1]]) %>% + ungroup() + + init_info[, "is_init"] <- init_info[, time] >= init_info[, "first_init"] } else { init_info <- data %>% mutate(is_init = eval(parse(text = condition))) @@ -666,7 +677,17 @@ get_dynamic_interv_values <- function(condition, strategy_before_values, strateg init_info[, "interv_values"] <- ifelse(init_info$is_init, strategy_after_values, strategy_before_values) - return(init_info[, "interv_values"]) + init_info[, "is_nc"] <- 0 + + if (any(is_nc_strategy_before)) { + init_info[, "is_nc"] <- ifelse(init_info$is_init, 0, 1) + } + + if (any(is_nc_strategy_after)) { + init_info[, "is_nc"] <- ifelse(init_info$is_init, 1, 0) + } + + return(init_info[, c("interv_values", "is_nc")]) } diff --git a/README.md b/README.md index 51097ef..2735b78 100644 --- a/README.md +++ b/README.md @@ -321,7 +321,7 @@ ice_fit4e <- ice(data = gfoRmulaICE::compData, outcome_model = Y ~ L1 + L2, censor_model = C ~ L1 + L2, ref_idx = 0, - estimator = weight(list(A1 ~ L1 + L2, A2 ~ L1 + L2)), + estimator = weight(list(A1 ~ L1 + L2 + t0, A2 ~ L1 + L2 + t0)), nsamples = 1000, ci_method = "percentile", parallel = T,