From 06dc8bb470318e9e74613d64fbcb5fbfd428d22b Mon Sep 17 00:00:00 2001 From: rl3328 Date: Thu, 17 Apr 2025 16:37:34 +0000 Subject: [PATCH 01/26] add diagnostic module --- R/misc.R | 30 ++++ R/univariate_diagnostics.R | 305 +++++++++++++++++++++++++++++++++++++ 2 files changed, 335 insertions(+) create mode 100644 R/univariate_diagnostics.R diff --git a/R/misc.R b/R/misc.R index 3e9ecdee..8f9e5a7f 100644 --- a/R/misc.R +++ b/R/misc.R @@ -602,6 +602,36 @@ z_to_beta_se <- function(z, maf, n) { return(data.frame(beta = beta, se = se, maf = p)) } +#' Convert Z-scores to P-values +#' +#' This function calculates p-values from given z-scores using a two-tailed normal distribution. +#' It supports vector input to process multiple z-scores simultaneously. +#' +#' @param z Numeric vector. The z-scores to be converted to p-values. +#' +#' @return A numeric vector of p-values corresponding to the input z-scores. +#' +#' @details +#' The function uses the following formula to calculate p-values: +#' p-value = 2 * Φ(-|z|) +#' Where Φ is the cumulative distribution function of the standard normal distribution. +#' +#' @examples +#' z <- c(2.5, -1.8, 3.2, 0.7) +#' pvalues <- z_to_pvalue(z) +#' print(pvalues) +#' +#' @note +#' This function assumes that the input z-scores are from a two-tailed test and +#' are normally distributed. It calculates two-sided p-values. +#' For extremely large absolute z-scores, the resulting p-values may be computed as zero +#' due to floating-point limitations in R. This occurs when the absolute z-score > 37. +#' +#' @export +z_to_pvalue <- function(z) { + 2 * pnorm(-abs(z)) +} + #' Filter events based on provided context name pattern #' #' @param events A character vector of event names diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R new file mode 100644 index 00000000..739f7717 --- /dev/null +++ b/R/univariate_diagnostics.R @@ -0,0 +1,305 @@ +#' Extract SuSiE Results from Finemapping Data +#' +#' This function extracts the trimmed SuSiE results from a finemapping data object, +#' typically obtained from a finemapping RDS file. It's designed to work with +#' the method layer of these files, often named as 'method_RAISS_imputed', 'method', +#' or 'method_NO_QC'. This layer is right under the study layer. +#' +#' @param con_data List. The method layer data from a finemapping RDS file. +#' +#' @return The trimmed SuSiE results (`$susie_result_trimmed`) if available, +#' otherwise NULL. +#' +#' @details +#' The function checks if the input data is empty or if the `$susie_result_trimmed` +#' element is missing. It returns NULL in these cases. If `$susie_result_trimmed` +#' exists and is not empty, it returns this element. +#' +#' @note +#' This function is particularly useful when working with large datasets +#' where not all method layers may contain valid SuSiE results or method layer. +#' +#' @export +get_res <- function(con_data) { + if (length(con_data) == 0) return(NULL) + if (length(con_data$susie_result_trimmed) == 0) { + return(NULL) + print(paste("$susie_result_trimmed is null for", con_data)) + } else { + return(con_data$susie_result_trimmed) + } +} + +#' Process Credible Sets (CS) from Finemapping Results +#' +#' This function extracts and processes information for each Credible Set (CS) +#' from finemapping results, typically obtained from a finemapping RDS file. +#' +#' @param con_data List. The method layer data from a finemapping RDS file that is not empty. +#' @param cs_names Character vector. Names of the Credible Sets, usually in the format "L_". +#' @param top_loci_table Data frame. The $top_loci layer data from the finemapping results. +#' +#' @return A data frame with one row per CS, containing the following columns: +#' \item{cs_name}{Name of the Credible Set} +#' \item{variants_per_cs}{Number of variants in the CS} +#' \item{top_variant}{ID of the variant with the highest PIP in the CS} +#' \item{top_variant_index}{Global index of the top variant} +#' \item{top_pip}{Highest Posterior Inclusion Probability (PIP) in the CS} +#' \item{top_z}{Z-score of the top variant} +#' \item{p_value}{P-value calculated from the top Z-score} +#' \item{cs_corr}{pairwise correlations of other CSs in this RDS with the CS of +#` the current row, delimited by `|`, if there are more than one CSs in this RDS file} +#' +#' @details +#' This function is designed to be used only when there is at least one Credible Set +#' in the finemapping results usually for a given study and block. It processes each CS, +#' extracting key information such as the top variant, its statistics, and +#' correlation information between multiple CS if available. +#' +#' @importFrom purrr map +#' @importFrom dplyr bind_rows +#' @importFrom tibble z_to_pvalue +#' +#' @export +process_cs <- function(con_data, cs_names, top_loci_table) { + results <- map(seq_along(cs_names), function(i) { + cs_name <- cs_names[i] + indices <- con_data$susie_result_trimmed$sets$cs[[cs_name]] + + # Get variants for this CS using the full variant_names list + cs_variants <- con_data$variant_names[indices] + cs_data <- top_loci_table[top_loci_table$variant_id %in% cs_variants, ] + top_row <- which.max(cs_data$pip) + + top_variant <- cs_data$variant_id[top_row] + # Find the global index of the top variant + top_variant_global_index = which(con_data$variant_names == top_variant) + top_pip <- cs_data$pip[top_row] + top_z <- cs_data$z[top_row] + p_value <- z_to_pvalue(top_z) + + # Extract cs_corr + cs_corr <- if (length(cs_names) > 1) { + con_data$susie_result_trimmed$cs_corr[i,] + } else { + NA # Use NA for the second CS or when there's only one CS + } + + # Return results for this CS as a one-row data.frame + result = tibble::tibble( + cs_name = cs_name, + variants_per_cs = length(cs_variants), + top_variant = top_variant, + top_variant_index = top_variant_global_index, + top_pip = top_pip, + top_z = top_z, + p_value = p_value, + cs_corr = list(cs_corr) # list column if cs_corr is a vector + ) + return(result) + }) + # Combine all tibbles into one data frame + final_result <- dplyr::bind_rows(results) + return(final_result) +} + +#' Extract Information for Top Variant from Finemapping Results +#' +#' This function extracts information about the variant with the highest Posterior +#' Inclusion Probability (PIP) from finemapping results, typically used when no +#' Credible Sets (CS) are identified in the analysis. +#' +#' @param con_data List. The method layer data from a finemapping RDS file. +#' +#' @return A data frame with one row containing the following columns: +#' \item{cs_name}{NA (as no CS is identified)} +#' \item{variants_per_cs}{NA (as no CS is identified)} +#' \item{top_variant}{ID of the variant with the highest PIP} +#' \item{top_variant_index}{Index of the top variant in the original data} +#' \item{top_pip}{Highest Posterior Inclusion Probability (PIP)} +#' \item{top_z}{Z-score of the top variant} +#' \item{p_value}{P-value calculated from the top Z-score} +#' \item{cs_corr}{NA (as no CS correlation is available)} +#' +#' @details +#' This function is designed to be used when no Credible Sets are identified in +#' the finemapping results, but information about the most significant variant +#' is still desired. It identifies the variant with the highest PIP and extracts +#' relevant statistical information. +#' +#' @note +#' This function is particularly useful for capturing information about potentially +#' important variants that might be included in Credible Sets under different +#' analysis parameters or lower coverage. It maintains a structure similar to +#' the output of `process_cs()` for consistency in downstream analyses. +#' +#' @seealso +#' \code{\link{process_cs}} for processing when Credible Sets are present. +#' +#' @export +extract_top_pip <- function(con_data) { + # Find the variant with the highest PIP + top_pip_index <- which.max(con_data$susie_result_trimmed$pip) + top_pip <- con_data$susie_result_trimmed$pip[top_pip_index] + top_variant = con_data$variant_names[top_pip_index] + top_z = con_data$sumstats$z[top_pip_index] + p_value = z_to_pvalue(top_z) + # Create the dataframe row + data.frame( + cs_name = NA_character_, + variants_per_cs = NA_integer_, + top_variant = top_variant, + top_variant_index = top_pip_index, + top_pip = top_pip, + top_z = top_z, + p_value = p_value, + cs_corr = NA_character_ + ) +} + +#' Parse Credible Set Correlations from process_cs() Output +#' +#' This function takes the output from `process_cs()` and expands the `cs_corr` column +#' into multiple columns, preserving the original order of correlations. It also +#' calculates maximum and minimum correlation values for each Credible Set. +#' +#' @param df Data frame or data.table. The output from `process_cs()` function, +#' containing a `cs_corr` column with correlation information. +#' +#' @return A data.table with the original columns from the input, plus: +#' \item{cs_corr_1, cs_corr_2, ...}{Individual correlation values, with column names +#' based on their position in the original string} +#' \item{cs_corr_max}{Maximum absolute correlation value (excluding 1)} +#' \item{cs_corr_min}{Minimum absolute correlation value} +#' +#' @details +#' The function splits the `cs_corr` column, which typically contains correlation +#' values separated by '|', into individual columns. It preserves the order of +#' these correlations, allowing for easy interpretation in a matrix-like format. +#' +#' @note +#' - This function converts the input to a data.table if it isn't already one. +#' - It handles cases where correlation values might be missing or not in the expected format. +#' - The function assumes that correlation values of 1 represent self-correlations and excludes +#' these when calculating max and min correlations. +#' +#' @importFrom data.table setDT +#' +#' @export +parse_cs_corr <- function(df) { + # Convert to data.table if not already + if (!is.data.table(df)) { + setDT(df) + } + + # Function to extract correlations + extract_correlations <- function(x) { + if(is.na(x) || x == "" || !grepl("\\|", x)) { + return(list(values = numeric(0), max_corr = NA, min_corr = NA)) + } + + values <- as.numeric(unlist(strsplit(as.character(x), "\\|"))) + + + if(length(values) == 0) { + return(list(values = numeric(0), max_corr = NA, min_corr = NA)) + } + values_filtered <- abs(values[values != 1]) + return(list( + values = values, + max_corr = max(abs(values_filtered), na.rm = TRUE), + min_corr = min(abs(values_filtered), na.rm = TRUE) + )) + } + + # Process correlations + processed_results <- lapply(df$cs_corr, extract_correlations) + + # Determine max number of correlations + max_corr_count <- max(sapply(processed_results, function(x) length(x$values))) + + # Create column names + col_names <- paste0("cs_corr_", 1:max_corr_count) + + # Prepare a list of correlation values for each column + corr_list <- lapply(1:max_corr_count, function(j) { + sapply(processed_results, function(x) { + if(length(x$values) >= j) x$values[j] else NA_real_ + }) + }) + + # Add columns to the data.table + for(i in seq_along(col_names)) { + df[, (col_names[i]) := corr_list[[i]]] + } + + # Add max and min columns + df[, cs_corr_max := sapply(processed_results, `[[`, "max_corr")] + df[, cs_corr_min := sapply(processed_results, `[[`, "min_corr")] + + return(df) +} + +#' Process Credible Set Information and Determine Updating Strategy +#' +#' This function categorizes Credible Sets (CS) within a study block into different +#' updating strategies based on their statistical properties and correlations. +#' +#' @param df Data frame. Contains information about Credible Sets for a specific study and block. +#' @param high_corr_cols Character vector. Names of columns in df that represent high correlations. +#' +#' @return A modified data frame with additional columns attached to the diagnostic table: +#' \item{top_cs}{Logical. TRUE for the CS with the highest absolute Z-score.} +#' \item{tagged_cs}{Logical. TRUE for CS that are considered "tagged" based on p-value and correlation criteria.} +#' \item{method}{Character. The determined updating strategy ("BVSR", "SER", or "BCR").} +#' +#' @details +#' This function performs the following steps: +#' 1. Identifies the top CS based on the highest absolute Z-score. +#' 2. Identifies tagged CS based on high p-value and high correlations. +#' 3. Counts total, tagged, and remaining CS. +#' 4. Determines the appropriate updating method based on these counts. +#' +#' The updating methods are: +#' - BVSR (Bayesian Variable Selection Regression): Used when there's only one CS or all CS are accounted for. +#' - SER (Single Effect Regression): Used when there are tagged CS but no remaining untagged CS. +#' - BCR (Bayesian Conditional Regression): Used when there are remaining untagged CS. +#' +#' @note +#' This function is part of a developing methodology for automatically handling +#' finemapping results. The thresholds and criteria used (e.g., p-value > 1e-4 for tagging) +#' are subject to refinement and may change in future versions. +#' +#' @importFrom dplyr case_when +#' +#' @export +# cs_exp = cs_exp |> filter(!is.na(p_value)) +auto_decision <- function(df, high_corr_cols) { + # Identify top_cs + top_cs_index <- which.max(abs(df$top_z)) + df$top_cs <- FALSE + df$top_cs[top_cs_index] <- TRUE + + # Identify tagged_cs + df$tagged_cs <- sapply(1:nrow(df), function(i) { + if (df$top_cs[i]) return(FALSE) + if (df$p_value[i] > 1e-4) return(TRUE) + if (length(high_corr_cols) == 0) return(FALSE) + any(sapply(high_corr_cols, function(col) df[i, ..col] == 1)) + }) + + # Count total and remaining CS + total_cs <- nrow(df) + tagged_cs_count <- sum(df$tagged_cs) + remaining_cs <- total_cs - 1 - tagged_cs_count + + # Determine method + df$method <- case_when( + total_cs == 1 | (tagged_cs_count == 0 & remaining_cs == 0) ~ "BVSR", + remaining_cs == 0 ~ "SER", + remaining_cs > 0 ~ "BCR", + TRUE ~ NA_character_ + ) + + return(df) +} \ No newline at end of file From fb8dd37db5ebd03327eb67ca26fcbb3121745c43 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Thu, 17 Apr 2025 18:25:22 +0000 Subject: [PATCH 02/26] add the draft for comprehensive rss_analysis_pipeline --- R/univariate_diagnostics.R | 16 +++--- R/univariate_pipeline.R | 111 +++++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 12 deletions(-) diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R index 739f7717..dab979fa 100644 --- a/R/univariate_diagnostics.R +++ b/R/univariate_diagnostics.R @@ -20,7 +20,7 @@ #' where not all method layers may contain valid SuSiE results or method layer. #' #' @export -get_res <- function(con_data) { +get_susie_result <- function(con_data) { if (length(con_data) == 0) return(NULL) if (length(con_data$susie_result_trimmed) == 0) { return(NULL) @@ -61,7 +61,7 @@ get_res <- function(con_data) { #' @importFrom tibble z_to_pvalue #' #' @export -process_cs <- function(con_data, cs_names, top_loci_table) { +extract_cs_info <- function(con_data, cs_names, top_loci_table) { results <- map(seq_along(cs_names), function(i) { cs_name <- cs_names[i] indices <- con_data$susie_result_trimmed$sets$cs[[cs_name]] @@ -131,13 +131,13 @@ process_cs <- function(con_data, cs_names, top_loci_table) { #' This function is particularly useful for capturing information about potentially #' important variants that might be included in Credible Sets under different #' analysis parameters or lower coverage. It maintains a structure similar to -#' the output of `process_cs()` for consistency in downstream analyses. +#' the output of `extract_cs_info()` for consistency in downstream analyses. #' #' @seealso -#' \code{\link{process_cs}} for processing when Credible Sets are present. +#' \code{\link{extract_cs_info}} for processing when Credible Sets are present. #' #' @export -extract_top_pip <- function(con_data) { +extract_top_pip_info <- function(con_data) { # Find the variant with the highest PIP top_pip_index <- which.max(con_data$susie_result_trimmed$pip) top_pip <- con_data$susie_result_trimmed$pip[top_pip_index] @@ -157,13 +157,13 @@ extract_top_pip <- function(con_data) { ) } -#' Parse Credible Set Correlations from process_cs() Output +#' Parse Credible Set Correlations from extract_cs_info() Output #' -#' This function takes the output from `process_cs()` and expands the `cs_corr` column +#' This function takes the output from `extract_cs_info()` and expands the `cs_corr` column #' into multiple columns, preserving the original order of correlations. It also #' calculates maximum and minimum correlation values for each Credible Set. #' -#' @param df Data frame or data.table. The output from `process_cs()` function, +#' @param df Data frame or data.table. The output from `extract_cs_info()` function, #' containing a `cs_corr` column with correlation information. #' #' @return A data.table with the original columns from the input, plus: diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index a80ba03c..40e5fc55 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -183,7 +183,7 @@ rss_analysis_pipeline <- function( coverage = c(0.95, 0.7, 0.5), signal_cutoff = 0.025 ), impute = TRUE, impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01), - pip_cutoff_to_skip = 0, remove_indels = FALSE, comment_string = "#") { + pip_cutoff_to_skip = 0, remove_indels = FALSE, comment_string = "#", diagnostics = FALSE) { res <- list() rss_input <- load_rss_data( sumstat_path = sumstat_path, column_file_path = column_file_path, @@ -252,14 +252,117 @@ rss_analysis_pipeline <- function( } } if (impute & !is.null(qc_method)) { - method_name <- paste0(toupper(qc_method), "_RAISS_imputed") + method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") } else if (!impute & !is.null(qc_method)) { - method_name <- toupper(qc_method) + method_name <- paste0(finemapping_method, "_", toupper(qc_method)) } else { - method_name <- "NO_QC" + method_name <- paste0(finemapping_method, "_", "NO_QC") } result_list <- list() result_list[[method_name]] <- res result_list[["rss_data_analyzed"]] <- sumstats + + + if (diagnostics) { + if (length(res) > 0) { + bvsr_res = get_susie_result(res) + bvsr_cs_num = if(!is.null(bvsr_res)) length(bvsr_res$sets$cs) else NULL + if (isTRUE(bvsr_cs_num > 0)) { # have CS + # Get the names of the credible sets + cs_names_bvsr = names(bvsr_res$sets$cs) + block_cs_metrics = process_cs(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci) + } else { # no CS + bvsr_res = get_susie_result(res) + if (sum(bvsr_res$pip > signal_cutoff) > 0) { + block_cs_metrics = extract_top_pip(res) + } + } + } + # sensitive check for additional analyses + if (block_cs_metrics$variants_per_cs > 1) { + block_cs_metrics <- block_cs_metrics %>% + group_by(study, block) %>% + mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) { + NA_real_ # Use NA instead of -Inf when all values are NA + } else { + max(cs_corr_max, na.rm = TRUE) + }) %>% + ungroup() + if (block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5) { + finemapping_method <- "bayesian_conditional_regression" + pri_coverage <- finemapping_opts$coverage[1] + sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL + bcr <- susie_rss_pipeline(sumstats, LD_mat, + n = n, var_y = var_y, + L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step, + analysis_method = finemapping_method, + coverage = pri_coverage, + secondary_coverage = sec_coverage, + signal_cutoff = finemapping_opts$signal_cutoff + ) + if (!is.null(qc_method)) { + bcr$outlier_number <- qc_results$outlier_number + } + if (impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") + } else if (!impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method)) + } else { + method_name <- paste0(finemapping_method, "_", "NO_QC") + } + result_list[[method_name]] <- bcr + + finemapping_method <- "single_effect" + sumstats <- preprocess_results$sumstats + LD_mat <- preprocess_results$LD_mat + ser <- susie_rss_pipeline(sumstats, LD_mat, + n = n, var_y = var_y, + L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step, + analysis_method = finemapping_method, + coverage = pri_coverage, + secondary_coverage = sec_coverage, + signal_cutoff = finemapping_opts$signal_cutoff + ) + qc_method <- NULL + impute <- FALSE + # if (!is.null(qc_method)) { + # ser$outlier_number <- qc_results$outlier_number + # } + if (impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") + } else if (!impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method)) + } else { + method_name <- paste0(finemapping_method, "_", "NO_QC") + } + result_list[[method_name]] <- ser + } + } else { # variants_per_cs > 1 or NA + finemapping_method <- "single_effect" + sumstats <- preprocess_results$sumstats + LD_mat <- preprocess_results$LD_mat + ser <- susie_rss_pipeline(sumstats, LD_mat, + n = n, var_y = var_y, + L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step, + analysis_method = finemapping_method, + coverage = pri_coverage, + secondary_coverage = sec_coverage, + signal_cutoff = finemapping_opts$signal_cutoff + ) + qc_method <- NULL + impute <- FALSE + # if (!is.null(qc_method)) { + # ser$outlier_number <- qc_results$outlier_number + # } + if (impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") + } else if (!impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method)) + } else { + method_name <- paste0(finemapping_method, "_", "NO_QC") + } + result_list[[method_name]] <- ser + } return(result_list) + } } From e9b02b41650fc4f576095ce0df97d305dd3c254f Mon Sep 17 00:00:00 2001 From: rl3328 Date: Thu, 17 Apr 2025 18:33:09 +0000 Subject: [PATCH 03/26] output diagnostic table --- R/univariate_pipeline.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 40e5fc55..876b4be2 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -262,7 +262,7 @@ rss_analysis_pipeline <- function( result_list[[method_name]] <- res result_list[["rss_data_analyzed"]] <- sumstats - + block_cs_metrics <- list() if (diagnostics) { if (length(res) > 0) { bvsr_res = get_susie_result(res) @@ -363,6 +363,6 @@ rss_analysis_pipeline <- function( } result_list[[method_name]] <- ser } - return(result_list) + return(result_list = result_list, block_cs_metrics = block_cs_metrics) } } From b0b5e59c637a36fed07b1eff5b0be8b15a885c7c Mon Sep 17 00:00:00 2001 From: rl3328 Date: Thu, 17 Apr 2025 20:59:40 +0000 Subject: [PATCH 04/26] Update documentation --- NAMESPACE | 8 +++++++ man/auto_decision.Rd | 40 ++++++++++++++++++++++++++++++++++ man/extract_cs_info.Rd | 28 ++++++++++++++++++++++++ man/extract_top_pip_info.Rd | 42 ++++++++++++++++++++++++++++++++++++ man/get_susie_result.Rd | 30 ++++++++++++++++++++++++++ man/parse_cs_corr.Rd | 35 ++++++++++++++++++++++++++++++ man/rss_analysis_pipeline.Rd | 3 ++- man/z_to_pvalue.Rd | 35 ++++++++++++++++++++++++++++++ 8 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 man/auto_decision.Rd create mode 100644 man/extract_cs_info.Rd create mode 100644 man/extract_top_pip_info.Rd create mode 100644 man/get_susie_result.Rd create mode 100644 man/parse_cs_corr.Rd create mode 100644 man/z_to_pvalue.Rd diff --git a/NAMESPACE b/NAMESPACE index 9f3d3773..7dc90a82 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(QUAIL_rank_score_pipeline) export(adjust_susie_weights) export(align_variant_names) export(allele_qc) +export(auto_decision) export(batch_load_twas_weights) export(bayes_a_rss_weights) export(bayes_a_weights) @@ -28,6 +29,8 @@ export(ctwas_ld_loader) export(dentist) export(dentist_single_window) export(enet_weights) +export(extract_cs_info) +export(extract_top_pip_info) export(filter_invalid_summary_stat) export(filter_mixture_components) export(filter_variants_by_ld_reference) @@ -38,6 +41,7 @@ export(fsusie_wrapper) export(gbayes_rss) export(get_ctwas_meta_data) export(get_nested_element) +export(get_susie_result) export(glmnet_weights) export(harmonize_twas) export(lasso_weights) @@ -70,6 +74,7 @@ export(multicontext_ld_clumping) export(multigene_udr) export(multivariate_analysis_pipeline) export(mvsusie_weights) +export(parse_cs_corr) export(parse_region) export(parse_snp_info) export(parse_variant_id) @@ -107,6 +112,7 @@ export(twas_z) export(univariate_analysis_pipeline) export(wald_test_pval) export(xqtl_enrichment_wrapper) +export(z_to_pvalue) import(vroom) importFrom(IRanges,IRanges) importFrom(IRanges,end) @@ -120,6 +126,7 @@ importFrom(bigstatsr,FBM.code256) importFrom(coloc,coloc.bf_bf) importFrom(data.table,as.data.table) importFrom(data.table,fread) +importFrom(data.table,setDT) importFrom(data.table,setnames) importFrom(doFuture,registerDoFuture) importFrom(dplyr,"%>%") @@ -127,6 +134,7 @@ importFrom(dplyr,across) importFrom(dplyr,arrange) importFrom(dplyr,as_tibble) importFrom(dplyr,bind_rows) +importFrom(dplyr,case_when) importFrom(dplyr,distinct) importFrom(dplyr,everything) importFrom(dplyr,filter) diff --git a/man/auto_decision.Rd b/man/auto_decision.Rd new file mode 100644 index 00000000..65cae364 --- /dev/null +++ b/man/auto_decision.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{auto_decision} +\alias{auto_decision} +\title{Process Credible Set Information and Determine Updating Strategy} +\usage{ +auto_decision(df, high_corr_cols) +} +\arguments{ +\item{df}{Data frame. Contains information about Credible Sets for a specific study and block.} + +\item{high_corr_cols}{Character vector. Names of columns in df that represent high correlations.} +} +\value{ +A modified data frame with additional columns attached to the diagnostic table: + \item{top_cs}{Logical. TRUE for the CS with the highest absolute Z-score.} + \item{tagged_cs}{Logical. TRUE for CS that are considered "tagged" based on p-value and correlation criteria.} + \item{method}{Character. The determined updating strategy ("BVSR", "SER", or "BCR").} +} +\description{ +This function categorizes Credible Sets (CS) within a study block into different +updating strategies based on their statistical properties and correlations. +} +\details{ +This function performs the following steps: +1. Identifies the top CS based on the highest absolute Z-score. +2. Identifies tagged CS based on high p-value and high correlations. +3. Counts total, tagged, and remaining CS. +4. Determines the appropriate updating method based on these counts. + +The updating methods are: +- BVSR (Bayesian Variable Selection Regression): Used when there's only one CS or all CS are accounted for. +- SER (Single Effect Regression): Used when there are tagged CS but no remaining untagged CS. +- BCR (Bayesian Conditional Regression): Used when there are remaining untagged CS. +} +\note{ +This function is part of a developing methodology for automatically handling +finemapping results. The thresholds and criteria used (e.g., p-value > 1e-4 for tagging) +are subject to refinement and may change in future versions. +} diff --git a/man/extract_cs_info.Rd b/man/extract_cs_info.Rd new file mode 100644 index 00000000..b91c8d71 --- /dev/null +++ b/man/extract_cs_info.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{extract_cs_info} +\alias{extract_cs_info} +\title{Process Credible Sets (CS) from Finemapping Results} +\usage{ +extract_cs_info(con_data, cs_names, top_loci_table) +} +\arguments{ +\item{con_data}{List. The method layer data from a finemapping RDS file that is not empty.} + +\item{cs_names}{Character vector. Names of the Credible Sets, usually in the format "L_".} + +\item{top_loci_table}{Data frame. The $top_loci layer data from the finemapping results.} +} +\value{ + +} +\description{ +This function extracts and processes information for each Credible Set (CS) +from finemapping results, typically obtained from a finemapping RDS file. +} +\details{ +This function is designed to be used only when there is at least one Credible Set +in the finemapping results usually for a given study and block. It processes each CS, +extracting key information such as the top variant, its statistics, and +correlation information between multiple CS if available. +} diff --git a/man/extract_top_pip_info.Rd b/man/extract_top_pip_info.Rd new file mode 100644 index 00000000..2e319b89 --- /dev/null +++ b/man/extract_top_pip_info.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{extract_top_pip_info} +\alias{extract_top_pip_info} +\title{Extract Information for Top Variant from Finemapping Results} +\usage{ +extract_top_pip_info(con_data) +} +\arguments{ +\item{con_data}{List. The method layer data from a finemapping RDS file.} +} +\value{ +A data frame with one row containing the following columns: + \item{cs_name}{NA (as no CS is identified)} + \item{variants_per_cs}{NA (as no CS is identified)} + \item{top_variant}{ID of the variant with the highest PIP} + \item{top_variant_index}{Index of the top variant in the original data} + \item{top_pip}{Highest Posterior Inclusion Probability (PIP)} + \item{top_z}{Z-score of the top variant} + \item{p_value}{P-value calculated from the top Z-score} + \item{cs_corr}{NA (as no CS correlation is available)} +} +\description{ +This function extracts information about the variant with the highest Posterior +Inclusion Probability (PIP) from finemapping results, typically used when no +Credible Sets (CS) are identified in the analysis. +} +\details{ +This function is designed to be used when no Credible Sets are identified in +the finemapping results, but information about the most significant variant +is still desired. It identifies the variant with the highest PIP and extracts +relevant statistical information. +} +\note{ +This function is particularly useful for capturing information about potentially +important variants that might be included in Credible Sets under different +analysis parameters or lower coverage. It maintains a structure similar to +the output of `extract_cs_info()` for consistency in downstream analyses. +} +\seealso{ +\code{\link{extract_cs_info}} for processing when Credible Sets are present. +} diff --git a/man/get_susie_result.Rd b/man/get_susie_result.Rd new file mode 100644 index 00000000..25706df1 --- /dev/null +++ b/man/get_susie_result.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{get_susie_result} +\alias{get_susie_result} +\title{Extract SuSiE Results from Finemapping Data} +\usage{ +get_susie_result(con_data) +} +\arguments{ +\item{con_data}{List. The method layer data from a finemapping RDS file.} +} +\value{ +The trimmed SuSiE results (`$susie_result_trimmed`) if available, +otherwise NULL. +} +\description{ +This function extracts the trimmed SuSiE results from a finemapping data object, +typically obtained from a finemapping RDS file. It's designed to work with +the method layer of these files, often named as 'method_RAISS_imputed', 'method', +or 'method_NO_QC'. This layer is right under the study layer. +} +\details{ +The function checks if the input data is empty or if the `$susie_result_trimmed` +element is missing. It returns NULL in these cases. If `$susie_result_trimmed` +exists and is not empty, it returns this element. +} +\note{ +This function is particularly useful when working with large datasets +where not all method layers may contain valid SuSiE results or method layer. +} diff --git a/man/parse_cs_corr.Rd b/man/parse_cs_corr.Rd new file mode 100644 index 00000000..2d4d5568 --- /dev/null +++ b/man/parse_cs_corr.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{parse_cs_corr} +\alias{parse_cs_corr} +\title{Parse Credible Set Correlations from extract_cs_info() Output} +\usage{ +parse_cs_corr(df) +} +\arguments{ +\item{df}{Data frame or data.table. The output from `extract_cs_info()` function, +containing a `cs_corr` column with correlation information.} +} +\value{ +A data.table with the original columns from the input, plus: + \item{cs_corr_1, cs_corr_2, ...}{Individual correlation values, with column names + based on their position in the original string} + \item{cs_corr_max}{Maximum absolute correlation value (excluding 1)} + \item{cs_corr_min}{Minimum absolute correlation value} +} +\description{ +This function takes the output from `extract_cs_info()` and expands the `cs_corr` column +into multiple columns, preserving the original order of correlations. It also +calculates maximum and minimum correlation values for each Credible Set. +} +\details{ +The function splits the `cs_corr` column, which typically contains correlation +values separated by '|', into individual columns. It preserves the order of +these correlations, allowing for easy interpretation in a matrix-like format. +} +\note{ +- This function converts the input to a data.table if it isn't already one. +- It handles cases where correlation values might be missing or not in the expected format. +- The function assumes that correlation values of 1 represent self-correlations and excludes + these when calculating max and min correlations. +} diff --git a/man/rss_analysis_pipeline.Rd b/man/rss_analysis_pipeline.Rd index 2f3b8c4d..4883920a 100644 --- a/man/rss_analysis_pipeline.Rd +++ b/man/rss_analysis_pipeline.Rd @@ -23,7 +23,8 @@ rss_analysis_pipeline( impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01), pip_cutoff_to_skip = 0, remove_indels = FALSE, - comment_string = "#" + comment_string = "#", + diagnostics = FALSE ) } \arguments{ diff --git a/man/z_to_pvalue.Rd b/man/z_to_pvalue.Rd new file mode 100644 index 00000000..fd47ae03 --- /dev/null +++ b/man/z_to_pvalue.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{z_to_pvalue} +\alias{z_to_pvalue} +\title{Convert Z-scores to P-values} +\usage{ +z_to_pvalue(z) +} +\arguments{ +\item{z}{Numeric vector. The z-scores to be converted to p-values.} +} +\value{ +A numeric vector of p-values corresponding to the input z-scores. +} +\description{ +This function calculates p-values from given z-scores using a two-tailed normal distribution. +It supports vector input to process multiple z-scores simultaneously. +} +\details{ +The function uses the following formula to calculate p-values: +p-value = 2 * Φ(-|z|) +Where Φ is the cumulative distribution function of the standard normal distribution. +} +\note{ +This function assumes that the input z-scores are from a two-tailed test and +are normally distributed. It calculates two-sided p-values. +For extremely large absolute z-scores, the resulting p-values may be computed as zero +due to floating-point limitations in R. This occurs when the absolute z-score > 37. +} +\examples{ +z <- c(2.5, -1.8, 3.2, 0.7) +pvalues <- z_to_pvalue(z) +print(pvalues) + +} From 02839b3d131c9e9e575186b69942651e7ff715b2 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Fri, 18 Apr 2025 19:08:53 +0000 Subject: [PATCH 05/26] update the document and temporarily store this version --- NAMESPACE | 8 +++ R/univariate_diagnostics.R | 5 +- R/univariate_pipeline.R | 126 ++++++++++++++++++----------------- Untitled.ipynb | 64 ++++++++++++++++++ man/auto_decision.Rd | 40 +++++++++++ man/extract_cs_info.Rd | 37 ++++++++++ man/extract_top_pip_info.Rd | 42 ++++++++++++ man/get_susie_result.Rd | 30 +++++++++ man/parse_cs_corr.Rd | 35 ++++++++++ man/rss_analysis_pipeline.Rd | 3 +- man/z_to_pvalue.Rd | 35 ++++++++++ 11 files changed, 359 insertions(+), 66 deletions(-) create mode 100644 Untitled.ipynb create mode 100644 man/auto_decision.Rd create mode 100644 man/extract_cs_info.Rd create mode 100644 man/extract_top_pip_info.Rd create mode 100644 man/get_susie_result.Rd create mode 100644 man/parse_cs_corr.Rd create mode 100644 man/z_to_pvalue.Rd diff --git a/NAMESPACE b/NAMESPACE index 9f3d3773..7dc90a82 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(QUAIL_rank_score_pipeline) export(adjust_susie_weights) export(align_variant_names) export(allele_qc) +export(auto_decision) export(batch_load_twas_weights) export(bayes_a_rss_weights) export(bayes_a_weights) @@ -28,6 +29,8 @@ export(ctwas_ld_loader) export(dentist) export(dentist_single_window) export(enet_weights) +export(extract_cs_info) +export(extract_top_pip_info) export(filter_invalid_summary_stat) export(filter_mixture_components) export(filter_variants_by_ld_reference) @@ -38,6 +41,7 @@ export(fsusie_wrapper) export(gbayes_rss) export(get_ctwas_meta_data) export(get_nested_element) +export(get_susie_result) export(glmnet_weights) export(harmonize_twas) export(lasso_weights) @@ -70,6 +74,7 @@ export(multicontext_ld_clumping) export(multigene_udr) export(multivariate_analysis_pipeline) export(mvsusie_weights) +export(parse_cs_corr) export(parse_region) export(parse_snp_info) export(parse_variant_id) @@ -107,6 +112,7 @@ export(twas_z) export(univariate_analysis_pipeline) export(wald_test_pval) export(xqtl_enrichment_wrapper) +export(z_to_pvalue) import(vroom) importFrom(IRanges,IRanges) importFrom(IRanges,end) @@ -120,6 +126,7 @@ importFrom(bigstatsr,FBM.code256) importFrom(coloc,coloc.bf_bf) importFrom(data.table,as.data.table) importFrom(data.table,fread) +importFrom(data.table,setDT) importFrom(data.table,setnames) importFrom(doFuture,registerDoFuture) importFrom(dplyr,"%>%") @@ -127,6 +134,7 @@ importFrom(dplyr,across) importFrom(dplyr,arrange) importFrom(dplyr,as_tibble) importFrom(dplyr,bind_rows) +importFrom(dplyr,case_when) importFrom(dplyr,distinct) importFrom(dplyr,everything) importFrom(dplyr,filter) diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R index dab979fa..cee9bc70 100644 --- a/R/univariate_diagnostics.R +++ b/R/univariate_diagnostics.R @@ -47,8 +47,8 @@ get_susie_result <- function(con_data) { #' \item{top_pip}{Highest Posterior Inclusion Probability (PIP) in the CS} #' \item{top_z}{Z-score of the top variant} #' \item{p_value}{P-value calculated from the top Z-score} -#' \item{cs_corr}{pairwise correlations of other CSs in this RDS with the CS of -#` the current row, delimited by `|`, if there are more than one CSs in this RDS file} +#' \item{cs_corr}{Pairwise correlations of other CSs in this RDS with the CS of +#' the current row, delimited by '|', if there is more than one CS in this RDS file} #' #' @details #' This function is designed to be used only when there is at least one Credible Set @@ -58,7 +58,6 @@ get_susie_result <- function(con_data) { #' #' @importFrom purrr map #' @importFrom dplyr bind_rows -#' @importFrom tibble z_to_pvalue #' #' @export extract_cs_info <- function(con_data, cs_names, top_loci_table) { diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 876b4be2..6e7575d2 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -237,6 +237,8 @@ rss_analysis_pipeline <- function( # Perform fine-mapping if (!is.null(finemapping_method)) { + print("finemapping_method") + print(finemapping_method) pri_coverage <- finemapping_opts$coverage[1] sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL res <- susie_rss_pipeline(sumstats, LD_mat, @@ -247,6 +249,8 @@ rss_analysis_pipeline <- function( secondary_coverage = sec_coverage, signal_cutoff = finemapping_opts$signal_cutoff ) + print("str(res)") + print(str(res)) if (!is.null(qc_method)) { res$outlier_number <- qc_results$outlier_number } @@ -273,45 +277,71 @@ rss_analysis_pipeline <- function( block_cs_metrics = process_cs(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci) } else { # no CS bvsr_res = get_susie_result(res) - if (sum(bvsr_res$pip > signal_cutoff) > 0) { + if (sum(bvsr_res$pip > finemapping_opts$signal_cutoff) > 0) { block_cs_metrics = extract_top_pip(res) } } } + print("block_cs_metrics") + print(block_cs_metrics) # sensitive check for additional analyses - if (block_cs_metrics$variants_per_cs > 1) { - block_cs_metrics <- block_cs_metrics %>% - group_by(study, block) %>% - mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) { - NA_real_ # Use NA instead of -Inf when all values are NA - } else { - max(cs_corr_max, na.rm = TRUE) - }) %>% - ungroup() - if (block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5) { - finemapping_method <- "bayesian_conditional_regression" - pri_coverage <- finemapping_opts$coverage[1] - sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL - bcr <- susie_rss_pipeline(sumstats, LD_mat, - n = n, var_y = var_y, - L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step, - analysis_method = finemapping_method, - coverage = pri_coverage, - secondary_coverage = sec_coverage, - signal_cutoff = finemapping_opts$signal_cutoff - ) - if (!is.null(qc_method)) { - bcr$outlier_number <- qc_results$outlier_number + if (!is.null(block_cs_metrics) && length(block_cs_metrics) > 0) { + if (block_cs_metrics$variants_per_cs > 1) { + block_cs_metrics <- block_cs_metrics %>% + group_by(study, block) %>% + mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) { + NA_real_ # Use NA instead of -Inf when all values are NA + } else { + max(cs_corr_max, na.rm = TRUE) + }) %>% + ungroup() + if (block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5) { + finemapping_method <- "bayesian_conditional_regression" + pri_coverage <- finemapping_opts$coverage[1] + sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL + bcr <- susie_rss_pipeline(sumstats, LD_mat, + n = n, var_y = var_y, + L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step, + analysis_method = finemapping_method, + coverage = pri_coverage, + secondary_coverage = sec_coverage, + signal_cutoff = finemapping_opts$signal_cutoff + ) + if (!is.null(qc_method)) { + bcr$outlier_number <- qc_results$outlier_number + } + if (impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") + } else if (!impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method)) + } else { + method_name <- paste0(finemapping_method, "_", "NO_QC") + } + result_list[[method_name]] <- bcr + + finemapping_method <- "single_effect" + sumstats <- preprocess_results$sumstats + LD_mat <- preprocess_results$LD_mat + ser <- susie_rss_pipeline(sumstats, LD_mat, + n = n, var_y = var_y, + L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step, + analysis_method = finemapping_method, + coverage = pri_coverage, + secondary_coverage = sec_coverage, + signal_cutoff = finemapping_opts$signal_cutoff + ) + qc_method <- NULL + impute <- FALSE + if (impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") + } else if (!impute & !is.null(qc_method)) { + method_name <- paste0(finemapping_method, "_", toupper(qc_method)) + } else { + method_name <- paste0(finemapping_method, "_", "NO_QC") + } + result_list[[method_name]] <- ser } - if (impute & !is.null(qc_method)) { - method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") - } else if (!impute & !is.null(qc_method)) { - method_name <- paste0(finemapping_method, "_", toupper(qc_method)) - } else { - method_name <- paste0(finemapping_method, "_", "NO_QC") - } - result_list[[method_name]] <- bcr - + } else { # variants_per_cs > 1 or NA finemapping_method <- "single_effect" sumstats <- preprocess_results$sumstats LD_mat <- preprocess_results$LD_mat @@ -325,9 +355,6 @@ rss_analysis_pipeline <- function( ) qc_method <- NULL impute <- FALSE - # if (!is.null(qc_method)) { - # ser$outlier_number <- qc_results$outlier_number - # } if (impute & !is.null(qc_method)) { method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") } else if (!impute & !is.null(qc_method)) { @@ -337,32 +364,7 @@ rss_analysis_pipeline <- function( } result_list[[method_name]] <- ser } - } else { # variants_per_cs > 1 or NA - finemapping_method <- "single_effect" - sumstats <- preprocess_results$sumstats - LD_mat <- preprocess_results$LD_mat - ser <- susie_rss_pipeline(sumstats, LD_mat, - n = n, var_y = var_y, - L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step, - analysis_method = finemapping_method, - coverage = pri_coverage, - secondary_coverage = sec_coverage, - signal_cutoff = finemapping_opts$signal_cutoff - ) - qc_method <- NULL - impute <- FALSE - # if (!is.null(qc_method)) { - # ser$outlier_number <- qc_results$outlier_number - # } - if (impute & !is.null(qc_method)) { - method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed") - } else if (!impute & !is.null(qc_method)) { - method_name <- paste0(finemapping_method, "_", toupper(qc_method)) - } else { - method_name <- paste0(finemapping_method, "_", "NO_QC") - } - result_list[[method_name]] <- ser } - return(result_list = result_list, block_cs_metrics = block_cs_metrics) } + return(list(result_list = result_list, block_cs_metrics = block_cs_metrics)) } diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 00000000..c0b6edfb --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "d0dd1849-98c8-4445-902d-013417dbf52b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[1m\u001b[22m\u001b[36mi\u001b[39m Updating \u001b[34mpecotmr\u001b[39m documentation\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quantile_twas_weight.R:415: \u001b[1m@importFrom\u001b[22m Excluding unknown export from stats:\n", + " `qr`.\n", + "\u001b[1m\u001b[22m\u001b[36mi\u001b[39m Loading \u001b[34mpecotmr\u001b[39m\n", + "Warning message:\n", + "\"replacing previous import 'vroom::cols' by 'readr::cols' when loading 'pecotmr'\"\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:10: \u001b[1m@examples\u001b[22m requires a value.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:11: \u001b[1m@noRd\u001b[22m must not be followed by any text.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:98: \u001b[1m@examples\u001b[22m requires a value.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:99: \u001b[1m@noRd\u001b[22m must not be followed by any text.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quantile_twas_weight.R:415: \u001b[1m@importFrom\u001b[22m Excluding unknown export from stats:\n", + " `qr`.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_univariate_data.Rd': Skipping; no name and/or title.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_regression_data.Rd': Skipping; no name and/or title.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_multivariate_data.Rd': Skipping; no name and/or\n", + " title.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_functional_data.Rd': Skipping; no name and/or title.\n", + "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'mrash_weights.Rd': Skipping; no name and/or title.\n" + ] + } + ], + "source": [ + "devtools::document()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28154cc4-e22b-4b7e-b257-2605b30e8485", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/man/auto_decision.Rd b/man/auto_decision.Rd new file mode 100644 index 00000000..65cae364 --- /dev/null +++ b/man/auto_decision.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{auto_decision} +\alias{auto_decision} +\title{Process Credible Set Information and Determine Updating Strategy} +\usage{ +auto_decision(df, high_corr_cols) +} +\arguments{ +\item{df}{Data frame. Contains information about Credible Sets for a specific study and block.} + +\item{high_corr_cols}{Character vector. Names of columns in df that represent high correlations.} +} +\value{ +A modified data frame with additional columns attached to the diagnostic table: + \item{top_cs}{Logical. TRUE for the CS with the highest absolute Z-score.} + \item{tagged_cs}{Logical. TRUE for CS that are considered "tagged" based on p-value and correlation criteria.} + \item{method}{Character. The determined updating strategy ("BVSR", "SER", or "BCR").} +} +\description{ +This function categorizes Credible Sets (CS) within a study block into different +updating strategies based on their statistical properties and correlations. +} +\details{ +This function performs the following steps: +1. Identifies the top CS based on the highest absolute Z-score. +2. Identifies tagged CS based on high p-value and high correlations. +3. Counts total, tagged, and remaining CS. +4. Determines the appropriate updating method based on these counts. + +The updating methods are: +- BVSR (Bayesian Variable Selection Regression): Used when there's only one CS or all CS are accounted for. +- SER (Single Effect Regression): Used when there are tagged CS but no remaining untagged CS. +- BCR (Bayesian Conditional Regression): Used when there are remaining untagged CS. +} +\note{ +This function is part of a developing methodology for automatically handling +finemapping results. The thresholds and criteria used (e.g., p-value > 1e-4 for tagging) +are subject to refinement and may change in future versions. +} diff --git a/man/extract_cs_info.Rd b/man/extract_cs_info.Rd new file mode 100644 index 00000000..b9ccba29 --- /dev/null +++ b/man/extract_cs_info.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{extract_cs_info} +\alias{extract_cs_info} +\title{Process Credible Sets (CS) from Finemapping Results} +\usage{ +extract_cs_info(con_data, cs_names, top_loci_table) +} +\arguments{ +\item{con_data}{List. The method layer data from a finemapping RDS file that is not empty.} + +\item{cs_names}{Character vector. Names of the Credible Sets, usually in the format "L_".} + +\item{top_loci_table}{Data frame. The $top_loci layer data from the finemapping results.} +} +\value{ +A data frame with one row per CS, containing the following columns: + \item{cs_name}{Name of the Credible Set} + \item{variants_per_cs}{Number of variants in the CS} + \item{top_variant}{ID of the variant with the highest PIP in the CS} + \item{top_variant_index}{Global index of the top variant} + \item{top_pip}{Highest Posterior Inclusion Probability (PIP) in the CS} + \item{top_z}{Z-score of the top variant} + \item{p_value}{P-value calculated from the top Z-score} + \item{cs_corr}{Pairwise correlations of other CSs in this RDS with the CS of + the current row, delimited by '|', if there is more than one CS in this RDS file} +} +\description{ +This function extracts and processes information for each Credible Set (CS) +from finemapping results, typically obtained from a finemapping RDS file. +} +\details{ +This function is designed to be used only when there is at least one Credible Set +in the finemapping results usually for a given study and block. It processes each CS, +extracting key information such as the top variant, its statistics, and +correlation information between multiple CS if available. +} diff --git a/man/extract_top_pip_info.Rd b/man/extract_top_pip_info.Rd new file mode 100644 index 00000000..2e319b89 --- /dev/null +++ b/man/extract_top_pip_info.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{extract_top_pip_info} +\alias{extract_top_pip_info} +\title{Extract Information for Top Variant from Finemapping Results} +\usage{ +extract_top_pip_info(con_data) +} +\arguments{ +\item{con_data}{List. The method layer data from a finemapping RDS file.} +} +\value{ +A data frame with one row containing the following columns: + \item{cs_name}{NA (as no CS is identified)} + \item{variants_per_cs}{NA (as no CS is identified)} + \item{top_variant}{ID of the variant with the highest PIP} + \item{top_variant_index}{Index of the top variant in the original data} + \item{top_pip}{Highest Posterior Inclusion Probability (PIP)} + \item{top_z}{Z-score of the top variant} + \item{p_value}{P-value calculated from the top Z-score} + \item{cs_corr}{NA (as no CS correlation is available)} +} +\description{ +This function extracts information about the variant with the highest Posterior +Inclusion Probability (PIP) from finemapping results, typically used when no +Credible Sets (CS) are identified in the analysis. +} +\details{ +This function is designed to be used when no Credible Sets are identified in +the finemapping results, but information about the most significant variant +is still desired. It identifies the variant with the highest PIP and extracts +relevant statistical information. +} +\note{ +This function is particularly useful for capturing information about potentially +important variants that might be included in Credible Sets under different +analysis parameters or lower coverage. It maintains a structure similar to +the output of `extract_cs_info()` for consistency in downstream analyses. +} +\seealso{ +\code{\link{extract_cs_info}} for processing when Credible Sets are present. +} diff --git a/man/get_susie_result.Rd b/man/get_susie_result.Rd new file mode 100644 index 00000000..25706df1 --- /dev/null +++ b/man/get_susie_result.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{get_susie_result} +\alias{get_susie_result} +\title{Extract SuSiE Results from Finemapping Data} +\usage{ +get_susie_result(con_data) +} +\arguments{ +\item{con_data}{List. The method layer data from a finemapping RDS file.} +} +\value{ +The trimmed SuSiE results (`$susie_result_trimmed`) if available, +otherwise NULL. +} +\description{ +This function extracts the trimmed SuSiE results from a finemapping data object, +typically obtained from a finemapping RDS file. It's designed to work with +the method layer of these files, often named as 'method_RAISS_imputed', 'method', +or 'method_NO_QC'. This layer is right under the study layer. +} +\details{ +The function checks if the input data is empty or if the `$susie_result_trimmed` +element is missing. It returns NULL in these cases. If `$susie_result_trimmed` +exists and is not empty, it returns this element. +} +\note{ +This function is particularly useful when working with large datasets +where not all method layers may contain valid SuSiE results or method layer. +} diff --git a/man/parse_cs_corr.Rd b/man/parse_cs_corr.Rd new file mode 100644 index 00000000..2d4d5568 --- /dev/null +++ b/man/parse_cs_corr.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/univariate_diagnostics.R +\name{parse_cs_corr} +\alias{parse_cs_corr} +\title{Parse Credible Set Correlations from extract_cs_info() Output} +\usage{ +parse_cs_corr(df) +} +\arguments{ +\item{df}{Data frame or data.table. The output from `extract_cs_info()` function, +containing a `cs_corr` column with correlation information.} +} +\value{ +A data.table with the original columns from the input, plus: + \item{cs_corr_1, cs_corr_2, ...}{Individual correlation values, with column names + based on their position in the original string} + \item{cs_corr_max}{Maximum absolute correlation value (excluding 1)} + \item{cs_corr_min}{Minimum absolute correlation value} +} +\description{ +This function takes the output from `extract_cs_info()` and expands the `cs_corr` column +into multiple columns, preserving the original order of correlations. It also +calculates maximum and minimum correlation values for each Credible Set. +} +\details{ +The function splits the `cs_corr` column, which typically contains correlation +values separated by '|', into individual columns. It preserves the order of +these correlations, allowing for easy interpretation in a matrix-like format. +} +\note{ +- This function converts the input to a data.table if it isn't already one. +- It handles cases where correlation values might be missing or not in the expected format. +- The function assumes that correlation values of 1 represent self-correlations and excludes + these when calculating max and min correlations. +} diff --git a/man/rss_analysis_pipeline.Rd b/man/rss_analysis_pipeline.Rd index 2f3b8c4d..4883920a 100644 --- a/man/rss_analysis_pipeline.Rd +++ b/man/rss_analysis_pipeline.Rd @@ -23,7 +23,8 @@ rss_analysis_pipeline( impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01), pip_cutoff_to_skip = 0, remove_indels = FALSE, - comment_string = "#" + comment_string = "#", + diagnostics = FALSE ) } \arguments{ diff --git a/man/z_to_pvalue.Rd b/man/z_to_pvalue.Rd new file mode 100644 index 00000000..fd47ae03 --- /dev/null +++ b/man/z_to_pvalue.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{z_to_pvalue} +\alias{z_to_pvalue} +\title{Convert Z-scores to P-values} +\usage{ +z_to_pvalue(z) +} +\arguments{ +\item{z}{Numeric vector. The z-scores to be converted to p-values.} +} +\value{ +A numeric vector of p-values corresponding to the input z-scores. +} +\description{ +This function calculates p-values from given z-scores using a two-tailed normal distribution. +It supports vector input to process multiple z-scores simultaneously. +} +\details{ +The function uses the following formula to calculate p-values: +p-value = 2 * Φ(-|z|) +Where Φ is the cumulative distribution function of the standard normal distribution. +} +\note{ +This function assumes that the input z-scores are from a two-tailed test and +are normally distributed. It calculates two-sided p-values. +For extremely large absolute z-scores, the resulting p-values may be computed as zero +due to floating-point limitations in R. This occurs when the absolute z-score > 37. +} +\examples{ +z <- c(2.5, -1.8, 3.2, 0.7) +pvalues <- z_to_pvalue(z) +print(pvalues) + +} From eab9173c207168592f50af13c4501a04da3a2c6b Mon Sep 17 00:00:00 2001 From: rl3328 Date: Fri, 18 Apr 2025 19:09:54 +0000 Subject: [PATCH 06/26] rm unnecessary files --- Untitled.ipynb | 64 -------------------------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 Untitled.ipynb diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index c0b6edfb..00000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,64 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 5, - "id": "d0dd1849-98c8-4445-902d-013417dbf52b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[1m\u001b[22m\u001b[36mi\u001b[39m Updating \u001b[34mpecotmr\u001b[39m documentation\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quantile_twas_weight.R:415: \u001b[1m@importFrom\u001b[22m Excluding unknown export from stats:\n", - " `qr`.\n", - "\u001b[1m\u001b[22m\u001b[36mi\u001b[39m Loading \u001b[34mpecotmr\u001b[39m\n", - "Warning message:\n", - "\"replacing previous import 'vroom::cols' by 'readr::cols' when loading 'pecotmr'\"\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:10: \u001b[1m@examples\u001b[22m requires a value.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:11: \u001b[1m@noRd\u001b[22m must not be followed by any text.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:98: \u001b[1m@examples\u001b[22m requires a value.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:99: \u001b[1m@noRd\u001b[22m must not be followed by any text.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quantile_twas_weight.R:415: \u001b[1m@importFrom\u001b[22m Excluding unknown export from stats:\n", - " `qr`.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_univariate_data.Rd': Skipping; no name and/or title.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_regression_data.Rd': Skipping; no name and/or title.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_multivariate_data.Rd': Skipping; no name and/or\n", - " title.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_functional_data.Rd': Skipping; no name and/or title.\n", - "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'mrash_weights.Rd': Skipping; no name and/or title.\n" - ] - } - ], - "source": [ - "devtools::document()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "28154cc4-e22b-4b7e-b257-2605b30e8485", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "R", - "language": "R", - "name": "ir" - }, - "language_info": { - "codemirror_mode": "r", - "file_extension": ".r", - "mimetype": "text/x-r-source", - "name": "R", - "pygments_lexer": "r", - "version": "4.3.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 300dc078a889d38b378d702fd1c24bed13135cad Mon Sep 17 00:00:00 2001 From: rl3328 Date: Fri, 18 Apr 2025 19:31:08 +0000 Subject: [PATCH 07/26] Update documentation --- man/extract_cs_info.Rd | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/man/extract_cs_info.Rd b/man/extract_cs_info.Rd index 7796cdd8..b9ccba29 100644 --- a/man/extract_cs_info.Rd +++ b/man/extract_cs_info.Rd @@ -14,7 +14,6 @@ extract_cs_info(con_data, cs_names, top_loci_table) \item{top_loci_table}{Data frame. The $top_loci layer data from the finemapping results.} } \value{ - A data frame with one row per CS, containing the following columns: \item{cs_name}{Name of the Credible Set} \item{variants_per_cs}{Number of variants in the CS} @@ -25,3 +24,14 @@ A data frame with one row per CS, containing the following columns: \item{p_value}{P-value calculated from the top Z-score} \item{cs_corr}{Pairwise correlations of other CSs in this RDS with the CS of the current row, delimited by '|', if there is more than one CS in this RDS file} +} +\description{ +This function extracts and processes information for each Credible Set (CS) +from finemapping results, typically obtained from a finemapping RDS file. +} +\details{ +This function is designed to be used only when there is at least one Credible Set +in the finemapping results usually for a given study and block. It processes each CS, +extracting key information such as the top variant, its statistics, and +correlation information between multiple CS if available. +} From 49e47b02580afce34c9bf7e55e37c65bdd693444 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Fri, 18 Apr 2025 21:10:51 +0000 Subject: [PATCH 08/26] parse the cs_corr --- R/univariate_pipeline.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 6e7575d2..63b08833 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -274,11 +274,11 @@ rss_analysis_pipeline <- function( if (isTRUE(bvsr_cs_num > 0)) { # have CS # Get the names of the credible sets cs_names_bvsr = names(bvsr_res$sets$cs) - block_cs_metrics = process_cs(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci) + block_cs_metrics = extract_cs_info(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci) } else { # no CS bvsr_res = get_susie_result(res) if (sum(bvsr_res$pip > finemapping_opts$signal_cutoff) > 0) { - block_cs_metrics = extract_top_pip(res) + block_cs_metrics = extract_top_pip_info(res) } } } @@ -286,6 +286,9 @@ rss_analysis_pipeline <- function( print(block_cs_metrics) # sensitive check for additional analyses if (!is.null(block_cs_metrics) && length(block_cs_metrics) > 0) { + block_cs_metrics = parse_cs_corr(block_cs_metrics) + print("block_cs_metrics") + print(str(block_cs_metrics)) if (block_cs_metrics$variants_per_cs > 1) { block_cs_metrics <- block_cs_metrics %>% group_by(study, block) %>% From 6919f0a6c7c8b946aee78b1d2ee36fb8d3f11d6c Mon Sep 17 00:00:00 2001 From: rl3328 Date: Sat, 19 Apr 2025 03:16:27 +0000 Subject: [PATCH 09/26] archive --- R/univariate_diagnostics.R | 14 +++++++++----- R/univariate_pipeline.R | 5 +++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R index cee9bc70..29ee6ece 100644 --- a/R/univariate_diagnostics.R +++ b/R/univariate_diagnostics.R @@ -190,7 +190,6 @@ parse_cs_corr <- function(df) { if (!is.data.table(df)) { setDT(df) } - # Function to extract correlations extract_correlations <- function(x) { if(is.na(x) || x == "" || !grepl("\\|", x)) { @@ -210,13 +209,17 @@ parse_cs_corr <- function(df) { min_corr = min(abs(values_filtered), na.rm = TRUE) )) } - + print("df$cs_corr") + print(df$cs_corr) + str(df$cs_corr) # Process correlations processed_results <- lapply(df$cs_corr, extract_correlations) - + print("processed_results") + print(processed_results) # Determine max number of correlations max_corr_count <- max(sapply(processed_results, function(x) length(x$values))) - + print("max_corr_count") + print(max_corr_count) # Create column names col_names <- paste0("cs_corr_", 1:max_corr_count) @@ -226,7 +229,8 @@ parse_cs_corr <- function(df) { if(length(x$values) >= j) x$values[j] else NA_real_ }) }) - + print("corr_list") + print(corr_list) # Add columns to the data.table for(i in seq_along(col_names)) { df[, (col_names[i]) := corr_list[[i]]] diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 63b08833..6dbd9e20 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -274,7 +274,7 @@ rss_analysis_pipeline <- function( if (isTRUE(bvsr_cs_num > 0)) { # have CS # Get the names of the credible sets cs_names_bvsr = names(bvsr_res$sets$cs) - block_cs_metrics = extract_cs_info(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci) + block_cs_metrics = extract_cs_info(con_data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci) } else { # no CS bvsr_res = get_susie_result(res) if (sum(bvsr_res$pip > finemapping_opts$signal_cutoff) > 0) { @@ -285,7 +285,8 @@ rss_analysis_pipeline <- function( print("block_cs_metrics") print(block_cs_metrics) # sensitive check for additional analyses - if (!is.null(block_cs_metrics) && length(block_cs_metrics) > 0) { + if (!is.null(block_cs_metrics) && nrow(block_cs_metrics) > 0) { + print("pass") block_cs_metrics = parse_cs_corr(block_cs_metrics) print("block_cs_metrics") print(str(block_cs_metrics)) From 2d46b19b28172abf2ff915714ac2c12c6876c042 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Sat, 19 Apr 2025 05:36:29 +0000 Subject: [PATCH 10/26] archive --- R/univariate_diagnostics.R | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R index 29ee6ece..317e93ec 100644 --- a/R/univariate_diagnostics.R +++ b/R/univariate_diagnostics.R @@ -190,18 +190,26 @@ parse_cs_corr <- function(df) { if (!is.data.table(df)) { setDT(df) } - # Function to extract correlations + print("df") + print(df) extract_correlations <- function(x) { + # Explicit conversion to character (main change) + x <- as.character(x) + + # Same NA and empty string check if(is.na(x) || x == "" || !grepl("\\|", x)) { return(list(values = numeric(0), max_corr = NA, min_corr = NA)) } - values <- as.numeric(unlist(strsplit(as.character(x), "\\|"))) - + # Conversion to numeric, similar to original + values <- as.numeric(unlist(strsplit(x, "\\|"))) + # Same length check if(length(values) == 0) { return(list(values = numeric(0), max_corr = NA, min_corr = NA)) } + + # Same filtering and calculation values_filtered <- abs(values[values != 1]) return(list( values = values, @@ -211,7 +219,6 @@ parse_cs_corr <- function(df) { } print("df$cs_corr") print(df$cs_corr) - str(df$cs_corr) # Process correlations processed_results <- lapply(df$cs_corr, extract_correlations) print("processed_results") From e6c7bf215b4b5f30541d6cbecfaab40e8cc1b796 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Sat, 19 Apr 2025 22:08:33 +0000 Subject: [PATCH 11/26] archive --- R/univariate_diagnostics.R | 14 +++++--------- R/univariate_pipeline.R | 12 +++++------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R index 317e93ec..d07a52d4 100644 --- a/R/univariate_diagnostics.R +++ b/R/univariate_diagnostics.R @@ -93,7 +93,7 @@ extract_cs_info <- function(con_data, cs_names, top_loci_table) { top_pip = top_pip, top_z = top_z, p_value = p_value, - cs_corr = list(cs_corr) # list column if cs_corr is a vector + cs_corr = list(paste(cs_corr, collapse = ",")) # list column if cs_corr is a vector ) return(result) }) @@ -190,19 +190,17 @@ parse_cs_corr <- function(df) { if (!is.data.table(df)) { setDT(df) } - print("df") - print(df) extract_correlations <- function(x) { - # Explicit conversion to character (main change) - x <- as.character(x) + # # Explicit conversion to character (main change) + # x <- as.character(x) # Same NA and empty string check - if(is.na(x) || x == "" || !grepl("\\|", x)) { + if(is.na(x) || x == "" || !grepl(",", x)) { return(list(values = numeric(0), max_corr = NA, min_corr = NA)) } # Conversion to numeric, similar to original - values <- as.numeric(unlist(strsplit(x, "\\|"))) + values <- as.numeric(unlist(strsplit(x, ","))) # Same length check if(length(values) == 0) { @@ -225,8 +223,6 @@ parse_cs_corr <- function(df) { print(processed_results) # Determine max number of correlations max_corr_count <- max(sapply(processed_results, function(x) length(x$values))) - print("max_corr_count") - print(max_corr_count) # Create column names col_names <- paste0("cs_corr_", 1:max_corr_count) diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 6dbd9e20..7e1e6fd6 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -290,16 +290,14 @@ rss_analysis_pipeline <- function( block_cs_metrics = parse_cs_corr(block_cs_metrics) print("block_cs_metrics") print(str(block_cs_metrics)) - if (block_cs_metrics$variants_per_cs > 1) { + if (nrow(block_cs_metrics)) { block_cs_metrics <- block_cs_metrics %>% - group_by(study, block) %>% mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) { - NA_real_ # Use NA instead of -Inf when all values are NA + NA_real_ } else { max(cs_corr_max, na.rm = TRUE) - }) %>% - ungroup() - if (block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5) { + }) + if (any(block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5)) { finemapping_method <- "bayesian_conditional_regression" pri_coverage <- finemapping_opts$coverage[1] sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL @@ -345,7 +343,7 @@ rss_analysis_pipeline <- function( } result_list[[method_name]] <- ser } - } else { # variants_per_cs > 1 or NA + } else { # CS = 1 or NA finemapping_method <- "single_effect" sumstats <- preprocess_results$sumstats LD_mat <- preprocess_results$LD_mat From 96b4d2423aa0649929e777bd5d8dc478c79b0b62 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Sun, 20 Apr 2025 03:18:23 +0000 Subject: [PATCH 12/26] archive --- R/univariate_diagnostics.R | 65 ++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R index d07a52d4..1a818d0b 100644 --- a/R/univariate_diagnostics.R +++ b/R/univariate_diagnostics.R @@ -145,14 +145,14 @@ extract_top_pip_info <- function(con_data) { p_value = z_to_pvalue(top_z) # Create the dataframe row data.frame( - cs_name = NA_character_, - variants_per_cs = NA_integer_, + cs_name = NA, + variants_per_cs = NA, top_variant = top_variant, top_variant_index = top_pip_index, top_pip = top_pip, top_z = top_z, p_value = p_value, - cs_corr = NA_character_ + cs_corr = list() ) } @@ -190,30 +190,23 @@ parse_cs_corr <- function(df) { if (!is.data.table(df)) { setDT(df) } + extract_correlations <- function(x) { - # # Explicit conversion to character (main change) - # x <- as.character(x) - - # Same NA and empty string check - if(is.na(x) || x == "" || !grepl(",", x)) { - return(list(values = numeric(0), max_corr = NA, min_corr = NA)) + # Early return if x is invalid + if(is.na(x) || x == "" || is.null(x) || !grepl(",", as.character(x))) { + return(list(values = numeric(0), max_corr = NA_real_, min_corr = NA_real_)) } - # Conversion to numeric, similar to original + # Convert and filter values values <- as.numeric(unlist(strsplit(x, ","))) - - # Same length check - if(length(values) == 0) { - return(list(values = numeric(0), max_corr = NA, min_corr = NA)) - } - - # Same filtering and calculation values_filtered <- abs(values[values != 1]) - return(list( + + # Return list with NA if no valid correlations + list( values = values, - max_corr = max(abs(values_filtered), na.rm = TRUE), - min_corr = min(abs(values_filtered), na.rm = TRUE) - )) + max_corr = if(length(values_filtered) > 0) max(abs(values_filtered), na.rm = TRUE) else NA_real_, + min_corr = if(length(values_filtered) > 0) min(abs(values_filtered), na.rm = TRUE) else NA_real_ + ) } print("df$cs_corr") print(df$cs_corr) @@ -221,27 +214,29 @@ parse_cs_corr <- function(df) { processed_results <- lapply(df$cs_corr, extract_correlations) print("processed_results") print(processed_results) + # If no valid results, add NA columns and return + if(all(sapply(processed_results, function(x) length(x$values) == 0))) { + df[, c("cs_corr_max", "cs_corr_min") := list(NA_real_, NA_real_)] + return(df) + } + # Determine max number of correlations max_corr_count <- max(sapply(processed_results, function(x) length(x$values))) - # Create column names + + # Create and add correlation columns col_names <- paste0("cs_corr_", 1:max_corr_count) - # Prepare a list of correlation values for each column - corr_list <- lapply(1:max_corr_count, function(j) { - sapply(processed_results, function(x) { - if(length(x$values) >= j) x$values[j] else NA_real_ - }) - }) - print("corr_list") - print(corr_list) - # Add columns to the data.table for(i in seq_along(col_names)) { - df[, (col_names[i]) := corr_list[[i]]] + df[, (col_names[i]) := sapply(processed_results, function(x) { + if(length(x$values) >= i) x$values[i] else NA_real_ + })] } - # Add max and min columns - df[, cs_corr_max := sapply(processed_results, `[[`, "max_corr")] - df[, cs_corr_min := sapply(processed_results, `[[`, "min_corr")] + # Add max and min correlation columns + df[, `:=`( + cs_corr_max = sapply(processed_results, `[[`, "max_corr"), + cs_corr_min = sapply(processed_results, `[[`, "min_corr") + )] return(df) } From 02afda6441892a1507e9bcaee404a7748dc72838 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Sun, 20 Apr 2025 04:09:52 +0000 Subject: [PATCH 13/26] archive --- R/univariate_pipeline.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 7e1e6fd6..4a156109 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -290,13 +290,16 @@ rss_analysis_pipeline <- function( block_cs_metrics = parse_cs_corr(block_cs_metrics) print("block_cs_metrics") print(str(block_cs_metrics)) - if (nrow(block_cs_metrics)) { + cs_row = block_cs_metrics %>% filter(!is.na(block_cs_metrics$variants_per_cs)) + if (nrow(cs_row)>1) {# CS > 1 block_cs_metrics <- block_cs_metrics %>% mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) { NA_real_ } else { max(cs_corr_max, na.rm = TRUE) }) + print("block_cs_metrics") + print(str(block_cs_metrics)) if (any(block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5)) { finemapping_method <- "bayesian_conditional_regression" pri_coverage <- finemapping_opts$coverage[1] From 9331532d78b259c4a0b47f80822225adfcac2561 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Sun, 20 Apr 2025 05:38:59 +0000 Subject: [PATCH 14/26] final version --- R/univariate_diagnostics.R | 4 ---- R/univariate_pipeline.R | 11 ----------- 2 files changed, 15 deletions(-) diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R index 1a818d0b..b1e91bd2 100644 --- a/R/univariate_diagnostics.R +++ b/R/univariate_diagnostics.R @@ -208,12 +208,8 @@ parse_cs_corr <- function(df) { min_corr = if(length(values_filtered) > 0) min(abs(values_filtered), na.rm = TRUE) else NA_real_ ) } - print("df$cs_corr") - print(df$cs_corr) # Process correlations processed_results <- lapply(df$cs_corr, extract_correlations) - print("processed_results") - print(processed_results) # If no valid results, add NA columns and return if(all(sapply(processed_results, function(x) length(x$values) == 0))) { df[, c("cs_corr_max", "cs_corr_min") := list(NA_real_, NA_real_)] diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 4a156109..6e19c0aa 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -237,8 +237,6 @@ rss_analysis_pipeline <- function( # Perform fine-mapping if (!is.null(finemapping_method)) { - print("finemapping_method") - print(finemapping_method) pri_coverage <- finemapping_opts$coverage[1] sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL res <- susie_rss_pipeline(sumstats, LD_mat, @@ -249,8 +247,6 @@ rss_analysis_pipeline <- function( secondary_coverage = sec_coverage, signal_cutoff = finemapping_opts$signal_cutoff ) - print("str(res)") - print(str(res)) if (!is.null(qc_method)) { res$outlier_number <- qc_results$outlier_number } @@ -282,14 +278,9 @@ rss_analysis_pipeline <- function( } } } - print("block_cs_metrics") - print(block_cs_metrics) # sensitive check for additional analyses if (!is.null(block_cs_metrics) && nrow(block_cs_metrics) > 0) { - print("pass") block_cs_metrics = parse_cs_corr(block_cs_metrics) - print("block_cs_metrics") - print(str(block_cs_metrics)) cs_row = block_cs_metrics %>% filter(!is.na(block_cs_metrics$variants_per_cs)) if (nrow(cs_row)>1) {# CS > 1 block_cs_metrics <- block_cs_metrics %>% @@ -298,8 +289,6 @@ rss_analysis_pipeline <- function( } else { max(cs_corr_max, na.rm = TRUE) }) - print("block_cs_metrics") - print(str(block_cs_metrics)) if (any(block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5)) { finemapping_method <- "bayesian_conditional_regression" pri_coverage <- finemapping_opts$coverage[1] From 582bf034a1cfa787c8949e88c58040d0f8a95e41 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Sun, 20 Apr 2025 21:37:14 +0000 Subject: [PATCH 15/26] standardize the function name --- R/{univariate_diagnostics.R => univariate_rss_diagnostics.R} | 1 - 1 file changed, 1 deletion(-) rename R/{univariate_diagnostics.R => univariate_rss_diagnostics.R} (99%) diff --git a/R/univariate_diagnostics.R b/R/univariate_rss_diagnostics.R similarity index 99% rename from R/univariate_diagnostics.R rename to R/univariate_rss_diagnostics.R index b1e91bd2..723d2fb7 100644 --- a/R/univariate_diagnostics.R +++ b/R/univariate_rss_diagnostics.R @@ -270,7 +270,6 @@ parse_cs_corr <- function(df) { #' @importFrom dplyr case_when #' #' @export -# cs_exp = cs_exp |> filter(!is.na(p_value)) auto_decision <- function(df, high_corr_cols) { # Identify top_cs top_cs_index <- which.max(abs(df$top_z)) From a2a63fcd0e2d47cbd064fc16e9e1349bc010fe6f Mon Sep 17 00:00:00 2001 From: rl3328 Date: Sun, 20 Apr 2025 21:38:55 +0000 Subject: [PATCH 16/26] Update documentation --- man/auto_decision.Rd | 2 +- man/extract_cs_info.Rd | 2 +- man/extract_top_pip_info.Rd | 2 +- man/get_susie_result.Rd | 2 +- man/parse_cs_corr.Rd | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/man/auto_decision.Rd b/man/auto_decision.Rd index 65cae364..d9dbc260 100644 --- a/man/auto_decision.Rd +++ b/man/auto_decision.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/univariate_diagnostics.R +% Please edit documentation in R/univariate_rss_diagnostics.R \name{auto_decision} \alias{auto_decision} \title{Process Credible Set Information and Determine Updating Strategy} diff --git a/man/extract_cs_info.Rd b/man/extract_cs_info.Rd index b9ccba29..fda919b6 100644 --- a/man/extract_cs_info.Rd +++ b/man/extract_cs_info.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/univariate_diagnostics.R +% Please edit documentation in R/univariate_rss_diagnostics.R \name{extract_cs_info} \alias{extract_cs_info} \title{Process Credible Sets (CS) from Finemapping Results} diff --git a/man/extract_top_pip_info.Rd b/man/extract_top_pip_info.Rd index 2e319b89..ce2ee41d 100644 --- a/man/extract_top_pip_info.Rd +++ b/man/extract_top_pip_info.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/univariate_diagnostics.R +% Please edit documentation in R/univariate_rss_diagnostics.R \name{extract_top_pip_info} \alias{extract_top_pip_info} \title{Extract Information for Top Variant from Finemapping Results} diff --git a/man/get_susie_result.Rd b/man/get_susie_result.Rd index 25706df1..e1895924 100644 --- a/man/get_susie_result.Rd +++ b/man/get_susie_result.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/univariate_diagnostics.R +% Please edit documentation in R/univariate_rss_diagnostics.R \name{get_susie_result} \alias{get_susie_result} \title{Extract SuSiE Results from Finemapping Data} diff --git a/man/parse_cs_corr.Rd b/man/parse_cs_corr.Rd index 2d4d5568..93fea320 100644 --- a/man/parse_cs_corr.Rd +++ b/man/parse_cs_corr.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/univariate_diagnostics.R +% Please edit documentation in R/univariate_rss_diagnostics.R \name{parse_cs_corr} \alias{parse_cs_corr} \title{Parse Credible Set Correlations from extract_cs_info() Output} From b5882e311e764a087d24c9cbdeb1627e2fdb3f27 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Mon, 21 Apr 2025 11:48:39 -0400 Subject: [PATCH 17/26] fix some warning when updating package documentation --- R/Untitled Document.ipynb | 1 + R/file_utils.R | 19 +++++++++++++++++++ R/quail_vqtl.R | 5 ++--- R/quantile_twas_weight.R | 1 - R/regularized_regression.R | 6 ++++++ 5 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 R/Untitled Document.ipynb diff --git a/R/Untitled Document.ipynb b/R/Untitled Document.ipynb new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/R/Untitled Document.ipynb @@ -0,0 +1 @@ + diff --git a/R/file_utils.R b/R/file_utils.R index f0c8ce1e..94533314 100644 --- a/R/file_utils.R +++ b/R/file_utils.R @@ -489,6 +489,11 @@ load_regional_association_data <- function(genotype, # PLINK file )) } +#' Load Regional Univariate Association Data +#' +#' This function loads regional association data for univariate analysis. +#' It includes residual matrices, original genotype data, and additional metadata. +#' #' @importFrom matrixStats colVars #' @return A list #' @export @@ -508,6 +513,11 @@ load_regional_univariate_data <- function(...) { )) } +#' Load Regional Data for Regression Modeling +#' +#' This function loads regional association data formatted for regression modeling. +#' It includes phenotype, genotype, and covariate matrices along with metadata. +#' #' @return A list #' @export load_regional_regression_data <- function(...) { @@ -543,6 +553,11 @@ pheno_list_to_mat <- function(data_list) { return(data_list) } +#' Load and Preprocess Regional Multivariate Data +#' +#' This function loads regional association data and processes it into a multivariate format. +#' It optionally filters out samples based on missingness thresholds in the response matrix. +#' #' @importFrom matrixStats colVars #' @return A list #' @export @@ -578,6 +593,10 @@ load_regional_multivariate_data <- function(matrix_y_min_complete = NULL, # when )) } +#' Load Regional Functional Association Data +#' +#' This function loads precomputed regional functional association data. +#' #' @return A list #' @export load_regional_functional_data <- function(...) { diff --git a/R/quail_vqtl.R b/R/quail_vqtl.R index 9ba03d92..7649c624 100644 --- a/R/quail_vqtl.R +++ b/R/quail_vqtl.R @@ -8,11 +8,10 @@ #' @param return_residuals Logical, whether to return residuals (default: FALSE). #' @return A list containing regression results: \code{betahat}, \code{sebetahat}, \code{z_scores}, \code{p_values}, and \code{q_values}. #' @examples -#' @noRd #' X <- matrix(rnorm(1000), 100, 10) #' y <- rnorm(100) #' results <- univariate_regression(X, y) - +#' @noRd univariate_regression <- function(X, y, Z = NULL, center = TRUE, scale = FALSE, return_residuals = FALSE) { y_na <- which(is.na(y)) @@ -96,10 +95,10 @@ univariate_regression <- function(X, y, Z = NULL, center = TRUE, #' @param covariates Optional numeric matrix of covariates (n x k), where k is the number of covariates. #' @return A data frame containing regression results for each SNP, including \code{BETA}, \code{SE}, \code{Z}, \code{P}, and \code{Q}. #' @examples -#' @noRd #' genotype <- matrix(rbinom(1000 * 10, 2, 0.3), 1000, 10) #' phenotype <- rnorm(1000) #' results <- run_linear_regression1(genotype, phenotype) +#' @noRd run_linear_regression <- function(genotype, phenotype, covariates = NULL) { if (!is.null(covariates)) { covariates <- as.data.frame(lapply(covariates, as.numeric)) diff --git a/R/quantile_twas_weight.R b/R/quantile_twas_weight.R index 8f722779..dbcd1b88 100644 --- a/R/quantile_twas_weight.R +++ b/R/quantile_twas_weight.R @@ -412,7 +412,6 @@ corr_filter <- function(X, cor_thres = 0.8) { #' @param response Optional response vector for "response_correlation" strategy #' @param max_iterations Maximum number of iterations to attempt removing problematic columns #' @return Cleaned matrix X with problematic columns removed -#' @importFrom stats qr #' @noRd check_remove_highcorr_snp <- function(X = X, C = C, strategy = c("correlation", "variance", "response_correlation"), response = NULL, max_iterations = 300, corr_thresholds = seq(0.75, 0.5, by = -0.05)) { strategy <- match.arg(strategy) diff --git a/R/regularized_regression.R b/R/regularized_regression.R index b8a84e11..2d21335a 100644 --- a/R/regularized_regression.R +++ b/R/regularized_regression.R @@ -437,6 +437,12 @@ enet_weights <- function(X, y) glmnet_weights(X, y, 0.5) #' @export lasso_weights <- function(X, y) glmnet_weights(X, y, 1) +#' Compute Weights Using mr.ash Shrinkage +#' +#' This function fits the `mr.ash` model (adaptive shrinkage regression) to estimate weights +#' for a given set of predictors and response. It uses optional prior standard deviation initialization +#' and can accept custom initial beta values. +#' #' @examples #' wgt.mr.ash <- mrash_weights(eqtl$X, eqtl$y_res, beta.init = lasso_weights(X, y)) #' @importFrom stats predict From 16ee663f17021d2c2b74ab8762953759165228f8 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Mon, 21 Apr 2025 11:49:48 -0400 Subject: [PATCH 18/26] clean up --- R/Untitled Document.ipynb | 1 - 1 file changed, 1 deletion(-) delete mode 100644 R/Untitled Document.ipynb diff --git a/R/Untitled Document.ipynb b/R/Untitled Document.ipynb deleted file mode 100644 index 8b137891..00000000 --- a/R/Untitled Document.ipynb +++ /dev/null @@ -1 +0,0 @@ - From 50e8042bfa8994972f083b321f40aa18887a1e2b Mon Sep 17 00:00:00 2001 From: rl3328 Date: Mon, 21 Apr 2025 15:51:16 +0000 Subject: [PATCH 19/26] Update documentation --- man/load_regional_functional_data.Rd | 14 +++++++++++ man/load_regional_multivariate_data.Rd | 15 ++++++++++++ man/load_regional_regression_data.Rd | 15 ++++++++++++ man/load_regional_univariate_data.Rd | 15 ++++++++++++ man/mrash_weights.Rd | 16 ++++++++++++ man/run_linear_regression.Rd | 21 ---------------- man/univariate_regression.Rd | 34 -------------------------- 7 files changed, 75 insertions(+), 55 deletions(-) create mode 100644 man/load_regional_functional_data.Rd create mode 100644 man/load_regional_multivariate_data.Rd create mode 100644 man/load_regional_regression_data.Rd create mode 100644 man/load_regional_univariate_data.Rd create mode 100644 man/mrash_weights.Rd delete mode 100644 man/run_linear_regression.Rd delete mode 100644 man/univariate_regression.Rd diff --git a/man/load_regional_functional_data.Rd b/man/load_regional_functional_data.Rd new file mode 100644 index 00000000..8c66aa6f --- /dev/null +++ b/man/load_regional_functional_data.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/file_utils.R +\name{load_regional_functional_data} +\alias{load_regional_functional_data} +\title{Load Regional Functional Association Data} +\usage{ +load_regional_functional_data(...) +} +\value{ +A list +} +\description{ +This function loads precomputed regional functional association data. +} diff --git a/man/load_regional_multivariate_data.Rd b/man/load_regional_multivariate_data.Rd new file mode 100644 index 00000000..00880981 --- /dev/null +++ b/man/load_regional_multivariate_data.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/file_utils.R +\name{load_regional_multivariate_data} +\alias{load_regional_multivariate_data} +\title{Load and Preprocess Regional Multivariate Data} +\usage{ +load_regional_multivariate_data(matrix_y_min_complete = NULL, ...) +} +\value{ +A list +} +\description{ +This function loads regional association data and processes it into a multivariate format. +It optionally filters out samples based on missingness thresholds in the response matrix. +} diff --git a/man/load_regional_regression_data.Rd b/man/load_regional_regression_data.Rd new file mode 100644 index 00000000..f069f59f --- /dev/null +++ b/man/load_regional_regression_data.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/file_utils.R +\name{load_regional_regression_data} +\alias{load_regional_regression_data} +\title{Load Regional Data for Regression Modeling} +\usage{ +load_regional_regression_data(...) +} +\value{ +A list +} +\description{ +This function loads regional association data formatted for regression modeling. +It includes phenotype, genotype, and covariate matrices along with metadata. +} diff --git a/man/load_regional_univariate_data.Rd b/man/load_regional_univariate_data.Rd new file mode 100644 index 00000000..47651c05 --- /dev/null +++ b/man/load_regional_univariate_data.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/file_utils.R +\name{load_regional_univariate_data} +\alias{load_regional_univariate_data} +\title{Load Regional Univariate Association Data} +\usage{ +load_regional_univariate_data(...) +} +\value{ +A list +} +\description{ +This function loads regional association data for univariate analysis. +It includes residual matrices, original genotype data, and additional metadata. +} diff --git a/man/mrash_weights.Rd b/man/mrash_weights.Rd new file mode 100644 index 00000000..9d567905 --- /dev/null +++ b/man/mrash_weights.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/regularized_regression.R +\name{mrash_weights} +\alias{mrash_weights} +\title{Compute Weights Using mr.ash Shrinkage} +\usage{ +mrash_weights(X, y, init_prior_sd = TRUE, ...) +} +\description{ +This function fits the `mr.ash` model (adaptive shrinkage regression) to estimate weights +for a given set of predictors and response. It uses optional prior standard deviation initialization +and can accept custom initial beta values. +} +\examples{ +wgt.mr.ash <- mrash_weights(eqtl$X, eqtl$y_res, beta.init = lasso_weights(X, y)) +} diff --git a/man/run_linear_regression.Rd b/man/run_linear_regression.Rd deleted file mode 100644 index 6a37e133..00000000 --- a/man/run_linear_regression.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quail_vqtl.R -\name{run_linear_regression} -\alias{run_linear_regression} -\title{Perform Linear Regression for GWAS} -\usage{ -run_linear_regression(genotype, phenotype, covariates = NULL) -} -\arguments{ -\item{genotype}{Numeric matrix of genotypes (n x p), where n is the number of samples and p is the number of SNPs.} - -\item{phenotype}{Numeric vector of phenotypes (length n).} - -\item{covariates}{Optional numeric matrix of covariates (n x k), where k is the number of covariates.} -} -\value{ -A data frame containing regression results for each SNP, including \code{BETA}, \code{SE}, \code{Z}, \code{P}, and \code{Q}. -} -\description{ -Perform Linear Regression for GWAS -} diff --git a/man/univariate_regression.Rd b/man/univariate_regression.Rd deleted file mode 100644 index bc2897be..00000000 --- a/man/univariate_regression.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quail_vqtl.R -\name{univariate_regression} -\alias{univariate_regression} -\title{Perform Univariate Regression for Genotype Data} -\usage{ -univariate_regression( - X, - y, - Z = NULL, - center = TRUE, - scale = FALSE, - return_residuals = FALSE -) -} -\arguments{ -\item{X}{Numeric matrix of genotypes (n x p), where n is the number of samples and p is the number of SNPs.} - -\item{y}{Numeric vector of phenotypes (length n).} - -\item{Z}{Optional numeric matrix of covariates (n x k), where k is the number of covariates.} - -\item{center}{Logical, whether to center the data (default: TRUE).} - -\item{scale}{Logical, whether to scale the data (default: FALSE).} - -\item{return_residuals}{Logical, whether to return residuals (default: FALSE).} -} -\value{ -A list containing regression results: \code{betahat}, \code{sebetahat}, \code{z_scores}, \code{p_values}, and \code{q_values}. -} -\description{ -Perform Univariate Regression for Genotype Data -} From cf590306f69185799f713918969efa2a5b1b9e8e Mon Sep 17 00:00:00 2001 From: rl3328 Date: Mon, 21 Apr 2025 16:02:36 +0000 Subject: [PATCH 20/26] update the package documentation --- R/Untitled.ipynb | 233 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 R/Untitled.ipynb diff --git a/R/Untitled.ipynb b/R/Untitled.ipynb new file mode 100644 index 00000000..9cb2a10b --- /dev/null +++ b/R/Untitled.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "0f57e1d8-6a44-4c87-a967-7bce7f81a899", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[1m\u001b[22m\u001b[36mℹ\u001b[39m Updating \u001b[34mpecotmr\u001b[39m documentation\n", + "Registered S3 methods overwritten by 'readr':\n", + " method from \n", + " as.data.frame.spec_tbl_df vroom\n", + " as_tibble.spec_tbl_df vroom\n", + " format.col_spec vroom\n", + " print.col_spec vroom\n", + " print.collector vroom\n", + " print.date_names vroom\n", + " print.locale vroom\n", + " str.col_spec vroom\n", + "\n", + "\u001b[1m\u001b[22m\u001b[36mℹ\u001b[39m Loading \u001b[34mpecotmr\u001b[39m\n", + "Registered S3 methods overwritten by 'RcppGSL':\n", + " method from \n", + " predict.fastLm RcppArmadillo\n", + " print.fastLm RcppArmadillo\n", + " summary.fastLm RcppArmadillo\n", + " print.summary.fastLm RcppArmadillo\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exports from /home/ubuntu/pecotmr/src/dentist_iterative_impute.cpp:\n", + " List dentist_iterative_impute(const arma::mat& LD_mat, size_t nSample, const arma::vec& zScore, double pValueThreshold, float propSVD, bool gcControl, int nIter, double gPvalueThreshold, int ncpus, int seed, bool correct_chen_et_al_bug, bool verbose = false)\n", + "\n", + "Exports from /home/ubuntu/pecotmr/src/mr_ash.cpp:\n", + " List rcpp_mr_ash_rss(const NumericVector& bhat, const NumericVector& shat, const NumericVector& z, const NumericMatrix& R, double var_y, int n, double sigma2_e, const NumericVector& s0, const NumericVector& w0, const NumericVector& mu1_init, double tol = 1e-8, int max_iter = 1e5, bool update_w0 = true, bool update_sigma = true, bool compute_ELBO = true, bool standardize = false, int ncpus = 1)\n", + "\n", + "Exports from /home/ubuntu/pecotmr/src/prscs_mcmc.cpp:\n", + " Rcpp::List prs_cs_rcpp(double a, double b, Rcpp::Nullable phi, Rcpp::NumericVector bhat, Rcpp::Nullable maf, int n, Rcpp::List ld_blk, int n_iter, int n_burnin, int thin, bool verbose, Rcpp::Nullable seed)\n", + "\n", + "Exports from /home/ubuntu/pecotmr/src/qtl_enrichment.cpp:\n", + " Rcpp::List qtl_enrichment_rcpp(SEXP r_gwas_pip, SEXP r_qtl_susie_fit, double pi_gwas = 0, double pi_qtl = 0, int ImpN = 25, double shrinkage_lambda = 1.0, int num_threads = 1)\n", + "\n", + "Exports from /home/ubuntu/pecotmr/src/qtl_enrichment.hpp:\n", + "\n", + "Exports from /home/ubuntu/pecotmr/src/sdpr.cpp:\n", + " Rcpp::List sdpr_rcpp(const std::vector& bhat, const Rcpp::List& LD, int n, Rcpp::Nullable per_variant_sample_size = R_NilValue, Rcpp::Nullable array = R_NilValue, double a = 0.1, double c = 1.0, size_t M = 1000, double a0k = 0.5, double b0k = 0.5, int iter = 1000, int burn = 200, int thin = 5, unsigned n_threads = 1, int opt_llk = 1, bool verbose = true)\n", + "\n", + "/home/ubuntu/pecotmr/src/RcppExports.cpp updated.\n", + "/home/ubuntu/pecotmr/R/RcppExports.R updated.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36mℹ\u001b[39m Re-compiling \u001b[34m\u001b[34mpecotmr\u001b[34m\u001b[39m (debug build)\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m──\u001b[39m \u001b[36mR CMD INSTALL\u001b[39m \u001b[36m───────────────────────────────────────────────────────────────\u001b[39m\n", + "* installing *source* package ‘pecotmr’ ...\n", + "** using staged installation\n", + "** libs\n", + "using C++ compiler: ‘x86_64-conda-linux-gnu-c++ (conda-forge gcc 14.2.0-1) 14.2.0’\n", + "using C++11\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c RcppExports.cpp -o RcppExports.o\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[KRcppExports.cpp:4\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", + " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", + " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c dentist_iterative_impute.cpp -o dentist_iterative_impute.o\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Kdentist_iterative_impute.cpp:8\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", + " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", + " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c function_pool.cpp -o function_pool.o\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/queue:60\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Kfunction_pool.h:1\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Kfunction_pool.cpp:2\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", + " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", + " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c mr_ash.cpp -o mr_ash.o\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Kmr_ash.cpp:1\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", + " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", + " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c prscs_mcmc.cpp -o prscs_mcmc.o\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Kprscs_mcmc.cpp:6\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", + " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", + " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c qtl_enrichment.cpp -o qtl_enrichment.o\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Kqtl_enrichment.hpp:3\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Kqtl_enrichment.cpp:1\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", + " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", + " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c sdpr.cpp -o sdpr.o\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Ksdpr.cpp:1\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", + " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", + " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c sdpr_mcmc.cpp -o sdpr_mcmc.o\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/stl_algobase.h:59\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/algorithm:60\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Ksdpr_mcmc.cpp:1\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", + " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", + " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", + "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/armadillo:27\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Ksdpr_mcmc.h:3\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Ksdpr_mcmc.cpp:15\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/armadillo_bits/compiler_check.hpp:87:98:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[K'\u001b[01m\u001b[K#pragma message: INFO: support for C++11 is deprecated; minimum recommended standard is C++14\u001b[m\u001b[K'\n", + " 87 | #pragma message (\"INFO: support for C++11 is deprecated; minimum recommended standard is C++14\"\u001b[01;36m\u001b[K)\u001b[m\u001b[K\n", + " | \u001b[01;36m\u001b[K^\u001b[m\u001b[K\n", + "x86_64-conda-linux-gnu-c++ -std=gnu++11 -shared -L/home/ubuntu/.pixi/envs/r-base/lib/R/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/home/ubuntu/.pixi/envs/r-base/lib -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/r-base/lib -o pecotmr.so RcppExports.o dentist_iterative_impute.o function_pool.o mr_ash.o prscs_mcmc.o qtl_enrichment.o sdpr.o sdpr_mcmc.o -fopenmp -llapack -lblas -lgfortran -lm -lquadmath -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/home/ubuntu/.pixi/envs/r-base/lib -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/python/lib -lgsl -lgslcblas -lm -L/home/ubuntu/.pixi/envs/r-base/lib/R/lib -lR\n", + "installing to /tmp/RtmpDn7wph/devtools_install_e03a614934/00LOCK-pecotmr/00new/pecotmr/libs\n", + "** checking absolute paths in shared objects and dynamic libraries\n", + "* DONE (pecotmr)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning message:\n", + "“replacing previous import ‘vroom::cols’ by ‘readr::cols’ when loading ‘pecotmr’”\n" + ] + } + ], + "source": [ + "devtools::document()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9538a0e-86e3-4c15-9319-9ef790457442", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 53f0c2b5bc67fbfcfae5ece39b7aca4c40a15afe Mon Sep 17 00:00:00 2001 From: rl3328 Date: Mon, 21 Apr 2025 16:02:58 +0000 Subject: [PATCH 21/26] clean up --- R/Untitled.ipynb | 233 ----------------------------------------------- 1 file changed, 233 deletions(-) delete mode 100644 R/Untitled.ipynb diff --git a/R/Untitled.ipynb b/R/Untitled.ipynb deleted file mode 100644 index 9cb2a10b..00000000 --- a/R/Untitled.ipynb +++ /dev/null @@ -1,233 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "0f57e1d8-6a44-4c87-a967-7bce7f81a899", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[1m\u001b[22m\u001b[36mℹ\u001b[39m Updating \u001b[34mpecotmr\u001b[39m documentation\n", - "Registered S3 methods overwritten by 'readr':\n", - " method from \n", - " as.data.frame.spec_tbl_df vroom\n", - " as_tibble.spec_tbl_df vroom\n", - " format.col_spec vroom\n", - " print.col_spec vroom\n", - " print.collector vroom\n", - " print.date_names vroom\n", - " print.locale vroom\n", - " str.col_spec vroom\n", - "\n", - "\u001b[1m\u001b[22m\u001b[36mℹ\u001b[39m Loading \u001b[34mpecotmr\u001b[39m\n", - "Registered S3 methods overwritten by 'RcppGSL':\n", - " method from \n", - " predict.fastLm RcppArmadillo\n", - " print.fastLm RcppArmadillo\n", - " summary.fastLm RcppArmadillo\n", - " print.summary.fastLm RcppArmadillo\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Exports from /home/ubuntu/pecotmr/src/dentist_iterative_impute.cpp:\n", - " List dentist_iterative_impute(const arma::mat& LD_mat, size_t nSample, const arma::vec& zScore, double pValueThreshold, float propSVD, bool gcControl, int nIter, double gPvalueThreshold, int ncpus, int seed, bool correct_chen_et_al_bug, bool verbose = false)\n", - "\n", - "Exports from /home/ubuntu/pecotmr/src/mr_ash.cpp:\n", - " List rcpp_mr_ash_rss(const NumericVector& bhat, const NumericVector& shat, const NumericVector& z, const NumericMatrix& R, double var_y, int n, double sigma2_e, const NumericVector& s0, const NumericVector& w0, const NumericVector& mu1_init, double tol = 1e-8, int max_iter = 1e5, bool update_w0 = true, bool update_sigma = true, bool compute_ELBO = true, bool standardize = false, int ncpus = 1)\n", - "\n", - "Exports from /home/ubuntu/pecotmr/src/prscs_mcmc.cpp:\n", - " Rcpp::List prs_cs_rcpp(double a, double b, Rcpp::Nullable phi, Rcpp::NumericVector bhat, Rcpp::Nullable maf, int n, Rcpp::List ld_blk, int n_iter, int n_burnin, int thin, bool verbose, Rcpp::Nullable seed)\n", - "\n", - "Exports from /home/ubuntu/pecotmr/src/qtl_enrichment.cpp:\n", - " Rcpp::List qtl_enrichment_rcpp(SEXP r_gwas_pip, SEXP r_qtl_susie_fit, double pi_gwas = 0, double pi_qtl = 0, int ImpN = 25, double shrinkage_lambda = 1.0, int num_threads = 1)\n", - "\n", - "Exports from /home/ubuntu/pecotmr/src/qtl_enrichment.hpp:\n", - "\n", - "Exports from /home/ubuntu/pecotmr/src/sdpr.cpp:\n", - " Rcpp::List sdpr_rcpp(const std::vector& bhat, const Rcpp::List& LD, int n, Rcpp::Nullable per_variant_sample_size = R_NilValue, Rcpp::Nullable array = R_NilValue, double a = 0.1, double c = 1.0, size_t M = 1000, double a0k = 0.5, double b0k = 0.5, int iter = 1000, int burn = 200, int thin = 5, unsigned n_threads = 1, int opt_llk = 1, bool verbose = true)\n", - "\n", - "/home/ubuntu/pecotmr/src/RcppExports.cpp updated.\n", - "/home/ubuntu/pecotmr/R/RcppExports.R updated.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[36mℹ\u001b[39m Re-compiling \u001b[34m\u001b[34mpecotmr\u001b[34m\u001b[39m (debug build)\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36m──\u001b[39m \u001b[36mR CMD INSTALL\u001b[39m \u001b[36m───────────────────────────────────────────────────────────────\u001b[39m\n", - "* installing *source* package ‘pecotmr’ ...\n", - "** using staged installation\n", - "** libs\n", - "using C++ compiler: ‘x86_64-conda-linux-gnu-c++ (conda-forge gcc 14.2.0-1) 14.2.0’\n", - "using C++11\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c RcppExports.cpp -o RcppExports.o\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[KRcppExports.cpp:4\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", - " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", - " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c dentist_iterative_impute.cpp -o dentist_iterative_impute.o\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Kdentist_iterative_impute.cpp:8\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", - " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", - " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c function_pool.cpp -o function_pool.o\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/queue:60\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Kfunction_pool.h:1\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Kfunction_pool.cpp:2\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", - " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", - " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c mr_ash.cpp -o mr_ash.o\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Kmr_ash.cpp:1\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", - " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", - " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c prscs_mcmc.cpp -o prscs_mcmc.o\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Kprscs_mcmc.cpp:6\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", - " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", - " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c qtl_enrichment.cpp -o qtl_enrichment.o\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Kqtl_enrichment.hpp:3\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Kqtl_enrichment.cpp:1\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", - " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", - " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c sdpr.cpp -o sdpr.o\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Ksdpr.cpp:1\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", - " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", - " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c sdpr_mcmc.cpp -o sdpr_mcmc.o\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/stl_algobase.h:59\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/algorithm:60\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Ksdpr_mcmc.cpp:1\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n", - " 330 | # \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n", - " | \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n", - "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/armadillo:27\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Ksdpr_mcmc.h:3\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Ksdpr_mcmc.cpp:15\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/armadillo_bits/compiler_check.hpp:87:98:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[K'\u001b[01m\u001b[K#pragma message: INFO: support for C++11 is deprecated; minimum recommended standard is C++14\u001b[m\u001b[K'\n", - " 87 | #pragma message (\"INFO: support for C++11 is deprecated; minimum recommended standard is C++14\"\u001b[01;36m\u001b[K)\u001b[m\u001b[K\n", - " | \u001b[01;36m\u001b[K^\u001b[m\u001b[K\n", - "x86_64-conda-linux-gnu-c++ -std=gnu++11 -shared -L/home/ubuntu/.pixi/envs/r-base/lib/R/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/home/ubuntu/.pixi/envs/r-base/lib -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/r-base/lib -o pecotmr.so RcppExports.o dentist_iterative_impute.o function_pool.o mr_ash.o prscs_mcmc.o qtl_enrichment.o sdpr.o sdpr_mcmc.o -fopenmp -llapack -lblas -lgfortran -lm -lquadmath -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/home/ubuntu/.pixi/envs/r-base/lib -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/python/lib -lgsl -lgslcblas -lm -L/home/ubuntu/.pixi/envs/r-base/lib/R/lib -lR\n", - "installing to /tmp/RtmpDn7wph/devtools_install_e03a614934/00LOCK-pecotmr/00new/pecotmr/libs\n", - "** checking absolute paths in shared objects and dynamic libraries\n", - "* DONE (pecotmr)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Warning message:\n", - "“replacing previous import ‘vroom::cols’ by ‘readr::cols’ when loading ‘pecotmr’”\n" - ] - } - ], - "source": [ - "devtools::document()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9538a0e-86e3-4c15-9319-9ef790457442", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "R", - "language": "R", - "name": "ir" - }, - "language_info": { - "codemirror_mode": "r", - "file_extension": ".r", - "mimetype": "text/x-r-source", - "name": "R", - "pygments_lexer": "r", - "version": "4.3.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 1e08aee2cb4aa18eb73963c99fba17d574ab9c9c Mon Sep 17 00:00:00 2001 From: rl3328 Date: Mon, 21 Apr 2025 19:11:18 +0000 Subject: [PATCH 22/26] fix a minor typo --- R/univariate_pipeline.R | 4 +--- R/univariate_rss_diagnostics.R | 15 +++++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 6e19c0aa..7cc68686 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -234,7 +234,6 @@ rss_analysis_pipeline <- function( sumstats <- impute_results$result_filter LD_mat <- impute_results$LD_mat } - # Perform fine-mapping if (!is.null(finemapping_method)) { pri_coverage <- finemapping_opts$coverage[1] @@ -272,14 +271,13 @@ rss_analysis_pipeline <- function( cs_names_bvsr = names(bvsr_res$sets$cs) block_cs_metrics = extract_cs_info(con_data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci) } else { # no CS - bvsr_res = get_susie_result(res) if (sum(bvsr_res$pip > finemapping_opts$signal_cutoff) > 0) { block_cs_metrics = extract_top_pip_info(res) } } } # sensitive check for additional analyses - if (!is.null(block_cs_metrics) && nrow(block_cs_metrics) > 0) { + if (!is.null(block_cs_metrics) && length(block_cs_metrics) > 0) { block_cs_metrics = parse_cs_corr(block_cs_metrics) cs_row = block_cs_metrics %>% filter(!is.na(block_cs_metrics$variants_per_cs)) if (nrow(cs_row)>1) {# CS > 1 diff --git a/R/univariate_rss_diagnostics.R b/R/univariate_rss_diagnostics.R index 723d2fb7..4a075318 100644 --- a/R/univariate_rss_diagnostics.R +++ b/R/univariate_rss_diagnostics.R @@ -137,14 +137,13 @@ extract_cs_info <- function(con_data, cs_names, top_loci_table) { #' #' @export extract_top_pip_info <- function(con_data) { - # Find the variant with the highest PIP top_pip_index <- which.max(con_data$susie_result_trimmed$pip) top_pip <- con_data$susie_result_trimmed$pip[top_pip_index] - top_variant = con_data$variant_names[top_pip_index] - top_z = con_data$sumstats$z[top_pip_index] - p_value = z_to_pvalue(top_z) - # Create the dataframe row - data.frame( + top_variant <- con_data$variant_names[top_pip_index] + top_z <- con_data$sumstats$z[top_pip_index] + p_value <- z_to_pvalue(top_z) + + list( cs_name = NA, variants_per_cs = NA, top_variant = top_variant, @@ -152,10 +151,11 @@ extract_top_pip_info <- function(con_data) { top_pip = top_pip, top_z = top_z, p_value = p_value, - cs_corr = list() + cs_corr = NA # or NULL or an empty list if that makes more sense ) } + #' Parse Credible Set Correlations from extract_cs_info() Output #' #' This function takes the output from `extract_cs_info()` and expands the `cs_corr` column @@ -215,7 +215,6 @@ parse_cs_corr <- function(df) { df[, c("cs_corr_max", "cs_corr_min") := list(NA_real_, NA_real_)] return(df) } - # Determine max number of correlations max_corr_count <- max(sapply(processed_results, function(x) length(x$values))) From 83be4b8fe3fbff75268ff7f6745fb7287049aa03 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Mon, 21 Apr 2025 19:23:27 +0000 Subject: [PATCH 23/26] clean up --- R/univariate_pipeline.R | 1 + R/univariate_rss_diagnostics.R | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 7cc68686..fe3ab70c 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -234,6 +234,7 @@ rss_analysis_pipeline <- function( sumstats <- impute_results$result_filter LD_mat <- impute_results$LD_mat } + # Perform fine-mapping if (!is.null(finemapping_method)) { pri_coverage <- finemapping_opts$coverage[1] diff --git a/R/univariate_rss_diagnostics.R b/R/univariate_rss_diagnostics.R index 4a075318..207dfbf9 100644 --- a/R/univariate_rss_diagnostics.R +++ b/R/univariate_rss_diagnostics.R @@ -137,6 +137,7 @@ extract_cs_info <- function(con_data, cs_names, top_loci_table) { #' #' @export extract_top_pip_info <- function(con_data) { + # Find the variant with the highest PIP top_pip_index <- which.max(con_data$susie_result_trimmed$pip) top_pip <- con_data$susie_result_trimmed$pip[top_pip_index] top_variant <- con_data$variant_names[top_pip_index] @@ -151,11 +152,10 @@ extract_top_pip_info <- function(con_data) { top_pip = top_pip, top_z = top_z, p_value = p_value, - cs_corr = NA # or NULL or an empty list if that makes more sense + cs_corr = NA # or NULL ) } - #' Parse Credible Set Correlations from extract_cs_info() Output #' #' This function takes the output from `extract_cs_info()` and expands the `cs_corr` column @@ -215,6 +215,7 @@ parse_cs_corr <- function(df) { df[, c("cs_corr_max", "cs_corr_min") := list(NA_real_, NA_real_)] return(df) } + # Determine max number of correlations max_corr_count <- max(sapply(processed_results, function(x) length(x$values))) From c565d38339a654adabe8431dfe7ff34393834dd3 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Tue, 22 Apr 2025 20:04:10 +0000 Subject: [PATCH 24/26] final modification --- R/univariate_pipeline.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index fe3ab70c..cd099aad 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -357,7 +357,8 @@ rss_analysis_pipeline <- function( } result_list[[method_name]] <- ser } - } + result_list[["diagnostics"]] <- block_cs_metrics + } } - return(list(result_list = result_list, block_cs_metrics = block_cs_metrics)) + return(result_list) } From eb13f02554297e6592c33b69a8df32a02e5c03c5 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Thu, 24 Apr 2025 13:38:50 -0400 Subject: [PATCH 25/26] improve the documentation of imputation report --- R/raiss.R | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/R/raiss.R b/R/raiss.R index 73fd06c3..4df7a671 100644 --- a/R/raiss.R +++ b/R/raiss.R @@ -346,14 +346,26 @@ filter_raiss_output <- function(zscores, R2_threshold = 0.6, minimum_ld = 5, ver # Print report if (verbose) { + max_label_length <- max(nchar(c( + "Variants before filter:", + "Non-imputed variants:", + "Imputed variants:", + "Variants filtered because of low LD score:", + "Variants filtered because of low R2:", + "Remaining variants after filter:" + ))) + + format_line <- function(label, value) { + sprintf("%-*s %d", max_label_length, paste0(label, ":"), value) + } + message("IMPUTATION REPORT\n") - message("Number of SNPs:\n") - message("before filter:", NSNPs_bf_filt, "\n") - message("not imputed:", NSNPs_initial, "\n") - message("imputed:", NSNPs_imputed, "\n") - message("filtered because of LD:", NSNPs_ld_filt, "\n") - message("filtered because of R2:", NSNPs_R2_filt, "\n") - message("after filter:", NSNPs_af_filt, "\n") + message(format_line("Variants before filter", NSNPs_bf_filt)) + message(format_line("Non-imputed variants", NSNPs_initial)) + message(format_line("Imputed variants", NSNPs_imputed)) + message(format_line("Variants filtered because of low LD score", NSNPs_ld_filt)) + message(format_line("Variants filtered because of low R2", NSNPs_R2_filt)) + message(format_line("Remaining variants after filter", NSNPs_af_filt)) } return(zscore_list = list(zscores_nofilter = zscores_nofilter, zscores = zscores)) } From ff2b37208ac3acc820a03571354b6ac2975f0c13 Mon Sep 17 00:00:00 2001 From: rl3328 Date: Fri, 2 May 2025 11:35:46 -0400 Subject: [PATCH 26/26] change the auto_decision function --- R/LD.R | 5 +---- R/raiss.R | 6 +----- R/univariate_pipeline.R | 5 ----- R/univariate_rss_diagnostics.R | 20 +++++++++++++------- 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/R/LD.R b/R/LD.R index 6350b13e..426790ff 100644 --- a/R/LD.R +++ b/R/LD.R @@ -348,9 +348,6 @@ load_LD_matrix <- function(LD_meta_file_path, region, extract_coordinates = NULL # Use region chromosome as fallback for empty blocks block_chroms[j] <- as.character(intersected_LD_files$region$chrom) } - - # Remove large objects to free memory - rm(LD_matrix_processed, extracted_LD_list) } # Create combined LD matrix @@ -494,7 +491,6 @@ partition_LD_matrix <- function(ld_data, merge_small_blocks = TRUE, # Partition the matrix based on block metadata result <- extract_block_matrices(combined_matrix, block_metadata, variant_ids) - return(result) } @@ -708,6 +704,7 @@ extract_block_matrices <- function(matrix, block_metadata, variant_ids) { stringsAsFactors = FALSE ) variant_mapping <- rbind(variant_mapping, block_mapping) + } return(list( diff --git a/R/raiss.R b/R/raiss.R index 4df7a671..278292fa 100644 --- a/R/raiss.R +++ b/R/raiss.R @@ -28,7 +28,7 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01 knowns_id <- intersect(known_zscores$variant_id, ref_panel$variant_id) knowns <- which(ref_panel$variant_id %in% knowns_id) unknowns <- which(!ref_panel$variant_id %in% knowns_id) - + # Handle edge cases if (length(knowns) == 0) { if (verbose) message("No known variants found, cannot perform imputation.") @@ -51,10 +51,8 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01 # Call raiss_model results <- raiss_model(zt, sig_t, sig_i_t, lamb, rcond) - # Format the results results <- format_raiss_df(results, ref_panel, unknowns) - # Filter output results <- filter_raiss_output(results, R2_threshold, minimum_ld, verbose) @@ -72,7 +70,6 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01 } else { as.matrix(LD_matrix) } - # Return results return(list( result_nofilter = result_nofilter, @@ -193,7 +190,6 @@ raiss <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01, rcond = 0.01 lamb, rcond, R2_threshold, minimum_ld, verbose = FALSE ) - # Skip if block returned NULL (no known variants) if (!is.null(block_result)) { results_list[[block_id]] <- block_result diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index cd099aad..6826ad1a 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -200,7 +200,6 @@ rss_analysis_pipeline <- function( if (nrow(sumstats)==0){ return(list(rss_data_analyzed = sumstats)) } - # Preprocess the input data preprocess_results <- rss_basic_qc(sumstats, LD_data, skip_region = skip_region, remove_indels = remove_indels) sumstats <- preprocess_results$sumstats @@ -219,14 +218,12 @@ rss_analysis_pipeline <- function( message(paste("Follow-up on region because signals above PIP threshold", pip_cutoff_to_skip, "were detected in initial model screening.")) } } - # Perform quality control if (!is.null(qc_method)) { qc_results <- summary_stats_qc(sumstats, LD_data, n = n, var_y = var_y, method = qc_method) sumstats <- qc_results$sumstats LD_mat <- qc_results$LD_mat } - # Perform imputation if (impute) { LD_matrix <- partition_LD_matrix(LD_data) @@ -234,7 +231,6 @@ rss_analysis_pipeline <- function( sumstats <- impute_results$result_filter LD_mat <- impute_results$LD_mat } - # Perform fine-mapping if (!is.null(finemapping_method)) { pri_coverage <- finemapping_opts$coverage[1] @@ -311,7 +307,6 @@ rss_analysis_pipeline <- function( method_name <- paste0(finemapping_method, "_", "NO_QC") } result_list[[method_name]] <- bcr - finemapping_method <- "single_effect" sumstats <- preprocess_results$sumstats LD_mat <- preprocess_results$LD_mat diff --git a/R/univariate_rss_diagnostics.R b/R/univariate_rss_diagnostics.R index 207dfbf9..271b6256 100644 --- a/R/univariate_rss_diagnostics.R +++ b/R/univariate_rss_diagnostics.R @@ -286,16 +286,22 @@ auto_decision <- function(df, high_corr_cols) { # Count total and remaining CS total_cs <- nrow(df) + print("total_cs") + print(total_cs) tagged_cs_count <- sum(df$tagged_cs) - remaining_cs <- total_cs - 1 - tagged_cs_count - + if (total_cs > 0) { + remaining_cs <- total_cs - 1 - tagged_cs_count + } else { + remaining_cs <- 0 + } # Determine method df$method <- case_when( - total_cs == 1 | (tagged_cs_count == 0 & remaining_cs == 0) ~ "BVSR", - remaining_cs == 0 ~ "SER", - remaining_cs > 0 ~ "BCR", - TRUE ~ NA_character_ - ) + tagged_cs_count == 0 & total_cs > 1 ~ "BVSR", + (remaining_cs == 0 & total_cs > 1) | (total_cs == 1) ~ "SER", + remaining_cs > 0 ~ "BCR", + TRUE ~ NA_character_ +) + return(df) } \ No newline at end of file