Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -597,3 +597,82 @@ z_to_beta_se <- function(z, maf, n) {
se <- 1 / denominator
return(data.frame(beta = beta, se = se, maf = p))
}

#' Filter events based on provided context name pattern
#'
#' @param events A character vector of event names
#' @param filters A data frame with character column of type_pattern, valid_pattern, and exclude_pattern.
#' @param condition Optional label context name
#' @param remove_all_group Logical if \code{TRUE}, removes all events from the same group and character-defined context.
filter_molecular_events <- function(events, filters, condition = NULL, remove_all_group = FALSE) {
# filters is a list of filter specifications
# Each filter spec must have:
# type_pattern: pattern to identify event type
# And at least ONE of:
# valid_pattern: pattern that must exist in group
# exclude_pattern: pattern to exclude

filtered_events <- events
for (filter in filters) {
if (is.null(filter$type_pattern) ||
(is.null(filter$valid_pattern) && is.null(filter$exclude_pattern))) {
stop("Each filter must specify type_pattern and at least one of valid_pattern or exclude_pattern")
}
# Get events of this type
type_events <- filtered_events[grepl(filter$type_pattern, filtered_events)]
type_events_all <- type_events
if (length(type_events) == 0) next
# Apply valid pattern if specified
if (!is.null(filter$valid_pattern)) {
filter$valid_pattern <- strsplit(filter$valid_pattern, ",")[[1]]
valid_groups <- unique(gsub(
filter$type_pattern, "\\1",
type_events[grepl(paste(filter$valid_pattern, collapse = "|"), type_events)]
))
if (length(valid_groups) > 0) {
type_events <- type_events[grepl(paste(filter$valid_pattern, collapse = "|"), type_events)] # filter for valid pattern in type events
} else {
type_events <- character(0)
}
}
# Apply exclusions if specified
if (!is.null(filter$exclude_pattern)) {
filter$exclude_pattern <- strsplit(filter$exclude_pattern, ",")[[1]]
type_events <- type_events[!grepl(paste(filter$exclude_pattern, collapse = "|"), type_events)]
}
if (is.null(condition)) condition <- events
if (length(type_events) == length(events)) {
message(paste("All events matching", filter$type_pattern, "in", condition, "included in following analysis."))
} else if (length(type_events) == 0) {
message(paste("No events matching", filter$type_pattern, "in", condition, "pass the filtering."))
return(NULL)
} else {
exclude_events <- paste0(setdiff(type_events_all, type_events), collapse = ";")
message(paste("Some events,", exclude_events, "in", condition, "are removed. \n"))
if (remove_all_group) {
exclude_events <- setdiff(type_events_all, type_events)
exclude_groups <- gsub(filter$type_pattern, "\\1",
exclude_events[grepl(paste(filter$exclude_pattern, collapse = "|"), exclude_events)]
)
for (i in seq_along(exclude_events)) {
#if (!any(grepl(exclude_groups[i], type_events))) next # skip the event if the corresponding group is all removed
for (x in filter$exclude_pattern) exclude_events[i] <- gsub(x, ".*", exclude_events[i]) # remove exclude pattern from the context
context_key <- gsub("\\b\\d+\\b", "", exclude_events[i]) # remove stand alone numbers (strings such as "lf2" or "chr8" will be kept)
# General pattern to match all events of same group ID and similar character structure
pattern_to_remove <- paste0(".*", exclude_groups[i], ".*")
# Identify all events that match both the context structure and group ID
same_group_events <- type_events[grepl(pattern_to_remove, type_events) & grepl(gsub("\\d+", "", context_key), gsub("\\d+", "", type_events))]
type_events <- setdiff(type_events, same_group_events)
}
}
}
# Update events list
filtered_events <- unique(c(
filtered_events[!grepl(filter$type_pattern, filtered_events)],
type_events
))
}

return(filtered_events)
}

14 changes: 13 additions & 1 deletion R/twas.R
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,8 @@ twas_pipeline <- function(twas_weights_data,
mr_pval_cutoff = 0.05,
mr_coverage_column = "cs_coverage_0.95",
quantile_twas = FALSE,
output_twas_data = FALSE) {
output_twas_data = FALSE,
event_filters=NULL) {
# internal function to format TWAS output
format_twas_data <- function(post_qc_twas_data, twas_table) {
weights_list <- do.call(c, lapply(names(post_qc_twas_data), function(molecular_id) {
Expand Down Expand Up @@ -403,6 +404,17 @@ twas_pipeline <- function(twas_weights_data,

# Step 1: TWAS and MR analysis for all methods for imputable gene
rsq_option <- match.arg(rsq_option)
# filter events
if (!is.null(event_filters)){
for (weight_db in names(twas_weights_data)){
contexts <- names(twas_weights_data[[weight_db]]$weights)
filtered_events <- filter_molecular_events(contexts, event_filters, remove_all_group=TRUE)
for (db in names(twas_weights_data[[weight_db]])){
twas_weights_data[[weight_db]][[db]] <- twas_weights_data[[weight_db]][[db]][filtered_events]
}
}
}

# harmonize twas weights and gwas sumstats against LD
twas_data_qced_result <- harmonize_twas(twas_weights_data, ld_meta_file_path, gwas_meta_file)
twas_results_db <- lapply(names(twas_weights_data), function(weight_db) {
Expand Down
25 changes: 25 additions & 0 deletions man/filter_molecular_events.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/twas_pipeline.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading