Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ LinkingTo:
RcppGSL
NeedsCompilation: yes
VignetteBuilder: knitr
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ importFrom(data.table,setnames)
importFrom(doFuture,registerDoFuture)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,all_of)
importFrom(dplyr,any_of)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
importFrom(dplyr,bind_rows)
Expand Down
23 changes: 21 additions & 2 deletions R/LD.R
Original file line number Diff line number Diff line change
Expand Up @@ -355,16 +355,33 @@ load_LD_matrix <- function(LD_meta_file_path, region, extract_coordinates = NULL
LD_matrices = extracted_LD_matrices_list,
variants = extracted_LD_variants_list
)
# Normalize LD variant IDs at load time: add 'chr' prefix if missing
if (!is.null(colnames(combined_LD_matrix))) {
cn <- as.character(colnames(combined_LD_matrix))
colnames(combined_LD_matrix) <- ifelse(startsWith(cn, "chr"), cn, paste0("chr", cn))
}
if (!is.null(rownames(combined_LD_matrix))) {
rn <- as.character(rownames(combined_LD_matrix))
rownames(combined_LD_matrix) <- ifelse(startsWith(rn, "chr"), rn, paste0("chr", rn))
}
combined_LD_variants <- rownames(combined_LD_matrix)

# Now create block_metadata with all the information we've accumulated
block_variants <- lapply(extracted_LD_variants_list, function(v) v$variants)
# Normalize block_variants to match combined_LD_variants format (add 'chr' if needed)
block_variants_normalized <- lapply(block_variants, function(v) {
if (!any(startsWith(v, "chr"))) {
paste0("chr", v)
} else {
v
}
})
block_metadata <- data.frame(
block_id = seq_along(LD_file_paths),
chrom = block_chroms,
size = sapply(block_variants, length),
start_idx = sapply(block_variants, function(v) min(match(v, combined_LD_variants))),
end_idx = sapply(block_variants, function(v) max(match(v, combined_LD_variants))),
start_idx = sapply(block_variants_normalized, function(v) min(match(v, combined_LD_variants))),
end_idx = sapply(block_variants_normalized, function(v) max(match(v, combined_LD_variants))),
stringsAsFactors = FALSE
)

Expand All @@ -375,6 +392,8 @@ load_LD_matrix <- function(LD_meta_file_path, region, extract_coordinates = NULL
ref_panel <- do.call(rbind, lapply(strsplit(rownames(combined_LD_matrix), ":"), function(x) {
data.frame(chrom = x[1], pos = as.integer(x[2]), A2 = x[3], A1 = x[4])
}))
# Normalize ref_panel chrom to drop 'chr' prefix for merging with sumstats (which often use numeric chrom)
ref_panel$chrom <- sub("^chr", "", as.character(ref_panel$chrom))

merged_variant_list <- do.call(rbind, extracted_LD_variants_list)
ref_panel$variant_id <- rownames(combined_LD_matrix)
Expand Down
235 changes: 131 additions & 104 deletions R/allele_qc.R

Large diffs are not rendered by default.

31 changes: 27 additions & 4 deletions R/colocboost_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -436,13 +436,32 @@ colocboost_analysis_pipeline <- function(region_data,
if (!is.null(sumstat_data$sumstats)) {
sumstats <- lapply(sumstat_data$sumstats, function(ss) {
z <- ss$sumstats$z
variant <- paste0("chr", ss$sumstats$variant_id)
# Prefix 'chr' only if missing to avoid 'chrchr'
vi <- ss$sumstats$variant_id
has_chr <- startsWith(as.character(vi), "chr")
variant <- ifelse(has_chr, as.character(vi), paste0("chr", as.character(vi)))
n <- ss$n
data.frame("z" = z, "n" = n, "variant" = variant)

# Filter out NA values from z-scores and corresponding variants
na_mask <- !is.na(z)
if (sum(na_mask) == 0) {
message("Warning: All z-scores are NA for this summary statistic dataset")
return(data.frame("z" = numeric(0), "n" = numeric(0), "variant" = character(0)))
}

data.frame("z" = z[na_mask], "n" = n, "variant" = variant[na_mask])
})
names(sumstats) <- names(sumstat_data$sumstats)
LD_mat <- lapply(sumstat_data$LD_mat, function(ld) {
colnames(ld) <- rownames(ld) <- paste0("chr", colnames(ld))
# Ensure LD dimnames have 'chr' only once
cn <- colnames(ld)
rn <- rownames(ld)
if (!is.null(cn)) {
colnames(ld) <- ifelse(startsWith(as.character(cn), "chr"), as.character(cn), paste0("chr", as.character(cn)))
}
if (!is.null(rn)) {
rownames(ld) <- ifelse(startsWith(as.character(rn), "chr"), as.character(rn), paste0("chr", as.character(rn)))
}
return(ld)
})
LD_match <- sumstat_data$LD_match
Expand Down Expand Up @@ -687,7 +706,11 @@ qc_regional_data <- function(region_data,
n <- sumstat$n
var_y <- sumstat$var_y
conditions_sumstat <- names(sumstats)[ii]
pip_cutoff_to_skip_ld <- pip_cutoff_to_skip_sumstat[conditions_sumstat] %>% as.numeric()
pip_cutoff_to_skip_ld <- if (conditions_sumstat %in% names(pip_cutoff_to_skip_sumstat)) {
as.numeric(pip_cutoff_to_skip_sumstat[conditions_sumstat])
} else {
0
}

# Preprocess the input data
preprocess_results <- rss_basic_qc(sumstat$sumstats, LD_data, remove_indels = remove_indels)
Expand Down
8 changes: 4 additions & 4 deletions R/raiss.R
Original file line number Diff line number Diff line change
Expand Up @@ -330,10 +330,10 @@ filter_raiss_output <- function(zscores, R2_threshold = 0.6, minimum_ld = 5, ver

# Count statistics before filtering
NSNPs_bf_filt <- nrow(zscores)
NSNPs_initial <- sum(zscores$raiss_R2 == 2.0)
NSNPs_imputed <- sum(zscores$raiss_R2 != 2.0)
NSNPs_ld_filt <- sum(zscores$raiss_ld_score < minimum_ld)
NSNPs_R2_filt <- sum(zscores$raiss_R2 < R2_threshold)
NSNPs_initial <- sum(zscores$raiss_R2 == 2.0, na.rm = TRUE)
NSNPs_imputed <- sum(zscores$raiss_R2 != 2.0, na.rm = TRUE)
NSNPs_ld_filt <- sum(zscores$raiss_ld_score < minimum_ld, na.rm = TRUE)
NSNPs_R2_filt <- sum(zscores$raiss_R2 < R2_threshold, na.rm = TRUE)

# Apply filters
zscores_nofilter <- zscores
Expand Down
23 changes: 23 additions & 0 deletions R/sumstats_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,29 @@ rss_basic_qc <- function(sumstats, LD_data, skip_region = NULL, remove_indels =

sumstats_processed <- allele_flip$target_data_qced %>% arrange(pos)

# Align and subset LD by mapping core IDs (strip trailing build suffix) to exact LD IDs
ld_mat <- LD_data$combined_LD_matrix
ld_ids <- tryCatch(rownames(ld_mat), error = function(e) NULL)
if (is.null(ld_ids)) {
stop("LD matrix rownames are NULL; cannot align variant IDs.")
}
present <- sumstats_processed$variant_id %in% ld_ids
if (sum(present) == 0) {
strip_build <- function(x) sub("(:|_)b[0-9]+$", "", x)
drop_chr <- function(x) sub("^chr", "", x)
ld_core <- drop_chr(strip_build(ld_ids))
ss_core <- drop_chr(strip_build(sumstats_processed$variant_id))
map_idx <- match(ss_core, ld_core)
remap <- !is.na(map_idx)
if (sum(remap) > 0) {
sumstats_processed$variant_id[remap] <- ld_ids[map_idx[remap]]
present <- sumstats_processed$variant_id %in% ld_ids
}
}
if (sum(present) == 0) {
stop("No overlapping variants between sumstats and LD after alignment.")
}

LD_mat_processed <- LD_data$combined_LD_matrix[sumstats_processed$variant_id, sumstats_processed$variant_id, drop = FALSE]

return(list(sumstats = sumstats_processed, LD_mat = LD_mat_processed))
Expand Down
9 changes: 8 additions & 1 deletion man/align_variant_names.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading