Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/recipe/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ requirements:
- ${{ stdlib('c') }}
- ${{ compiler('cxx') }}
host:
- bioconductor-biostrings
- bioconductor-iranges
- bioconductor-qvalue
- bioconductor-s4vectors
Expand Down Expand Up @@ -69,6 +70,7 @@ requirements:
- r-vroom
- r-xicor
run:
- bioconductor-biostrings
- bioconductor-iranges
- bioconductor-qvalue
- bioconductor-s4vectors
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Create TOML from recipe
run: |
.github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
mkdir /tmp/pixi
mkdir -p /tmp/pixi
mv ${GITHUB_WORKSPACE}/pixi.toml /tmp/pixi

- name: Setup pixi
Expand All @@ -45,7 +45,9 @@ jobs:
manifest-path: /tmp/pixi/pixi.toml

- name: Run unit tests
run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml devtools_test
run: |
pixi config set --manifest-path /tmp/pixi/pixi.toml --local run-post-link-scripts insecure
pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml devtools_test

- name: Check unit test code coverage
if: matrix.platform == 'linux-64' && matrix.environment == 'r44'
Expand Down
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Authors@R: c(person("Gao Wang",role = c("cre","aut"),
person("StatFunGen Lab", role = "ctb"))
License: MIT + file LICENSE
Imports:
Biostrings,
IRanges,
Rcpp,
S4Vectors,
Expand Down
6 changes: 1 addition & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ export(coloc_wrapper)
export(colocboost_analysis_pipeline)
export(compute_LD)
export(compute_qtl_enrichment)
export(ctwas_bimfile_loader)
export(dentist)
export(dentist_from_files)
export(dentist_single_window)
export(dpr_weights)
export(enet_weights)
Expand All @@ -39,7 +37,6 @@ export(find_data)
export(find_duplicate_variants)
export(fsusie_get_cs)
export(fsusie_wrapper)
export(get_ctwas_meta_data)
export(get_filter_lbf_index)
export(get_nested_element)
export(get_ref_variant_info)
Expand All @@ -54,6 +51,7 @@ export(lassosum_rss)
export(lassosum_rss_weights)
export(lbf_to_alpha)
export(ld_loader)
export(ld_mismatch_qc)
export(load_LD_matrix)
export(load_genotype_region)
export(load_multicontext_sumstats)
Expand Down Expand Up @@ -89,7 +87,6 @@ export(normalize_variant_id)
export(otters_association)
export(otters_weights)
export(parse_cs_corr)
export(parse_dentist_output)
export(parse_region)
export(parse_variant_id)
export(perform_qr_analysis)
Expand All @@ -99,7 +96,6 @@ export(qr_screen)
export(quantile_twas_weight_pipeline)
export(raiss)
export(read_afreq)
export(read_dentist_sumstat)
export(region_to_df)
export(rss_analysis_pipeline)
export(rss_basic_qc)
Expand Down
22 changes: 2 additions & 20 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,24 +1,6 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Compute LD matrix using GCTA-style formula matching the DENTIST binary.
#'
#' This function computes pairwise Pearson correlations from a genotype matrix
#' using the exact same formula and floating-point operation order as the
#' original DENTIST C++ binary (calcLDFromBfile_gcta in bfileOperations.cpp).
#' Key differences from R's crossprod-based approach:
#' 1. Accumulates integer-valued sums sequentially (exact intermediate values)
#' 2. Handles per-pair missing data with GCTA correction formula
#' 3. Divides by nKeptSample (trimmed to multiple of 4), not nrow(X)
#'
#' @param X Numeric genotype matrix (samples x SNPs), values in {0, 1, 2, NA}.
#' Rows should already be trimmed to a multiple of 4.
#' @param ncpus Number of CPU threads for parallel computation.
#' @return Symmetric LD correlation matrix with 1.0 on diagonal.
compute_LD_gcta_cpp <- function(X, ncpus = 1L) {
.Call('_pecotmr_compute_LD_gcta_cpp', PACKAGE = 'pecotmr', X, ncpus)
}

dentist_iterative_impute <- function(LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, correct_chen_et_al_bug, verbose = FALSE) {
.Call('_pecotmr_dentist_iterative_impute', PACKAGE = 'pecotmr', LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, correct_chen_et_al_bug, verbose)
}
Expand All @@ -35,7 +17,7 @@ qtl_enrichment_rcpp <- function(r_gwas_pip, r_qtl_susie_fit, pi_gwas = 0, pi_qtl
.Call('_pecotmr_qtl_enrichment_rcpp', PACKAGE = 'pecotmr', r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, num_threads)
}

sdpr_rcpp <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = 0.1, c = 1.0, M = 1000L, a0k = 0.5, b0k = 0.5, iter = 1000L, burn = 200L, thin = 5L, n_threads = 1L, opt_llk = 1L, verbose = TRUE) {
.Call('_pecotmr_sdpr_rcpp', PACKAGE = 'pecotmr', bhat, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose)
sdpr_rcpp <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = 0.1, c = 1.0, M = 1000L, a0k = 0.5, b0k = 0.5, iter = 1000L, burn = 200L, thin = 5L, n_threads = 1L, opt_llk = 1L, verbose = TRUE, seed = NULL) {
.Call('_pecotmr_sdpr_rcpp', PACKAGE = 'pecotmr', bhat, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed)
}

34 changes: 4 additions & 30 deletions R/allele_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,27 +27,9 @@ allele_qc <- function(target_data, ref_variants, col_to_flip = NULL,
remove_indels = FALSE, remove_strand_ambiguous = TRUE,
flip_strand = FALSE, remove_unmatched = TRUE, ...) {
strand_flip <- function(ref) {
# Define a mapping for complementary bases
base_mapping <- c("A" = "T", "T" = "A", "G" = "C", "C" = "G")

# Function to complement a single base
complement_base <- function(base) {
complement <- base_mapping[base]
return(complement)
}

# Function to reverse and complement a DNA sequence
reverse_complement <- function(sequence) {
reversed <- rev(strsplit(sequence, NULL)[[1]])
complemented <- sapply(reversed, complement_base, USE.NAMES = FALSE)
return(paste(complemented, collapse = ""))
as.character(Biostrings::reverseComplement(Biostrings::DNAStringSet(ref)))
}

complemented_sequence <- sapply(ref, reverse_complement, USE.NAMES = FALSE)

return(complemented_sequence)
}

# helper to sanitize column names to avoid NA/empty names that break dplyr verbs
sanitize_names <- function(df) {
nm <- colnames(df)
Expand All @@ -67,15 +49,7 @@ allele_qc <- function(target_data, ref_variants, col_to_flip = NULL,

# check if the pattern is ATCG/DI
check_ATCG <- function(vec) {
pattern <- "^[ATCGDI]+$"

# Function to check if a single element matches the pattern
check_element <- function(element) {
grepl(pattern, element)
}

result <- sapply(vec, check_element, USE.NAMES = FALSE)
return(result)
grepl("^[ATCGDI]+$", vec)
}

# transform all inputs to dataframe
Expand All @@ -100,7 +74,7 @@ allele_qc <- function(target_data, ref_variants, col_to_flip = NULL,
if (any(columns_to_remove %in% colnames(target_data))) {
target_data <- select(target_data, -any_of(columns_to_remove))
}
match_result <- merge(target_data, ref_variants, by = c("chrom", "pos"), all = FALSE, suffixes = c(".target", ".ref")) %>% as.data.frame()
match_result <- inner_join(target_data, ref_variants, by = c("chrom", "pos"), suffix = c(".target", ".ref")) %>% as.data.frame()

# sanitize names after merge as well (merge can introduce empty names in edge cases)
match_result <- sanitize_names(match_result)
Expand Down Expand Up @@ -134,7 +108,7 @@ allele_qc <- function(target_data, ref_variants, col_to_flip = NULL,
}
# if all strand flip is un-ambigous, then we know ambigous cases are indeed a strand flip
# not a sign flip, then we infer there is no ambigous in the whole dataset, and keep those ambigous ones
if (nrow(match_result %>% filter(strand_flip == TRUE) %>% filter(strand_unambiguous == TRUE)) == 0) {
if (!any(match_result$strand_flip & match_result$strand_unambiguous)) {
match_result <- match_result %>% mutate(strand_unambiguous = TRUE)
}

Expand Down
34 changes: 2 additions & 32 deletions R/ctwas_wrapper.R
Original file line number Diff line number Diff line change
@@ -1,35 +1,5 @@
#' @importFrom vroom vroom
#' @export
ctwas_bimfile_loader <- function(bim_file_path) {
snp_info <- as.data.frame(vroom(bim_file_path, col_names = FALSE))
if (ncol(snp_info) == 9) {
colnames(snp_info) <- c("chrom", "id", "GD", "pos", "A1", "A2", "variance", "allele_freq", "n_nomiss")
} else {
colnames(snp_info) <- c("chrom", "id", "GD", "pos", "A1", "A2")
}
snp_info$id <- normalize_variant_id(snp_info$id)
return(snp_info)
}

#' Utility function to format meta data dataframe for cTWAS analyses
#' @importFrom vroom vroom
#' @export
get_ctwas_meta_data <- function(ld_meta_data_file, subset_region_ids = NULL) {
LD_info <- as.data.frame(vroom(ld_meta_data_file))
colnames(LD_info)[1] <- "chrom"
LD_info$region_id <- paste(as.integer(strip_chr_prefix(LD_info$chrom)), LD_info$start, LD_info$end, sep = "_")
LD_info$LD_file <- paste0(dirname(ld_meta_data_file), "/", gsub(",.*$", "", LD_info$path))
LD_info$SNP_file <- paste0(LD_info$LD_file, ".bim")
LD_info <- LD_info[, c("region_id", "LD_file", "SNP_file")]
region_info <- LD_info[, "region_id", drop = FALSE]
region_info$chrom <- as.integer(gsub("\\_.*$", "", region_info$region_id))
region_info$start <- as.integer(gsub("\\_.*$", "", sub("^.*?\\_", "", region_info$region_id)))
region_info$stop <- as.integer(sub("^.*?\\_", "", sub("^.*?\\_", "", region_info$region_id)))
region_info$region_id <- paste0(region_info$chrom, "_", region_info$start, "_", region_info$stop)
region_info <- region_info[, c("chrom", "start", "stop", "region_id")]
if (!is.null(subset_region_ids)) region_info <- region_info[region_info$region_id %in% subset_region_ids, ]
return(list(LD_info = LD_info, region_info = region_info))
}
### File-I/O functions (ctwas_bimfile_loader, get_ctwas_meta_data) have been
### removed. Use ld_loader() and read_bim() from the standard I/O path instead.

#' Function to select variants for ctwas weights input
#' @param region_data A list of list containing weights list and snp_info list data for multiple genes/events within a single LD block region.
Expand Down
Loading