From 750dcd3f4f1905ba56b9787c7b534a3797d6e7b6 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 25 Apr 2025 10:51:22 -0400 Subject: [PATCH 1/3] change --- DESCRIPTION | 3 ++- LICENSE | 3 ++- R/colocboost_init.R | 1 - R/colocboost_plot.R | 3 +++ cran-comments.md | 5 +++++ vignettes/Interpret_ColocBoost_Output.Rmd | 6 ++++++ 6 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 cran-comments.md diff --git a/DESCRIPTION b/DESCRIPTION index 24829f5..e8dbae9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,8 @@ Authors@R: c( Maintainer: Xuewei Cao Description: A multi-task learning approach to variable selection regression with highly correlated predictors and sparse effects, based on frequentist statistical inference. It provides statistical evidence to identify which subsets of predictors have non-zero - effects on which subsets of response variables, motivated and designed for colocalization analysis of multiple genetic association studies. + effects on which subsets of response variables, motivated and designed for colocalization analysis across genome-wide association studies (GWAS) + and quantitative trait loci (QTL) studies. The ColocBoost model is described in Cao et al. (2025) . 
Encoding: UTF-8 LazyDataCompression: xz diff --git a/LICENSE b/LICENSE index 8893d0b..37a50ff 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,3 @@ YEAR: 2025 -COPYRIGHT HOLDER: StatFunGen Lab, Columbia University +COPYRIGHT HOLDER: Xuewei Cao, Haochen Sun, Ru Feng, Daniel Nachun, Kushal Dey, Gao Wang +ORGANIZATION: StatFunGen Lab, Columbia University \ No newline at end of file diff --git a/R/colocboost_init.R b/R/colocboost_init.R index c6e9f85..76c6ceb 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -191,7 +191,6 @@ colocboost_init_model <- function(cb_data, } tmp$multi_correction <- multiple_testing_correction tmp$multi_correction_univariate <- multiple_testing_correction - if (length(multiple_testing_correction) == 1) print(class(multiple_testing_correction)) if (all(multiple_testing_correction == 1)) { tmp$stop_null <- 1 } else if (min(multiple_testing_correction) > multi_test_max) { diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 0af168b..9381837 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -204,6 +204,9 @@ colocboost_plot <- function(cb_output, y = "log10p", } else { bottom <- 2 } + # - restore users' options + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) if (!is.null(cb_plot_init$title)) { par(mfrow = c(nrow, plot_cols), mar = c(bottom, 5, 2, 1), oma = c(0, 0, 3, 0)) } else { diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..858617d --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,5 @@ +## R CMD check results + +0 errors | 0 warnings | 1 note + +* This is a new release. 
diff --git a/vignettes/Interpret_ColocBoost_Output.Rmd b/vignettes/Interpret_ColocBoost_Output.Rmd index 1c6571e..bcda621 100644 --- a/vignettes/Interpret_ColocBoost_Output.Rmd +++ b/vignettes/Interpret_ColocBoost_Output.Rmd @@ -234,19 +234,25 @@ plot(res$model_info$outcome_profile_loglik[[i]], type="p", col="#CC3333", lwd=2, ```{r objective-proximity} +# Save to restore default options +oldpar <- par(no.readonly = TRUE) # Plotting trait-specific proximity objective par(mfrow=c(2,3), mar=c(4,4,2,1)) for(i in 1:5){ plot(res$model_info$outcome_proximity_obj[[i]], type="p", col="#3366CC", lwd=2, xlab="", ylab="Trait-specific Objective", main = paste0("Trait ", i)) } +par(oldpar) ``` ```{r objective-best} +# Save to restore default options +oldpar <- par(no.readonly = TRUE) # Plotting trait-specific objective at the best update variant par(mfrow=c(2,3), mar=c(4,4,2,1)) for(i in 1:5){ plot(res$model_info$outcome_coupled_best_update_obj[[i]], type="p", col="#CC3333", lwd=2, xlab="", ylab=paste0("Objective at best update variant"), main = paste0("Trait ", i)) } +par(oldpar) ``` ### 3.5. Trait-specific effects information (**`ucos_details`**) From da6b02f0dcfa278240adf805247a77a739339d77 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 25 Apr 2025 11:14:05 -0400 Subject: [PATCH 2/3] add wordlist --- cran-comments.md | 10 +++ inst/WORDLIST | 154 +++++++++++++++++++++-------------------------- 2 files changed, 78 insertions(+), 86 deletions(-) diff --git a/cran-comments.md b/cran-comments.md index 858617d..a9736ba 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -3,3 +3,13 @@ 0 errors | 0 warnings | 1 note * This is a new release. 
+ +## Additional comments + +* This package implements methods described in our paper "ColocBoost" (Cao et al., 2025), added in DESCRIPTION +* Fixed issues requested by CRAN in previous submission: + - Reduced tarball size to less than 5 MB + - Fixed issues with resetting users' options + - Added proper COPYRIGHT HOLDER and ORGANIZATION to LICENSE + - Added explanation of acronyms used in this package to inst/WORDLIST +* The examples and vignettes use small datasets to avoid long check times \ No newline at end of file diff --git a/inst/WORDLIST b/inst/WORDLIST index 7a13d7e..080898b 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,86 +1,68 @@ -Biobank -Bioinformatics -CFB -COLOC -CoS -Codecov -ColocBoost -Colocalization -Colocalized -Conda -FineBoost -GTEx -GWAS -HyPrColoc -INDELs -Jager -KK -LD -MAF -Mazumder -Micromamba -NPC -Najar -Nealelab -PIPs -PLINK -Pre -Recalibrate -SEL -SuSiE -Sumstat -UKBB -VCP -VPA -Xcorr -YI -al -bioinformatics -chrom -cis -colocalization -colocalize -colocalized -conda -de -decayrate -doi -eQTL -et -grey -iteratively -jk -ld -lfsr -lth -maf -medRxiv -modularity -nd -npc -omics -phenotypes -pixi -pos -pre -probabilistically -pvalue -qc -rcond -recalibrate -recalibrated -reconciliate -repo -rss -sQTL -subsampled -sumstat -sumstats -tabix -uCoS -uS -ucos -uncolocalized -vcp -xQTL -xQTLs +# Analysis Tools and Methods +ColocBoost # Multiple trait colocalization algorithm we propsed +Colocalization # Method to identify shared genetic signals +Colocalized # Having undergone colocalization analysis +COLOC # A pairwise colocalization method name +FineBoost # Single trait fine-mapping algorithm we proposed +HyPrColoc # Hypothesis Prioritization in multi-trait Colocalization method name +SuSiE # Sum of Single Effects regression model + +# Statistical and Genetic Terms +CoS # Colocalization confidence set in our proposed ColocBoost method +eQTL # Expression Quantitative Trait Loci +GWAS # Genome-Wide Association Study +INDELs # Insertions and Deletions +LD # 
Linkage Disequilibrium +MAF # Minor Allele Frequency +NPC # Normalization probability of colocalization in our proposed ColocBoost method +PIPs # Posterior Inclusion Probabilities +pvalue # Statistical p-value +sQTL # Splicing Quantitative Trait Loci +SEL # Single-effect learner in our proposed ColocBoost method +Sumstat # Summary Statistics +xQTL # Any molecular Quantitative Trait Loci + +# Software, Platforms and Tools +Biobank # Refer to UK Biobank, a large-scale biomedical database and research resource +Bioinformatics # Computational biology field +Codecov # Code coverage testing tool +Conda # Package and environment management system +GTEx # Genotype-Tissue Expression project +Micromamba # Lightweight Conda implementation +Nealelab # Lab developing genetics GWAS summary statistics +PLINK # Whole genome association analysis toolset +tabix # Tool for indexing genomic data +UKBB # UK Biobank dataset + +# Researcher Names +Jager # Philip L. de Jager +KK # Kushal K. Dey +Mazumder # Rahul Mazumder +Najar # Cristian F. B. Najar +CFB # Cristian F. B. 
Najar + +# Technical Terms +cis # Referring to nearby location of a regulatory element +chrom # Chromosome +decayrate # Decay rate, an input parameter in our package +doi # Digital Object Identifier +grey # One color name in R +iteratively # Performed through iterations +lfsr # Local False Sign Rate +lth # Lower threshold +modularity # Property of network structure +omics # Collective biological data fields +phenotypes # Observable traits +pos # Position in genome +probabilistically # Based on probability theory +qc # Quality Control +rcond # Reciprocal condition number +reconciliate # Process of resolving discrepancies +repo # Repository +rss # Residual Sum of Squares +subsampled # Analyzed using data subsets +uncolocalized # Not showing colocalization +uCoS # Trait-specific (uncolocalization) confidence set +VCP # Variant colocalization probability +Xcorr # Cross-correlation matrix +Recalibrate # Process of adjusting statistical calibration \ No newline at end of file From 58e46927b81110a870d7a8f9043f0d6d21bafd39 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 25 Apr 2025 11:25:57 -0400 Subject: [PATCH 3/3] Update WORDLIST --- inst/WORDLIST | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index 080898b..072822b 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,30 +1,30 @@ # Analysis Tools and Methods ColocBoost # Multiple trait colocalization algorithm we propsed Colocalization # Method to identify shared genetic signals -Colocalized # Having undergone colocalization analysis COLOC # A pairwise colocalization method name FineBoost # Single trait fine-mapping algorithm we proposed HyPrColoc # Hypothesis Prioritization in multi-trait Colocalization method name SuSiE # Sum of Single Effects regression model # Statistical and Genetic Terms -CoS # Colocalization confidence set in our proposed ColocBoost method eQTL # Expression 
Quantitative Trait Loci GWAS # Genome-Wide Association Study INDELs # Insertions and Deletions LD # Linkage Disequilibrium +ld # Linkage Disequilibrium MAF # Minor Allele Frequency -NPC # Normalization probability of colocalization in our proposed ColocBoost method -PIPs # Posterior Inclusion Probabilities +maf # Minor Allele Frequency pvalue # Statistical p-value sQTL # Splicing Quantitative Trait Loci -SEL # Single-effect learner in our proposed ColocBoost method Sumstat # Summary Statistics +sumstat # Summary Statistics +sumstats # Summary Statistics xQTL # Any molecular Quantitative Trait Loci # Software, Platforms and Tools Biobank # Refer to UK Biobank, a large-scale biomedical database and research resource Bioinformatics # Computational biology field +bioinformatics # Computational biology field Codecov # Code coverage testing tool Conda # Package and environment management system GTEx # Genotype-Tissue Expression project @@ -33,6 +33,9 @@ Nealelab # Lab developing genetics GWAS summary statistics PLINK # Whole genome association analysis toolset tabix # Tool for indexing genomic data UKBB # UK Biobank dataset +medRxiv # A preprint resource +pixi # An environment manager +conda # Package and environment management system # Researcher Names Jager # Philip L. de Jager @@ -40,6 +43,9 @@ KK # Kushal K. Dey Mazumder # Rahul Mazumder Najar # Cristian F. B. Najar CFB # Cristian F. B. Najar +YI # Yang I. 
Li +et # and more +al # and more # Technical Terms cis # Referring to nearby location of a regulatory element @@ -63,6 +69,24 @@ rss # Residual Sum of Squares subsampled # Analyzed using data subsets uncolocalized # Not showing colocalization uCoS # Trait-specific (uncolocalization) confidence set +ucos # Trait-specific (uncolocalization) confidence set VCP # Variant colocalization probability +VPA # Variant probability of association +vcp # Variant colocalization probability Xcorr # Cross-correlation matrix -Recalibrate # Process of adjusting statistical calibration \ No newline at end of file +Recalibrate # Process of adjusting statistical calibration +recalibrate # Process of adjusting statistical calibration +recalibrated # Process of adjusting statistical calibration +Colocalized # Having undergone colocalization analysis +colocalize # Having undergone colocalization analysis +colocalized # Having undergone colocalization analysis +CoS # Colocalization confidence set in our proposed ColocBoost method +NPC # Normalization probability of colocalization in our proposed ColocBoost method +PIPs # Posterior Inclusion Probabilities +SEL # Single-effect learner in our proposed ColocBoost method +uS # The number of uCoS +npc # Normalization probability of colocalization in our proposed ColocBoost method +Pre # Before +pre # Before +jk # Index used in ColocBoost +nd # Second \ No newline at end of file