-
Notifications
You must be signed in to change notification settings - Fork 0
Home
GWAS of ECT-based predicted TRD
Inverse variance weighted meta-analysis is conducted in both METAL and PLINK. PLINK is run for the CHR BP columns.
Leave one out (LOO) analyses are for polygenic risk score (PRS) association studies.
Sample script for meta-analysis using METAL and PLINK are in metal_script.sh
Cohort specific summary statistics from samples in VUMC, MGB, Geisinger, and MVP are available here for download.
# make sure header has same format
# sample metal script with the following input
sumstat_path=/data/ruderferlab/projects/trd # edit for path to summary statistics
outpath=/data/ruderferlab/projects/trd/results/meta/202205_updated_MVP_MGB_VU # edit for path for output
# input for VUMC model meta-analysis
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat.gz
file2=$sumstat_path/vu.mega_eur.trd.vumc_model.sumstat.gz
file3=$sumstat_path/MGB.trd.vumc_model.sumstat.gz
file4=$sumstat_path/MVP.trd.v4.vumc_model.sumstat.gz
meta1_out=${outpath}/202205_updated_MVP_MGB_VU_normvu
sh metal_script.sh outpath file1 file2 file3 file4 meta1_out
# input for MGB model meta-analysis
file1=$sumstat_path/geisinger.trd.mgh_model.sumstat.gz
file2=$sumstat_path/vu.mega_eur.trd.mgh_model.sumstat.gz
file3=$sumstat_path/MGB.trd.mgh_model.sumstat.gz
file4=$sumstat_path/MVP.trd.v4.mgh_model.sumstat.gz
meta2_out=${outpath}/202205_updated_MVP_MGB_VU_normmgh
sh metal_script.sh outpath file1 file2 file3 file4 meta2_out
Output meta-analyses generates .tbl file from METAL and .meta file from PLINK.
LOO_${site} where summary statistic of site among VUMC, MGB, and MVP is excluded from meta-analysis
Sample script for meta-analysis using METAL and PLINK are in metal_LOO.sh
# Define output path
outpath=${outpath}/results/meta/202205_updated_MVP_MGB_VU/LOO
# LOO MVP
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat
file2=$sumstat_path/trd_202205_MEGA_EUR_v3_normvu.qt.sumstat
file3=$sumstat_path/MGB.trd.vumc_model.sumstat
meta3_out=${outpath}/LOO_MVP_normvu
sh metal_LOO.sh outpath file1 file2 file3 meta3_out
file1=$sumstat_path/geisinger.trd.mgh_model.sumstat
file2=$sumstat_path/trd_202205_MEGA_EUR_v3_normmgh.qt.sumstat
file3=$sumstat_path/MGB.trd.mgh_model.sumstat
meta4_out=${outpath}/LOO_MVP_normmgh
sh metal_LOO.sh outpath file1 file2 file3 meta4_out
# LOO VUMC
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat
file2=$sumstat_path/MGB.trd.vumc_model.sumstat
file3=$sumstat_path/MVP.trd.v4.vumc_model.sumstat
meta5_out=${outpath}/LOO_VUMC_normvu
sh metal_LOO.sh outpath file1 file2 file3 meta5_out
file1=$sumstat_path/geisinger.trd.mgh_model.sumstat
file2=$sumstat_path/MGB.trd.mgh_model.sumstat
file3=$sumstat_path/MVP.trd.v4.mgh_model.sumstat
meta6_out=${outpath}/LOO_VUMC_normmgh
sh metal_LOO.sh outpath file1 file2 file3 meta6_out
# LOO MGB
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat
file2=$sumstat_path/trd_202205_MEGA_EUR_v3_normvu.qt.sumstat
file3=$sumstat_path/MVP.trd.v4.vumc_model.sumstat
meta7_out=${outpath}/LOO_MGB_normvu
sh metal_LOO.sh outpath file1 file2 file3 meta7_out
file1=$sumstat_path/geisinger.trd.mgh_model.sumstat
file2=$sumstat_path/trd_202205_MEGA_EUR_v3_normmgh.qt.sumstat
file3=$sumstat_path/MVP.trd.v4.mgh_model.sumstat
meta8_out=${outpath}/LOO_MGB_normmgh
sh metal_LOO.sh outpath file1 file2 file3 meta8_out
Filter for 80% of total sample size (N) and merge with plink output for CHR BP for QQ and Manhattan plotting. Save output file as .ivw
library(data.table); library(qqman); library(dplyr)
m=fread("202205_updated_MVP_MGB_VU_normmgh1.tbl")
v=fread("202205_updated_MVP_MGB_VU_normvu1.tbl")
names(m)[10] = "P"
names(v)[10] = "P"
# MGB model meta-analysis
pm = fread("202205_updated_MVP_MGB_VU_normmgh.meta")
m1 = m %>% filter(TN > max(m$TN)*0.8)
m2 = merge(pm[,c(1,2,3)], m1, by.x = "SNP", by.y = "MarkerName")
# plot Manhattan and QQ plot
m.sub = m2 %>% filter(P < 5e-2)
title = "202205_updated_MVP_MGB_VU_normmgh"
png(paste(title,"_manhattan.png",sep=""), width = 12, height = 5, units = "in", res=220, type = "cairo")
manhattan(m.sub, col = c("#2C9696", "#005A5A"), suggestiveline=FALSE,
annotatePval=5e-8, annotateTop=TRUE, main=title)
dev.off()
title = "202205_updated_MVP_MGB_VU_normmgh"
png(paste(title,"_qq.png",sep=""), width = 5, height = 5, units = "in", res=220, type = "cairo")
qq(m2$P, main = title)
dev.off()
# VUMC model meta-analysis
pv = fread("202205_updated_MVP_MGB_VU_normvu.meta")
max = max(v$TN)*0.8
v1 = v %>% filter(TN > max)
v2 = merge(pv[,c(1,2,3)], v1, by.x = "SNP", by.y = "MarkerName")
v.sub = v2 %>% filter(P < 5e-2)
# plot Manhattan and QQ plot
title = "202205_updated_MVP_MGB_VU_normvu"
png(paste(title,"_manhattan.png",sep=""), width = 12, height = 5, units = "in", res=220, type = "cairo")
manhattan(v.sub, col = c("#2C9696", "#005A5A"), suggestiveline=FALSE,
annotatePval=5e-8, annotateTop=TRUE, main=title)
dev.off()
title = "202205_updated_MVP_MGB_VU_normvu"
png(paste(title,"_qq.png",sep=""), width = 5, height = 5, units = "in", res=220, type = "cairo")
qq(v2$P, main = title)
dev.off()
# Write out meta-analyses with columns CHR BP SNP A1 A2 BETA SE P FREQ NMISS
m3 = m2[,c(2,3,1,4,5,10,11,12,6,18)]
v3 = v2[,c(2,3,1,4,5,10,11,12,6,18)]
colnames(m3) = c("CHR", "BP", "SNP", "A1", "A2", "BETA", "SE", "P", "FREQ", "NMISS")
colnames(v3) = c("CHR", "BP", "SNP", "A1", "A2", "BETA", "SE", "P", "FREQ", "NMISS")
m3$A1 = toupper(m3$A1)
m3$A2 = toupper(m3$A2)
v3$A1 = toupper(v3$A1)
v3$A2 = toupper(v3$A2)
write.table(m3, "202205_updated_MVP_MGB_VU_normmgh.meta.0.8n.ivw", quote=FALSE, row.names=F, col.names=T, sep = " ")
write.table(v3, "202205_updated_MVP_MGB_VU_normvu.meta.0.8n.ivw", quote=FALSE, row.names=F, col.names=T, sep = " ")
I grep the GWS snp among the different input GWAS and meta-analysis and make a csv file. Example in chr16_rs8050136_forest.csv
library(ggplot2); library(data.table)
options(scipen=-2, digits = 3)
# For CHR 22 GWS SNP
file <- "GWS_chr22_forest.csv"
snp <- "rs133082"; CHR <- 22
c = fread(file)
# For CHR 16 GWS SNP
c<- fread("chr16_rs8050136_forest.csv")
snp <- "rs56313538"; CHR <- 16
# For one model
# Match alleles and change BETA direction depending on A1
c$BETA <- ifelse(c$A1 != c[1,]$A1, -c$BETA, c$BETA)
c$FREQ <- ifelse(c$A1 !=c[1,]$A1, 1-c$FREQ, c$FREQ)
#c$A2 <- ifelse(c$A1 != c[1,]$A1, c[nrow(c),]$A2, c$A2)
#c$A1 <- ifelse(c$A1 != c[1,]$A1, c[nrow(c),]$A1, c$A1)
col2 = "#00AFBB"
col4 = "#DBA401"
c$axiscolors="black"
c[which(c$level=="1" | c$level=="0"),]$axiscolors= col2
c[which(c$level=="3" | c$level=="2"),]$axiscolors= col4
c$Cohort <- factor(c$Cohort, levels = c$Cohort[rev(order(c$level))])
exclude = c("MGB TRD meta | BMI", "VUMC TRD meta | BMI", "MVP (AFR) - MGB model","MVP (AFR) - VUMC model")
a=c[!which(c$Cohort %in% exclude)]
p=ggplot(a, aes(x=Cohort, y=BETA, ymin=BETA-SE, ymax =BETA+SE, label=signif(P,3)))+
geom_hline(yintercept = 0, lty=2, colour="gray")+
geom_pointrange()+
geom_text(nudge_x = 0.4, hjust = "inward", nudge_y = 2E-2, colour = "navy",size=3) +
#geom_text(data= c[which(P > 0.0001)], nudge_x = 0.4, hjust = "inward", nudge_y = 0, colour = "navy") +
coord_flip() + ylab("BETA") +
ggtitle(paste("Chr",CHR,a$SNP)) +
geom_pointrange(data = a[1,],col=col2,shape=15,size=1,stroke=1) +
geom_pointrange(data = a[1:5,],col=col2,shape=20,size=1,stroke=1) +
geom_pointrange(data = a[6,],col=col4,shape=15,size=1,stroke=1) +
geom_pointrange(data = a[6:10,],col=col4,shape=20,size=1,stroke=1) +
theme(text=element_text(size = 13),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")) +
theme(plot.title = element_text(size=12, margin = margin(t = 0, r = 0, b = 10, l = 0))) +
theme(axis.title.x = element_text(size=12, margin = margin(t = 10, r = 0, b = 0, l = 0))) +
theme(axis.text.y = element_text(color=rev(a$axiscolors))) +
theme(axis.title.y = element_blank())+
theme(plot.margin=unit(c(0.5,1.2,0.5,0.5),"cm"))
plot(p)
ggsave("GWS_chr22_forest_plot_only_meta.pdf", width=9, height=4.5)
SNP-h2 estimation of both meta-analysis files and individual cohort GWAS using obsv_h2.sh
# define path to meta-analysis
model=mgh
name=202205_updated_MVP_MGB_VU_norm${model}
outpath=/data/ruderferlab/projects/biovu/trd # modify for local outpath
file=${outpath}/results/meta/202205_updated_MVP_MGB_VU/${name}.meta.0.8n.ivw
ldsc=/data/ruderferlab/software/ldsc
module load Anaconda2
python ${ldsc}/munge_sumstats.py \
--merge-alleles ${ldsc}/ref/w_hm3.snplist \
--sumstats ${file} \
--N-col NMISS \
--out ${outpath}/results/ldsc/${name}
# Heritability estimation of meta-analysis files
python ${ldsc}/ldsc.py \
--h2 ${outpath}/results/ldsc/${name}.sumstats.gz \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--out ${outpath}/results/ldsc/${name}_obsv_h2
# SNP-h2 of the individual studies
file=$1
name=$2
ldsc=/data/ruderferlab/software/ldsc
module load Anaconda2
python ${ldsc}/munge_sumstats.py \
--merge-alleles ${ldsc}/ref/w_hm3.snplist \
--sumstats ${file} \
--N-col NMISS \
--out ${outpath}/results/ldsc/${name}
python ${ldsc}/ldsc.py \
--h2 ${outpath}/results/ldsc/${name}.sumstats.gz \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--out ${outpath}/results/ldsc/${name}_obsv_h2
# VUMC model individual cohort SNP-h2 estimation
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat.gz
file2=$sumstat_path/vu.mega_eur.trd.vumc_model.sumstat.gz
file3=$sumstat_path/MGB.trd.vumc_model.sumstat.gz
file4=$sumstat_path/MVP.trd.v4.vumc_model.sumstat.gz
sh ${outpath}/scripts/obsv_h2.sh $file1 geisinger.trd.vumc_model
sh ${outpath}/scripts/obsv_h2.sh $file2 vu.mega_eur.trd.vumc_model
sh ${outpath}/scripts/obsv_h2.sh $file3 MGB.trd.vumc_model
sh ${outpath}/scripts/obsv_h2.sh $file4 MVP.trd.v4.vumc_model
# MGB model individual cohort SNP-h2 estimation
file1=$sumstat_path/geisinger.trd.mgh_model.sumstat.gz
file2=$sumstat_path/vu.mega_eur.trd.mgh_model.sumstat.gz
file3=$sumstat_path/MGB.trd.mgh_model.sumstat.gz
file4=$sumstat_path/MVP.trd.v4.mgh_model.sumstat.gz
sh ${outpath}/scripts/obsv_h2.sh $file1 geisinger.trd.mgh_model
sh ${outpath}/scripts/obsv_h2.sh $file2 vu.mega_eur.trd.mgh_model
sh ${outpath}/scripts/obsv_h2.sh $file3 MGB.trd.mgh_model
sh ${outpath}/scripts/obsv_h2.sh $file4 MVP.trd.v4.mgh_model
3.2 Genetic correlation with other TRD summary statistics, and publicly available psychiatric and non-psychiatric traits
List of PMID of source of each trait and summary statistics are listed in the manuscript Supplementary Tables.
cd ${outpath}/results/ldsc
# Other TRD
python ${ldsc}/ldsc.py \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--rg ${name}.sumstats.gz,202205_updated_MVP_MGB_VU_normmgh.sumstats.gz,202205_updated_MVP_MGB_VU_normvu.sumstats.gz,fabbri_ukb_TRDvsNotTRD.sumstats.gz,prefect-mdd2021-narrow.sumstats.gz \
--out ${name}_other_trd_rg
# psych
cd /data/ruderferlab/projects/suicide/results/ldsc/sumstats/pgc_gwas/
python ${ldsc}/ldsc.py \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--rg ${outpath}/results/ldsc/${name}.sumstats.gz,pgc_mdd_2018_ex23andme.sumstats.gz,pgc_depsx_mdd_2019_exUKB_23andme.sumstats.gz,daner_bip_pgc3_nm.ldsc.sumstats.gz,pgc3_scz_neff.sumstats.gz,pgc_ed_AN2.2019-07.sumstats.gz,pgc_asd_2017.sumstats.gz,pgc_ocd_2017.sumstats.gz,pgc_ts_tourette_2018.sumstats.gz,pgc_adhd_2017.sumstats.gz,pgc_ptsd_2019.sumstats.gz,pgc_aud_2018.sumstats.gz,pgc_alcdep_2018.sumstats.gz,pgc_aud_t_2018.sumstats.gz,pgc_aud_c_2018.sumstats.gz,pgc_aud_p_2018.sumstats.gz,pgc_cannabis_2018.sumstats.gz,pgc_cdg_2019.sumstats.gz,pgc_risk.sumstats.gz,edu_attainment_2018.sumstats.gz,../neff/daner_isgc_mvp_EUR_062821.neff.sumstats.gz,self-harm_behavior.sumstats.gz,self-harm_ideation.sumstats.gz,insomnia.sumstats.gz,self-harm_ideation_after_LMMtoLog_Neff.sumstats.gz \
--out ${outpath}/results/ldsc/${name}_psych_rg
# non-psych
python ${ldsc}/ldsc.py \
--rg ${outpath}/results/ldsc/${name}.sumstats.gz,neuroticism_2016.sumstats.gz,intelligence_2018.sumstats.gz,cig_per_day_2019.sumstats.gz,drinks_per_week_2019.sumstats.gz,t2dm_2017.sumstats.gz,ukbb_income.sumstats.gz,Whradjbmi_eur.giant-ukbb.combined.2018.sumstats.gz,bmi_eur_giant-ukbb_combined_2018.sumstats.gz \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--out ${outpath}/results/ldsc/${name}_nonpsych_rg
Compile the genetic correlations and store into csv. Sample in trd_cond_bmi_rgs.csv
library(data.table); library(stringr); library(dplyr); library(ggplot2)
setwd("~/OneDrive - Vanderbilt/Ruderferlab/TRD/trd_20210409")
#setwd("C:/Users/Jay Kang/OneDrive - Vanderbilt/Ruderferlab/TRD/trd_20210409")
#df = fread("~/trd/results/ldsc/trd_meta_psych_rg.txt")
df = fread("trd_cond_bmi_rgs.csv")
df$p1 <- factor(df$p1, levels=unique(df$p1[rev(order(df$p1))]))
df$p2 <- factor(df$p2, levels=unique(df$p2[rev(order(df$group))]))
df$p2 <- factor(df$p2, levels(df$p2)[c(1:10,23:26,11:18,19:22,27:29)])
df$group <- as.factor(df$group)
df$group <- factor(df$group, levels(df$group)[c(1,3,4,2,5,6,7,8)])
# just VUMC and MGB models
df2=df[p1 == "MGB" | p1 == "VUMC"]
bold.p2 <- c('Educational attainment', 'BMI (EUR)', 'Type 2 diabetes', 'Neuroticism', 'Drinks per week', 'AUDIT-C', 'AUDIT-T', 'Marijuana use')
vec_font <- ifelse(levels(df2$p2) %in% bold.p2, "bold", "plain")
p=ggplot(df2, aes(p2, rg, label=formatC(p, format="e",digits = 2))) +
theme(axis.text.y = element_text(face=vec_font), panel.background = element_rect(fill="white"),
panel.grid.major = element_line(color="gray90"), panel.grid.minor = element_line(color="gray95"),
panel.border = element_rect(linetype = "solid", fill = NA))+
geom_errorbar(
aes(ymin = rg-se, ymax = rg+se, color = as.factor(p1)),
position = position_dodge(0.4), width = 0.2
)+
geom_point(aes(color = p1), position = position_dodge(0.4),size=2) +
#geom_point(data=df2[p <0.05/29] , color = "white", position = position_dodge(0.4),size=2) +
#geom_text(data=df2[which(p <0.05/27)],nudge_x = 0.4, hjust = "outward", nudge_y = 3E-3, colour = "navy",size=3) +
#geom_pointrange(data=df2[p<0.05/29], col = as.factor(df2$p1), position = position_dodge(0.4),size=2)+
scale_color_manual(values = c("#E7B800","#00AFBB"))+
xlab(" ")+ylab("Genetic correlation (rg)")+
labs(color = "TRD Model")+
#scale_x_discrete(labels= function(x) highlight(x, "Educational attainment|BMI (EUR)|Type 2 diabetes|Neuroticism|Drinks per week|AUDIT-C|AUDIT-T"))+
ylim(-1, 1.1)+coord_flip()
plot(p)
plot(p+facet_grid (as.factor(df2$group), scales = "free_y", space = "free_y"))
+theme(strip.background = element_rect(colour = "black"))
# to edit each model and after BMI conditioning
df2=df[p1 == "MGB" | p1 == "MGB_cond_BMI"]
p=ggplot(df2, aes(p2, rg, label=formatC(p, format="e",digits = 2))) +
theme(axis.text.y = element_text(), panel.background = element_rect(fill="white"),
panel.grid.major = element_line(color="gray90"), panel.grid.minor = element_line(color="gray95"),
panel.border = element_rect(linetype = "solid", fill = NA))+
geom_errorbar(
aes(ymin = rg-se, ymax = rg+se, color = as.factor(p1)),
position = position_dodge(0.4), width = 0.2
)+
geom_point(aes(color = p1), position = position_dodge(0.4),size=2) +
scale_color_manual(values = c("#00AFBB","#015b61"))+
xlab(" ")+ylab("Genetic correlation (rg)")+
labs(color = "TRD Model")+
#scale_x_discrete(labels= function(x) highlight(x, "Educational attainment|BMI (EUR)|Type 2 diabetes|Neuroticism|Drinks per week|AUDIT-C|AUDIT-T"))+
ylim(-1, 1.1)+coord_flip()
plot(p)
plot(p+facet_grid (as.factor(df2$group), scales = "free_y", space = "free_y"))+theme(strip.background = element_rect(colour = "black"))
df3=df[p1 == "VUMC" | p1 == "VUMC_cond_BMI"]
p=ggplot(df3, aes(p2, rg, label=formatC(p, format="e",digits = 2))) +
theme(axis.text.y = element_text(), panel.background = element_rect(fill="white"),
panel.grid.major = element_line(color="gray90"), panel.grid.minor = element_line(color="gray95"),
panel.border = element_rect(linetype = "solid", fill = NA))+
geom_errorbar(
aes(ymin = rg-se, ymax = rg+se, color = as.factor(p1)),
position = position_dodge(0.4), width = 0.2
)+
geom_point(aes(color = p1), position = position_dodge(0.4),size=2) +
scale_color_manual(values = c("#E7B800","#826803"))+
xlab(" ")+ylab("Genetic correlation (rg)")+
labs(color = "TRD Model")+
#scale_x_discrete(labels= function(x) highlight(x, "Educational attainment|BMI (EUR)|Type 2 diabetes|Neuroticism|Drinks per week|AUDIT-C|AUDIT-T"))+
ylim(-1, 1.1)+coord_flip()
plot(p)
plot(p+facet_grid (as.factor(df2$group), scales = "free_y", space = "free_y"))+theme(strip.background = element_rect(colour = "black"))