Skip to content
JooEun Jay Kang edited this page Apr 24, 2024 · 6 revisions

TRD_GWAS

GWAS of ECT-based predicted TRD

Step 1: Meta-analysis with the other sites and creating LOO files

Inverse variance weighted meta-analysis is conducted in both METAL and PLINK. PLINK is run for the CHR BP columns.
Leave one out (LOO) analyses are for polygenic risk score (PRS) association studies.

Sample script for meta-analysis using METAL and PLINK are in metal_script.sh

Cohort specific summary statistics from samples in VUMC, MGB, Geisinger, and MVP are available here for download.

1.1 Run meta-analysis for all files

# make sure header has same format
# sample metal script with the following input

sumstat_path=/data/ruderferlab/projects/trd # edit for path to summary statistics
outpath=/data/ruderferlab/projects/trd/results/meta/202205_updated_MVP_MGB_VU # edit for path for output

# input for VUMC model meta-analysis
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat.gz
file2=$sumstat_path/vu.mega_eur.trd.vumc_model.sumstat.gz
file3=$sumstat_path/MGB.trd.vumc_model.sumstat.gz
file4=$sumstat_path/MVP.trd.v4.vumc_model.sumstat.gz
meta1_out=${outpath}/202205_updated_MVP_MGB_VU_normvu

sh metal_script.sh outpath file1 file2 file3 file4 meta1_out

# input for MGB model meta-analysis
file1=$sumstat_path/geisinger.trd.mgh_model.sumstat.gz
file2=$sumstat_path/vu.mega_eur.trd.mgh_model.sumstat.gz
file3=$sumstat_path/MGB.trd.mgh_model.sumstat.gz
file4=$sumstat_path/MVP.trd.v4.mgh_model.sumstat.gz
meta2_out=${outpath}/202205_updated_MVP_MGB_VU_normmgh

sh metal_script.sh outpath file1 file2 file3 file4 meta2_out

Output meta-analyses generates .tbl file from METAL and .meta file from PLINK.

1.2 Leave-one-out(LOO) meta-analyses generation.

LOO_${site} where summary statistic of site among VUMC, MGB, and MVP is excluded from meta-analysis
Sample script for meta-analysis using METAL and PLINK are in metal_LOO.sh

# Define output path
outpath=${outpath}/results/meta/202205_updated_MVP_MGB_VU/LOO

# LOO MVP
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat 
file2=$sumstat_path/trd_202205_MEGA_EUR_v3_normvu.qt.sumstat
file3=$sumstat_path/MGB.trd.vumc_model.sumstat
meta3_out=${outpath}/LOO_MVP_normvu

sh metal_LOO.sh outpath file1 file2 file3 meta3_out

file1=$sumstat_path/geisinger.trd.mgh_model.sumstat 
file2=$sumstat_path/trd_202205_MEGA_EUR_v3_normmgh.qt.sumstat
file3=$sumstat_path/MGB.trd.mgh_model.sumstat
meta4_out=${outpath}/LOO_MVP_normmgh

sh metal_LOO.sh outpath file1 file2 file3 meta4_out

# LOO VUMC
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat
file2=$sumstat_path/MGB.trd.vumc_model.sumstat
file3=$sumstat_path/MVP.trd.v4.vumc_model.sumstat
meta5_out=${outpath}/LOO_VUMC_normvu

sh metal_LOO.sh outpath file1 file2 file3 meta5_out

file1=$sumstat_path/geisinger.trd.mgh_model.sumstat 
file2=$sumstat_path/MGB.trd.mgh_model.sumstat
file3=$sumstat_path/MVP.trd.v4.mgh_model.sumstat
meta6_out=${outpath}/LOO_VUMC_normmgh

sh metal_LOO.sh outpath file1 file2 file3 meta6_out

# LOO MGB
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat
file2=$sumstat_path/trd_202205_MEGA_EUR_v3_normvu.qt.sumstat
file3=$sumstat_path/MVP.trd.v4.vumc_model.sumstat
meta7_out=${outpath}/LOO_MGB_normvu

sh metal_LOO.sh outpath file1 file2 file3 meta7_out

file1=$sumstat_path/geisinger.trd.mgh_model.sumstat 
file2=$sumstat_path/trd_202205_MEGA_EUR_v3_normmgh.qt.sumstat
file3=$sumstat_path/MVP.trd.v4.mgh_model.sumstat
meta8_out=${outpath}/LOO_MGB_normmgh

sh metal_LOO.sh outpath file1 file2 file3 meta8_out

1.3 Sample filtering and plotting GWAS results

Filter for 80% of total sample size (N) and merge with plink output for CHR BP for QQ and Manhattan plotting. Save output file as .ivw

library(data.table); library(qqman); library(dplyr)
m=fread("202205_updated_MVP_MGB_VU_normmgh1.tbl")
v=fread("202205_updated_MVP_MGB_VU_normvu1.tbl")

names(m)[10] = "P"
names(v)[10] = "P"

# MGB model meta-analysis
pm = fread("202205_updated_MVP_MGB_VU_normmgh.meta")
m1 = m %>% filter(TN > max(m$TN)*0.8)
m2 = merge(pm[,c(1,2,3)], m1, by.x = "SNP", by.y = "MarkerName")

# plot Manhattan and QQ plot
m.sub = m2 %>% filter(P < 5e-2)

title = "202205_updated_MVP_MGB_VU_normmgh"
     png(paste(title,"_manhattan.png",sep=""), width = 12, height = 5, units = "in", res=220, type = "cairo")
     manhattan(m.sub, col = c("#2C9696", "#005A5A"), suggestiveline=FALSE, 
               annotatePval=5e-8, annotateTop=TRUE, main=title)
     dev.off()

title = "202205_updated_MVP_MGB_VU_normmgh"
     png(paste(title,"_qq.png",sep=""), width = 5, height = 5, units = "in", res=220, type = "cairo")
     qq(m2$P, main = title)
     dev.off()

# VUMC model meta-analysis
pv = fread("202205_updated_MVP_MGB_VU_normvu.meta")
max = max(v$TN)*0.8
v1 = v %>% filter(TN > max)
v2 = merge(pv[,c(1,2,3)], v1, by.x = "SNP", by.y = "MarkerName")
v.sub = v2 %>% filter(P < 5e-2)

# plot Manhattan and QQ plot
title = "202205_updated_MVP_MGB_VU_normvu"
     png(paste(title,"_manhattan.png",sep=""), width = 12, height = 5, units = "in", res=220, type = "cairo")
     manhattan(v.sub, col = c("#2C9696", "#005A5A"), suggestiveline=FALSE, 
               annotatePval=5e-8, annotateTop=TRUE, main=title)
     dev.off()
 
title = "202205_updated_MVP_MGB_VU_normvu"  
     png(paste(title,"_qq.png",sep=""), width = 5, height = 5, units = "in", res=220, type = "cairo")
     qq(v2$P, main = title)
     dev.off()

# Write out meta-analyses with columns CHR BP SNP A1 A2 BETA SE P FREQ NMISS
m3 = m2[,c(2,3,1,4,5,10,11,12,6,18)]
v3 = v2[,c(2,3,1,4,5,10,11,12,6,18)]

colnames(m3) = c("CHR", "BP", "SNP", "A1", "A2", "BETA", "SE", "P", "FREQ", "NMISS")
colnames(v3) = c("CHR", "BP", "SNP", "A1", "A2", "BETA", "SE", "P", "FREQ", "NMISS")

m3$A1 = toupper(m3$A1)
m3$A2 = toupper(m3$A2)
v3$A1 = toupper(v3$A1)
v3$A2 = toupper(v3$A2)

write.table(m3, "202205_updated_MVP_MGB_VU_normmgh.meta.0.8n.ivw", quote=FALSE, row.names=F, col.names=T, sep = " ")
write.table(v3, "202205_updated_MVP_MGB_VU_normvu.meta.0.8n.ivw", quote=FALSE, row.names=F, col.names=T, sep = " ")

Step 2: Forest plot of the GWS hits

I grep the GWS snp among the different input GWAS and meta-analysis and make a csv file. Example in chr16_rs8050136_forest.csv

library(ggplot2); library(data.table)
options(scipen=-2, digits = 3)

# For CHR 22 GWS SNP
file <- "GWS_chr22_forest.csv"
snp <- "rs133082"; CHR <- 22
c = fread(file)

# For CHR 16 GWS SNP
c<- fread("chr16_rs8050136_forest.csv")
snp <- "rs56313538"; CHR <- 16

# For one model
# Match alleles and change BETA direction depending on A1
c$BETA <- ifelse(c$A1 != c[1,]$A1, -c$BETA, c$BETA)
c$FREQ <- ifelse(c$A1 !=c[1,]$A1, 1-c$FREQ, c$FREQ)
#c$A2 <- ifelse(c$A1 != c[1,]$A1, c[nrow(c),]$A2, c$A2)
#c$A1 <- ifelse(c$A1 != c[1,]$A1, c[nrow(c),]$A1, c$A1)

col2 = "#00AFBB"
col4 = "#DBA401"
c$axiscolors="black"
c[which(c$level=="1" | c$level=="0"),]$axiscolors= col2
c[which(c$level=="3" | c$level=="2"),]$axiscolors= col4
c$Cohort <- factor(c$Cohort, levels = c$Cohort[rev(order(c$level))])

exclude = c("MGB TRD meta | BMI", "VUMC TRD meta | BMI", "MVP (AFR) - MGB model","MVP (AFR) - VUMC model")
a=c[!which(c$Cohort %in% exclude)]

p=ggplot(a, aes(x=Cohort, y=BETA, ymin=BETA-SE, ymax =BETA+SE, label=signif(P,3)))+
  geom_hline(yintercept = 0, lty=2, colour="gray")+
  geom_pointrange()+
  geom_text(nudge_x = 0.4, hjust = "inward", nudge_y = 2E-2, colour = "navy",size=3) +
  #geom_text(data= c[which(P > 0.0001)], nudge_x = 0.4, hjust = "inward", nudge_y = 0, colour = "navy") +
  coord_flip() + ylab("BETA") +
  ggtitle(paste("Chr",CHR,a$SNP)) +
  geom_pointrange(data = a[1,],col=col2,shape=15,size=1,stroke=1) +
  geom_pointrange(data = a[1:5,],col=col2,shape=20,size=1,stroke=1) +
  geom_pointrange(data = a[6,],col=col4,shape=15,size=1,stroke=1) +
  geom_pointrange(data = a[6:10,],col=col4,shape=20,size=1,stroke=1) +
  theme(text=element_text(size = 13),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(plot.title = element_text(size=12, margin = margin(t = 0, r = 0, b = 10, l = 0))) +
  theme(axis.title.x = element_text(size=12, margin = margin(t = 10, r = 0, b = 0, l = 0))) +
  theme(axis.text.y = element_text(color=rev(a$axiscolors))) +
  theme(axis.title.y = element_blank())+
  theme(plot.margin=unit(c(0.5,1.2,0.5,0.5),"cm"))
plot(p)
ggsave("GWS_chr22_forest_plot_only_meta.pdf", width=9, height=4.5)

Step 3: Heritability estimate of meta-analysis and genetic correlations

3.1 LDSC SNP-heritability estimation

SNP-h2 estimation of both meta-analysis files and individual cohort GWAS using obsv_h2.sh

# define path to meta-analysis
model=mgh
name=202205_updated_MVP_MGB_VU_norm${model}
outpath=/data/ruderferlab/projects/biovu/trd # modify for local outpath
file=${outpath}/results/meta/202205_updated_MVP_MGB_VU/${name}.meta.0.8n.ivw

ldsc=/data/ruderferlab/software/ldsc
module load Anaconda2

python ${ldsc}/munge_sumstats.py \
--merge-alleles ${ldsc}/ref/w_hm3.snplist \
--sumstats ${file} \
--N-col NMISS \
--out ${outpath}/results/ldsc/${name}

# Heritability estimation of meta-analysis files
python ${ldsc}/ldsc.py \
--h2 ${outpath}/results/ldsc/${name}.sumstats.gz \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--out ${outpath}/results/ldsc/${name}_obsv_h2

# SNP-h2 of the individual studies
file=$1
name=$2

ldsc=/data/ruderferlab/software/ldsc
module load Anaconda2

python ${ldsc}/munge_sumstats.py \
--merge-alleles ${ldsc}/ref/w_hm3.snplist \
--sumstats ${file} \
--N-col NMISS \
--out ${outpath}/results/ldsc/${name}

python ${ldsc}/ldsc.py \
--h2 ${outpath}/results/ldsc/${name}.sumstats.gz \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--out ${outpath}/results/ldsc/${name}_obsv_h2

# VUMC model individual cohort SNP-h2 estimation
file1=$sumstat_path/geisinger.trd.vumc_model.sumstat.gz
file2=$sumstat_path/vu.mega_eur.trd.vumc_model.sumstat.gz
file3=$sumstat_path/MGB.trd.vumc_model.sumstat.gz
file4=$sumstat_path/MVP.trd.v4.vumc_model.sumstat.gz

sh ${outpath}/scripts/obsv_h2.sh $file1 geisinger.trd.vumc_model
sh ${outpath}/scripts/obsv_h2.sh $file2 vu.mega_eur.trd.vumc_model
sh ${outpath}/scripts/obsv_h2.sh $file3 MGB.trd.vumc_model
sh ${outpath}/scripts/obsv_h2.sh $file4 MVP.trd.v4.vumc_model

# MGB model individual cohort SNP-h2 estimation
file1=$sumstat_path/geisinger.trd.mgh_model.sumstat.gz
file2=$sumstat_path/vu.mega_eur.trd.mgh_model.sumstat.gz
file3=$sumstat_path/MGB.trd.mgh_model.sumstat.gz
file4=$sumstat_path/MVP.trd.v4.mgh_model.sumstat.gz

sh ${outpath}/scripts/obsv_h2.sh $file1 geisinger.trd.mgh_model
sh ${outpath}/scripts/obsv_h2.sh $file2 vu.mega_eur.trd.mgh_model
sh ${outpath}/scripts/obsv_h2.sh $file3 MGB.trd.mgh_model
sh ${outpath}/scripts/obsv_h2.sh $file4 MVP.trd.v4.mgh_model

3.2 Genetic correlation with other TRD summary statistics, and publicly available psychiatric and non-psychiatric traits

List of PMID of source of each trait and summary statistics are listed in the manuscript Supplementary Tables.

cd ${outpath}/results/ldsc

# Other TRD
python ${ldsc}/ldsc.py \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--rg ${name}.sumstats.gz,202205_updated_MVP_MGB_VU_normmgh.sumstats.gz,202205_updated_MVP_MGB_VU_normvu.sumstats.gz,fabbri_ukb_TRDvsNotTRD.sumstats.gz,prefect-mdd2021-narrow.sumstats.gz  \
--out ${name}_other_trd_rg

# psych
cd /data/ruderferlab/projects/suicide/results/ldsc/sumstats/pgc_gwas/

python ${ldsc}/ldsc.py \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--rg ${outpath}/results/ldsc/${name}.sumstats.gz,pgc_mdd_2018_ex23andme.sumstats.gz,pgc_depsx_mdd_2019_exUKB_23andme.sumstats.gz,daner_bip_pgc3_nm.ldsc.sumstats.gz,pgc3_scz_neff.sumstats.gz,pgc_ed_AN2.2019-07.sumstats.gz,pgc_asd_2017.sumstats.gz,pgc_ocd_2017.sumstats.gz,pgc_ts_tourette_2018.sumstats.gz,pgc_adhd_2017.sumstats.gz,pgc_ptsd_2019.sumstats.gz,pgc_aud_2018.sumstats.gz,pgc_alcdep_2018.sumstats.gz,pgc_aud_t_2018.sumstats.gz,pgc_aud_c_2018.sumstats.gz,pgc_aud_p_2018.sumstats.gz,pgc_cannabis_2018.sumstats.gz,pgc_cdg_2019.sumstats.gz,pgc_risk.sumstats.gz,edu_attainment_2018.sumstats.gz,../neff/daner_isgc_mvp_EUR_062821.neff.sumstats.gz,self-harm_behavior.sumstats.gz,self-harm_ideation.sumstats.gz,insomnia.sumstats.gz,self-harm_ideation_after_LMMtoLog_Neff.sumstats.gz \
--out ${outpath}/results/ldsc/${name}_psych_rg

# non-psych
python ${ldsc}/ldsc.py \
--rg ${outpath}/results/ldsc/${name}.sumstats.gz,neuroticism_2016.sumstats.gz,intelligence_2018.sumstats.gz,cig_per_day_2019.sumstats.gz,drinks_per_week_2019.sumstats.gz,t2dm_2017.sumstats.gz,ukbb_income.sumstats.gz,Whradjbmi_eur.giant-ukbb.combined.2018.sumstats.gz,bmi_eur_giant-ukbb_combined_2018.sumstats.gz \
--ref-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--w-ld-chr ${ldsc}/ref/eur_w_ld_chr/ \
--out ${outpath}/results/ldsc/${name}_nonpsych_rg

3.3 Plotting genetic correlations (rgs)

Compile the genetic correlations and store into csv. Sample in trd_cond_bmi_rgs.csv

Compare rgs between TRD and MGB model

library(data.table); library(stringr); library(dplyr); library(ggplot2)
setwd("~/OneDrive - Vanderbilt/Ruderferlab/TRD/trd_20210409")
#setwd("C:/Users/Jay Kang/OneDrive - Vanderbilt/Ruderferlab/TRD/trd_20210409")
#df = fread("~/trd/results/ldsc/trd_meta_psych_rg.txt")
df = fread("trd_cond_bmi_rgs.csv")

df$p1 <- factor(df$p1, levels=unique(df$p1[rev(order(df$p1))]))
df$p2 <- factor(df$p2, levels=unique(df$p2[rev(order(df$group))]))
df$p2 <- factor(df$p2, levels(df$p2)[c(1:10,23:26,11:18,19:22,27:29)])
df$group <- as.factor(df$group)
df$group <- factor(df$group, levels(df$group)[c(1,3,4,2,5,6,7,8)])

# just VUMC and MGB models
df2=df[p1 == "MGB" | p1 == "VUMC"]
bold.p2 <- c('Educational attainment', 'BMI (EUR)', 'Type 2 diabetes', 'Neuroticism', 'Drinks per week', 'AUDIT-C', 'AUDIT-T', 'Marijuana use')
vec_font <- ifelse(levels(df2$p2) %in% bold.p2, "bold", "plain")

p=ggplot(df2, aes(p2, rg, label=formatC(p, format="e",digits = 2))) +
  theme(axis.text.y = element_text(face=vec_font), panel.background = element_rect(fill="white"), 
        panel.grid.major = element_line(color="gray90"), panel.grid.minor = element_line(color="gray95"),
        panel.border = element_rect(linetype = "solid", fill = NA))+
  geom_errorbar(
    aes(ymin = rg-se, ymax = rg+se, color = as.factor(p1)),
    position = position_dodge(0.4), width = 0.2
    )+
  geom_point(aes(color = p1), position = position_dodge(0.4),size=2) +
  #geom_point(data=df2[p <0.05/29] , color = "white", position = position_dodge(0.4),size=2) +
  #geom_text(data=df2[which(p <0.05/27)],nudge_x = 0.4, hjust = "outward", nudge_y = 3E-3, colour = "navy",size=3) +
  #geom_pointrange(data=df2[p<0.05/29], col = as.factor(df2$p1), position = position_dodge(0.4),size=2)+
  scale_color_manual(values = c("#E7B800","#00AFBB"))+
  xlab(" ")+ylab("Genetic correlation (rg)")+
  labs(color = "TRD Model")+
  #scale_x_discrete(labels= function(x) highlight(x, "Educational attainment|BMI (EUR)|Type 2 diabetes|Neuroticism|Drinks per week|AUDIT-C|AUDIT-T"))+
  ylim(-1, 1.1)+coord_flip()
plot(p)
plot(p+facet_grid (as.factor(df2$group), scales = "free_y", space = "free_y"))
+theme(strip.background = element_rect(colour = "black"))

Compare rgs between VUMC or MGB model before and after BMI conditioning

# to edit each model and after BMI conditioning
df2=df[p1 == "MGB" | p1 == "MGB_cond_BMI"]
p=ggplot(df2, aes(p2, rg, label=formatC(p, format="e",digits = 2))) +
  theme(axis.text.y = element_text(), panel.background = element_rect(fill="white"),
        panel.grid.major = element_line(color="gray90"), panel.grid.minor = element_line(color="gray95"),
        panel.border = element_rect(linetype = "solid", fill = NA))+
  geom_errorbar(
    aes(ymin = rg-se, ymax = rg+se, color = as.factor(p1)),
    position = position_dodge(0.4), width = 0.2
    )+
  geom_point(aes(color = p1), position = position_dodge(0.4),size=2) +
  scale_color_manual(values = c("#00AFBB","#015b61"))+
  xlab(" ")+ylab("Genetic correlation (rg)")+
  labs(color = "TRD Model")+
  #scale_x_discrete(labels= function(x) highlight(x, "Educational attainment|BMI (EUR)|Type 2 diabetes|Neuroticism|Drinks per week|AUDIT-C|AUDIT-T"))+
  ylim(-1, 1.1)+coord_flip()
plot(p)
plot(p+facet_grid (as.factor(df2$group), scales = "free_y", space = "free_y"))+theme(strip.background = element_rect(colour = "black"))

df3=df[p1 == "VUMC" | p1 == "VUMC_cond_BMI"]
p=ggplot(df3, aes(p2, rg, label=formatC(p, format="e",digits = 2))) +
  theme(axis.text.y = element_text(), panel.background = element_rect(fill="white"), 
        panel.grid.major = element_line(color="gray90"), panel.grid.minor = element_line(color="gray95"),
        panel.border = element_rect(linetype = "solid", fill = NA))+
  geom_errorbar(
    aes(ymin = rg-se, ymax = rg+se, color = as.factor(p1)),
    position = position_dodge(0.4), width = 0.2
    )+
  geom_point(aes(color = p1), position = position_dodge(0.4),size=2) +
  scale_color_manual(values = c("#E7B800","#826803"))+
  xlab(" ")+ylab("Genetic correlation (rg)")+
  labs(color = "TRD Model")+
  #scale_x_discrete(labels= function(x) highlight(x, "Educational attainment|BMI (EUR)|Type 2 diabetes|Neuroticism|Drinks per week|AUDIT-C|AUDIT-T"))+
  ylim(-1, 1.1)+coord_flip()
plot(p)
plot(p+facet_grid (as.factor(df2$group), scales = "free_y", space = "free_y"))+theme(strip.background = element_rect(colour = "black"))