-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmsbb_array_diffcoexp2_functional_analysis.R
More file actions
536 lines (445 loc) · 44.4 KB
/
msbb_array_diffcoexp2_functional_analysis.R
File metadata and controls
536 lines (445 loc) · 44.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
library(diffcoexp)
library(org.Hs.eg.db)
library(dplyr)
library(magrittr)
library(enrichR)
library(data.table)
library(foreach)
library(doParallel)
library(DCGL)
library(clusterProfiler)
mapIds2<-function(IDs,IDFrom,IDTo){
require(org.Hs.eg.db)
idmap=mapIds(x = org.Hs.eg.db,keys = IDs,column = IDTo,keytype = IDFrom,multiVals = "first")
na_vec=names(idmap[is.na(idmap)==T])
idmap=idmap[is.na(idmap)==F]
idmap_df=data.frame("From"=names(idmap),"To"=unlist(unname(idmap)),stringsAsFactors = F)
return(list(map=idmap_df,noMap=na_vec))
}
jaccard=function(A,B){
jc=set_cardinality(intersect(A,B))/set_cardinality(union(A,B))
return(jc)
}
dbs <- listEnrichrDbs()
kegg_dbs=grep(pattern = "KEGG",x = dbs$libraryName,value = T)[3]
biocarta_dbs=grep(pattern = "BioCarta|Panther",x = dbs$libraryName,value = T)[4]
panther_dbs=grep(pattern = "BioCarta|Panther",x = dbs$libraryName,value = T)[5]
kegg_2016_pathways.df=read.gmt(gmtfile = "../../../../../KEGG_2016.txt")
tanzi_ranked_genes=read.table("/Users/sandeepamberkar/Work/Collaborations/Tanzi_WGS/Gene_Lists/All_genes_ranked.txt",header = T,sep = "\t",stringsAsFactors = F)
tanzi_ranked_genes.great=read.table("/Users/sandeepamberkar/Work/Collaborations/Tanzi_WGS/Gene_Lists/Ranked_genes_corrected_allele_top_4000_SNPs_genes_from_great.txt",header = T,sep = "\t",stringsAsFactors = F)
tanzi_ranked_genes.great$SNP_Rank=gsub(pattern = "SNP_rank_",replacement = "",x = tanzi_ranked_genes.great$SNP_Rank)
tanzi_ranked_genes.AD=unique(tanzi_ranked_genes.great%>%mutate(SNP_Rank=as.integer(gsub(pattern = "SNP_rank_",replacement = "",x = SNP_Rank)))%>%dplyr::filter(Corrected_association=="Case"&SNP_Rank<=1000)%>%pull(Gene))
tanzi_ranked_genes.Control=unique(tanzi_ranked_genes.great%>%mutate(SNP_Rank=as.integer(gsub(pattern = "SNP_rank_",replacement = "",x = SNP_Rank)))%>%dplyr::filter(Corrected_association=="Control"&SNP_Rank<=1000)%>%pull(Gene))
tanzi_ranked_genes.union=union(tanzi_ranked_genes.AD,tanzi_ranked_genes.Control)
setwd("/Users/sandeepamberkar/Work/Data/MSMM-MSBB-HBTRC-Data/MSBB_Array19/GSE84422/")
msbb_gse84422_GPL96_97_samplesToAnalyse.exprs=readRDS("msbb_gse84422_GPL96_97_samplesToAnalyse_exprs.RDS")
names(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs)=gsub(pattern = " ",replacement = "_",x = names(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs))
msbb_gse84422_GPL570_samplesToAnalyse.exprs=readRDS("msbb_gse84422_GPL570_samplesToAnalyse_exprs.RDS")
names(msbb_gse84422_GPL570_samplesToAnalyse.exprs)=gsub(pattern = " ",replacement = "_",x = names(msbb_gse84422_GPL570_samplesToAnalyse.exprs))
regnet_tf2target.HGNC=fread("/shared/hidelab2/user/md4zsa/Work/Data/TF_Databases/RegNetwork_human_regulators2.txt",header = T,sep = "\t",showProgress = T,data.table = F)%>%dplyr::filter(evidence=="Experimental")%>%dplyr::select(c(regulator_symbol,target_symbol))
regnet_tf2target.HGNC=fread("/Users/sandeepamberkar/Work/Data/TF_Databases/RegNetwork_human_regulators2.txt",header = T,sep = "\t",showProgress = T,data.table = F)%>%filter(evidence=="Experimental")%>%dplyr::select(c(regulator_symbol,target_symbol))
regnet_tf2target.graph=graph.data.frame(d = regnet_tf2target.HGNC,directed = F)
msbb_gse84422_diffcoexp_results_files=list.files(path = "Lobe_Analysis/Dyscorrelation_Landscape_Results/",pattern = "diffcoexp",full.names = T)%>%grep(pattern = ".RDS",value = T)%>%sort
msbb_gse84422_diffcoexp_results=vector(mode = "list",length = length(msbb_gse84422_diffcoexp_results_files))
names(msbb_gse84422_diffcoexp_results)=gsub(pattern = " ",replacement = "_",gsub(pattern = "_diffcoexp_results",replacement = "",x = unlist(lapply(lapply(lapply(strsplit(x = msbb_gse84422_diffcoexp_results_files,split = "//"),`[[`,2),function(a)strsplit(x = a[[1]],split = "\\.")[[1]]),`[[`,1))))
for(f in 1:19){
msbb_gse84422_diffcoexp_results[[f]]=readRDS(msbb_gse84422_diffcoexp_results_files[f])
}
#Separate DCGs & DCLs in individual lists
msbb_gse84422.DCGs=lapply(msbb_gse84422_diffcoexp_results,function(x)x$DCGs)
msbb_gse84422.DCGs_list=lapply(msbb_gse84422.DCGs,function(x)x%>%dplyr::filter(q<=0.05)%>%pull(Gene))
msbb_gse84422.RS_DCGs_list=vector(mode = "list",length = 19)
names(msbb_gse84422.RS_DCGs_list)=names(msbb_gse84422.DCGs_list)
for(i in 1:length(msbb_gse84422.DCGs_list)){
msbb_gse84422.RS_DCGs_list[[i]]=setdiff(msbb_gse84422.DCGs_list[[i]],(Reduce(union,lapply(msbb_gse84422.DCGs_list,function(x)intersect(x,msbb_gse84422.DCGs_list[[i]]))[-i])))
}
msbb_gse84422.DCGs_list_Entrez=lapply(msbb_gse84422.DCGs_list,function(y)unname(mapIds(x = org.Hs.eg.db,keys = y,keytype = "SYMBOL",column = "ENTREZID")))
msbb_gse84422.RS_DCGs_list_Entrez=lapply(msbb_gse84422.RS_DCGs_list,function(y)unname(mapIds(x = org.Hs.eg.db,keys = y,keytype = "SYMBOL",column = "ENTREZID")))
msbb_gse84422.DCLs=lapply(msbb_gse84422_diffcoexp_results,function(x)x$DCLs%>%dplyr::filter(q.diffcor<=0.05)%>%rownames_to_column("GenePair"))
msbb_gse84422_total_DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length))
msbb_gse84422_total_DCLs=unlist(lapply(msbb_gse84422.DCLs,function(x)dim(x)[1]))
dat1=data.frame(DCLs=msbb_gse84422_total_DCLs,DCGs=msbb_gse84422_total_DCGs,Region=names(msbb_gse84422_total_DCLs))
dat1m=melt(dat1[,c('Region','DCLs','DCGs')],id.vars=1)
ggplot(dat1m,aes(x = Region,y = value)) + geom_bar(aes(fill = variable),stat = "identity",position = "dodge")+
labs(title="MSBB array - DCGs and DCLs")+
theme(title = element_text(face = "bold",size = 15,colour = "black"),axis.title.x = element_text(face = "bold",size = 15,colour = "black"),axis.text.x = element_text(size = 10,colour = "black",angle = 90),legend.text = element_text(size = 15))
msbb_gse84422_DCG.CompMatrix=matrix(NA,ncol=length(msbb_gse84422.DCGs_list),nrow=length(msbb_gse84422.DCGs_list))
rownames(msbb_gse84422_DCG.CompMatrix)=colnames(msbb_gse84422_DCG.CompMatrix)=c("AMYG","AC","CN","DLPFC","FP","HIPP","IFG","ITG","MTG","NAc","OVC","PHG","PCC","PCG","FC","PTMN","SPL","STG","TP")
for(i in 1:length(msbb_gse84422.DCGs_list)){
msbb_gse84422_DCG.CompMatrix[i,]=unlist(lapply(msbb_gse84422.DCGs_list,function(x)jaccard(x,msbb_gse84422.DCGs_list[[i]])))
}
diag(msbb_gse84422_DCG.CompMatrix)=0
cbind.data.frame(DCGs=rowSums(msbb_gse84422_DCG.CompMatrix[-c(14,16),-c(14,16)]),
RS_DCGs=unlist(lapply(msbb_gse84422.RS_DCGs_list[-c(14,16)],length)),
Rank_by_G_DCGs=rank(-rowSums(msbb_gse84422_DCG.CompMatrix[-c(14,16),-c(14,16)])),
Rank_by_RS_DCGs=rank(-unlist(lapply(msbb_gse84422.RS_DCGs_list[-c(14,16)],length))))
circos.par(gap.after = c(rep(2, nrow(mat)-1), 10, rep(2, ncol(mat)-1), 10))
chordDiagram(msbb_gse84422_DCG.CompMatrix, annotationTrack = "grid", transparency = 0.5,preAllocateTracks = list(track.height = 0.1))
circos.text(niceFacing = T,cex = 10,x = 1,y = 1,)
#Read DRsort results
msbb_gse84422.DRsort=vector(mode = "list",length = 19)
names(msbb_gse84422.DRsort)=names(msbb_gse84422_diffcoexp_results)
for(i in c(2:9,11:19)){
msbb_gse84422.DRsort[[i]]=DRsort(DCGs = msbb_gse84422.DCGs[[i]],DCLs = msbb_gse84422.DCLs[[i]],tf2target = regnet_tf2target.HGNC,expGenes =rownames(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs$Frontal_Pole))
}
for(i in c(1,10)){
msbb_gse84422.DRsort[[i]]=DRsort(DCGs = msbb_gse84422.DCGs[[i]],DCLs = msbb_gse84422.DCLs[[i]],tf2target = regnet_tf2target.HGNC,expGenes =rownames(msbb_gse84422_GPL570_samplesToAnalyse.exprs$Amygdala))
}
names(msbb_gse84422.DRsort)=names(msbb_gse84422.DCGs)
# for(i in c(1,10)){msbb_gse84422.DRsort$Amygdala=DRsort(DCGs = msbb_gse84422.DCGs$Amygdala,DCLs = msbb_gse84422.DCLs$Amygdala,tf2target = regnet_tf2target.HGNC,expGenes = rownames(msbb_gse84422_GPL570_samplesToAnalyse.exprs$Amygdala))
# msbb_gse84422.DRsort[[i]]=DRsort(DCGs = msbb_gse84422.DCGs[[i]],DCLs = msbb_gse84422.DCLs[[i]],tf2target = regnet_tf2target.HGNC,expGenes = rownames(msbb_gse84422_GPL570_samplesToAnalyse.exprs$Amygdala))
# }
# for(i in c(2:9,11:19)){
# msbb_gse84422.DRsort[[i]]=DRsort(DCGs = msbb_gse84422.DCGs[[i]],DCLs = msbb_gse84422.DCLs[[i]],tf2target = regnet_tf2target.HGNC,expGenes = rownames(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs$Frontal_Pole))
# }
# msbb_gse84422.DRsort=readRDS("msbb_gse84422_DRsort.RDS")
msbb_gse84422.DRGs=lapply(msbb_gse84422.DRsort,function(x)x$DRGs%>%filter(DCGisTF=="TRUE"&q<=0.05))
msbb_gse84422.DRGs_list=lapply(msbb_gse84422.DRsort,function(x)x$DRGs%>%filter(DCGisTF=="TRUE"&q<=0.05)%>%pull(DCG)%>%droplevels%>%levels)
msbb_gse84422.RS_DRGs_list=vector(mode = "list",length = 18)
names(msbb_gse84422.RS_DRGs_list)=names(msbb_gse84422.DRGs_list)
for(i in 1:length(msbb_gse84422.DRGs_list)){
msbb_gse84422.RS_DRGs_list[[i]]=setdiff(msbb_gse84422.DRGs_list[[i]],(Reduce(union,lapply(msbb_gse84422.DRGs_list,function(x)intersect(x,msbb_gse84422.DRGs_list[[i]]))[-i])))
}
msbb_gse84422.RS_DRGs_list=Filter(f = function(x)length(x)>0,x = msbb_gse84422.RS_DRGs_list)
rs_DRG_DCLs=list()
for(i in names(msbb_gse84422.RS_DRGs_list)){
rs_DRG_DCLs[[i]]=msbb_gse84422.DRGs[[i]]%>%dplyr::filter(DCG%in%msbb_gse84422.RS_DRGs_list[[i]])%>%pull(DCLs)%>%sum
}
msbb_gse84422.DRGs_list_Entrez=lapply(msbb_gse84422.DRGs_list,function(y)unname(mapIds(x = org.Hs.eg.db,keys = y,keytype = "SYMBOL",column = "ENTREZID")))
msbb_gse84422.DRLs=lapply(msbb_gse84422.DRsort,function(x)x$DRLs%>%filter(q.diffcor<=0.05))
msbb_gse84422.TF_bridged_DCL=Filter(f = function(x)dim(x)[1]>0,x = lapply(msbb_gse84422.DRsort,function(x)x$TF_bridged_DCL%>%filter(q.diffcor<=0.05)))
msbb_gse84422.DCG2TF=lapply(msbb_gse84422.DRsort,function(x)x$DCG2TF)
msbb_gse84422_bridge_DCL_by_TF=vector(mode = "list",length = length(names(msbb_gse84422.TF_bridged_DCL)))
names(msbb_gse84422_bridge_DCL_by_TF)=names(msbb_gse84422.TF_bridged_DCL)
for(i in 1:length(names(msbb_gse84422_bridge_DCL_by_TF))){
msbb_gse84422_bridge_DCL_by_TF[[i]]=vector(mode = "list",length = length(lapply(unique(msbb_gse84422.TF_bridged_DCL[[i]]$common.TF),function(x)msbb_gse84422.TF_bridged_DCL[[i]]%>%filter(common.TF==x)%>%select(c(Gene.1,Gene.2)))))
names(msbb_gse84422_bridge_DCL_by_TF[[i]])=unique(msbb_gse84422.TF_bridged_DCL[[i]]$common.TF)
msbb_gse84422_bridge_DCL_by_TF[[i]][1:length(names(msbb_gse84422_bridge_DCL_by_TF[[i]]))]=lapply(unique(msbb_gse84422.TF_bridged_DCL[[i]]$common.TF),function(x)msbb_gse84422.TF_bridged_DCL[[i]]%>%filter(common.TF==x)%>%select(c(Gene.1,Gene.2)))
}
msbb_array_DRrank.TDD=msbb_array_DRrank.TED=vector(mode = "list",length = 19)
names(msbb_array_DRrank.TDD)=names(msbb_array_DRrank.TED)=names(msbb_gse84422_diffcoexp_results)
msbb_array_DRrank.TDD[c(1,10)]=mcmapply(FUN=function(a,b)DRrank(DCGs = a,DCLs = b,tf2target = regnet_tf2target.HGNC,expGenes = rownames(msbb_gse84422_GPL570_samplesToAnalyse.exprs$Amygdala),rank.method = "TDD",Nperm = 500),msbb_gse84422.DCGs[c(1,10)],msbb_gse84422.DCLs[c(1,10)],mc.cores = mc)
#Functional enrichments
msigdb_c7=read.gmt(gmtfile = "/Users/sandeepamberkar/Work/Software/MSigDB_Collections/c7.all.v6.1.symbols.gmt")
msigdb_c2=read.gmt(gmtfile = "/Users/sandeepamberkar/Work/Software/MSigDB_Collections/c2.all.v6.1.symbols.gmt")
msigdb_h=read.gmt(gmtfile = "/Users/sandeepamberkar/Work/Software/MSigDB_Collections/h.all.v6.1.symbols.gmt")
msigdb_c5.BP=read.gmt(gmtfile = "/Users/sandeepamberkar/Work/Software/MSigDB_Collections/c5.bp.v6.1.symbols.gmt")
msigdb_c5.CC=read.gmt(gmtfile = "/Users/sandeepamberkar/Work/Software/MSigDB_Collections/c5.cc.v6.1.symbols.gmt")
msigdb_c5.MF=read.gmt(gmtfile = "/Users/sandeepamberkar/Work/Software/MSigDB_Collections/c5.mf.v6.1.symbols.gmt")
msbb_gse84422_DCGs.c7=lapply(msbb_gse84422.DCGs_list,function(x)data.frame(enricher(gene = x,pvalueCutoff = 0.01,pAdjustMethod = "BH",TERM2GENE = msigdb_c7),stringsAsFactors = F))
msbb_gse84422_DCGs.c2=lapply(msbb_gse84422.DCGs_list,function(x)data.frame(enricher(gene = x,pvalueCutoff = 0.01,pAdjustMethod = "BH",TERM2GENE = msigdb_c2),stringsAsFactors = F))
msbb_gse84422_DCGs.h=lapply(msbb_gse84422.DCGs_list,function(x)data.frame(enricher(gene = x,pvalueCutoff = 0.01,pAdjustMethod = "BH",TERM2GENE = msigdb_h),stringsAsFactors = F))
msbb_gse84422_DCGs.KEGG=vector(mode = "list",length = 19)
names(msbb_gse84422_DCGs.KEGG)=names(msbb_gse84422_diffcoexp_results)
msbb_gse84422_DCGs.KEGG=lapply(msbb_gse84422.DCGs_list_Entrez,function(x)data.frame(enrichKEGG(gene = x,organism = "hsa",pvalueCutoff = 0.05,pAdjustMethod = "BH",qvalueCutoff = 0.1),stringsAsFactors = F))
msbb_gse84422_RS_DCGs.KEGG=lapply(msbb_gse84422.RS_DCGs_list_Entrez,function(x)data.frame(enrichKEGG(gene = x,organism = "hsa",pvalueCutoff = 0.05,pAdjustMethod = "BH",qvalueCutoff = 0.2),stringsAsFactors = F))
msbb_gse84422_DCGs.KEGG=Filter(f = function(x)dim(x)[1]>0,x = msbb_gse84422_DCGs.KEGG)
msbb_gse84422_RS_DCGs.KEGG=Filter(f = function(x)dim(x)[1]>0,x = msbb_gse84422_RS_DCGs.KEGG)
msbb_gse84422_DRGs.KEGG=lapply(msbb_gse84422.DRGs_list_Entrez,function(x)data.frame(enrichKEGG(gene = x,organism = "hsa",pvalueCutoff = 0.05,pAdjustMethod = "BH",qvalueCutoff = 0.1),stringsAsFactors = F))
msbb_gse84422_DRGs.KEGG=Filter(f = function(x)dim(x)[1]>0,x = msbb_gse84422_DRGs.KEGG)
msbb_gse84422_DCGs_KEGG.list=lapply(msbb_gse84422_DCGs.KEGG,function(x)x$Description)
msbb_gse84422_DRGs_KEGG.list=lapply(msbb_gse84422_DRGs.KEGG,function(x)x$Description)
msbb_gse84422_DCG_DRG_common_pathways.list=mapply(FUN = function(a,b)intersect(a,b),msbb_gse84422_DCGs_KEGG.list[intersect(names(msbb_gse84422_DCGs_KEGG.list),names(msbb_gse84422_DRGs_KEGG.list))],msbb_gse84422_DRGs_KEGG.list[intersect(names(msbb_gse84422_DCGs_KEGG.list),names(msbb_gse84422_DRGs_KEGG.list))])
#Genetic basis for AD
msbb_gse84422_tanzi_SAGs.overlap=lapply(msbb_gse84422.DCGs_list,intersect,tanzi_ranked_genes.union)
dat2=data.frame(DCG_SAG_overlap=unlist(lapply(msbb_gse84422_tanzi_SAGs.overlap,length)),Total_DCGs=msbb_gse84422_total_DCGs,Region=names(msbb_gse84422_total_DCGs))
dat2m=melt(dat1[,c('Region','DCG_SAG_overlap','Total_DCGs')],id.vars=1)
ggplot(dat1m,aes(x = Region,y = value)) + geom_bar(aes(fill = variable),stat = "identity",position = "dodge")+
labs(title="MSBB - Overlap with Tanzi SAGs")+scale_y_log10()+
theme(title = element_text(face = "bold",size = 12,colour = "black"),axis.title.x = element_text(face = "bold",size = 15,colour = "black"),axis.text.x = element_text(size = 10,colour = "black",angle = 90),legend.text = element_text(size = 15))
msbb_array_DRrank.TDD=msbb_array_DRrank.TED=vector(mode = "list",length = 19)
names(msbb_array_DRrank.TDD)=names(msbb_array_DRrank.TED)=names(msbb_gse84422_diffcoexp_results)
msbb_array_DRrank_TED.files=list.files(pattern = "TED",full.names = T)
msbb_array_DRrank_TDD.files=list.files(pattern = "TDD",full.names = T)
for(i in 1:19){
msbb_array_DRrank.TED[[i]]=readRDS(msbb_array_DRrank_TED.files[[i]])
msbb_array_DRrank.TDD[[i]]=readRDS(msbb_array_DRrank_TDD.files[[i]])
}
msbb_array_DRrank.TED=lapply(msbb_array_DRrank.TED,function(x)x%>%filter(p<=0.05)%>%mutate(Rank=with_order(order_by = score,fun = row_number,x = -score))%>%arrange(desc(-Rank))%>%dplyr::select(c(Gene,Rank,score,p,p.adj)))
msbb_array_DRrank.TED=Filter(f = function(x)dim(x)[1]>0,x = msbb_array_DRrank.TED)
msbb_array_DRrank.TDD=lapply(msbb_array_DRrank.TDD,function(x)x%>%rownames_to_column("Gene")%>%filter(p<=0.05)%>%mutate(Rank=with_order(order_by = score,fun = row_number,x = -score))%>%arrange(desc(-Rank))%>%dplyr::select(c(Gene,Rank,score,p,p.adj)))
msbb_array_DRrank.TDD=Filter(f = function(x)dim(x)[1]>0,x = msbb_array_DRrank.TDD)
#NOTCH subunits appear as causal
msbb_gse84422_NOTCH_downstream.DCGs=lapply(msbb_gse84422.DCG2TF[names(Filter(f = function(y)length(y)>0,x = lapply(msbb_array_DRrank.TED,function(x)grep(pattern = "NOTCH",x = x$Gene))))],function(x)x[grep(pattern = "NOTCH",x = x$TF),])
msbb_gse84422_PIAS_downstream.DCGs=lapply(msbb_gse84422.DCG2TF[names(Filter(f = function(y)length(y)>0,x = lapply(msbb_array_DRrank.TED,function(x)grep(pattern = "PIAS",x = x$Gene))))],function(x)x[grep(pattern = "PIAS",x = x$TF),])
##########################################################################################################################################################
# Validation with genetic, epigenomic and cell-type markers
##########################################################################################################################################################
msbb_exp_DCGs=lapply(lapply(msbb_gse84422.DCGs_list,length),function(x)round(length(tanzi_ranked_genes.union)/21982*x,digits = 1))
msbb_gse84422_tanzi_SAGs.overlap=lapply(msbb_gse84422.DCGs_list,function(x)length(intersect(x,tanzi_ranked_genes.union)))
msbb_gse84422_tanzi_SAGs.fisher_results=vector(mode = "list",length = 19)
for(m in 1:length(msbb_gse84422.DCGs_list)){
msbb_gse84422_tanzi_SAGs.fisher_results[[m]]=fisher.test(matrix(c(length(msbb_gse84422_tanzi_SAGs.overlap[[m]]),
length(tanzi_ranked_genes.union)-length(msbb_gse84422_tanzi_SAGs.overlap[[m]]),
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_tanzi_SAGs.overlap[[m]]),
21982-(length(tanzi_ranked_genes.union)-length(msbb_gse84422_tanzi_SAGs.overlap[[m]]))-
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_tanzi_SAGs.overlap[[m]])),nrow = 2))
}
res.SAG_DCG=foreach(i=1:length(msbb_gse84422_tanzi_SAGs.overlap))%do%{
data.frame(SAGs_in_DCGs=unlist(lapply(msbb_gse84422_tanzi_SAGs.overlap[i],paste,collapse=",")),
DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length)[[i]]),
Expected_SAGs_in_DCGs=unname(unlist(msbb_exp_DCGs[i])),
Fisher_statistic=unname(unlist(lapply(msbb_gse84422_tanzi_SAGs.fisher_results[i],function(s)s$estimate))),
pval=unname(unlist(lapply(msbb_gse84422_tanzi_SAGs.fisher_results[i],function(p)p$p.value))),
stringsAsFactors = F)
}
names(res.SAG_DCG)=names(msbb_gse84422_tanzi_SAGs.overlap)
res.SAG_DCG_df=data.frame(rbindlist(res.SAG_DCG),stringsAsFactors = F)
rownames(res.SAG_DCG_df)=names(msbb_gse84422_tanzi_SAGs.overlap)
res.SAG_DCG_df$adj.p=p.adjust(p = res.SAG_DCG_df$pval,method = "fdr")
res.SAG_DCG_df$SAGs_in_DCGs=as.numeric(res.SAG_DCG_df$SAGs_in_DCGs)
res.SAG_DCG_df=res.SAG_DCG_df%>%rownames_to_column("Brain-region")%>%mutate(Representation=if_else(SAGs_in_DCGs>Expected_SAGs_in_DCGs,true = "Over",false = "Under"))
fwrite(res.SAG_DCG_df,"MSBB_SAG_DCG_EnrichmentSummary.txt",sep = "\t",col.names = T,row.names = F,quote = F)
dhmc_bennet.NP=read.table("/Users/sandeepamberkar/Work/Data/BrainExpression_Datasets/GW-DNA-hydroxymethylation-Bennet/1-s2.0-S155252601633059X-mmc4_NP.txt",sep = "\t",header = T,as.is = T)
dhmc_bennet.NFT=read.table("/Users/sandeepamberkar/Work/Data/BrainExpression_Datasets/GW-DNA-hydroxymethylation-Bennet/1-s2.0-S155252601633059X-mmc5_NFT.txt",sep = "\t",header = T,as.is = T)
d5mc_bernstein.Dhml=scan("/Users/sandeepamberkar/Work/Data/BrainExpression_Datasets/5hMC_Tau_AD/ddw109_Supp/Supplemental Table 6_Bernstein et al_R1.txt",sep = "\n",what = "char")
msbb_gse84422_DCG_d5mc.overlap=lapply(msbb_gse84422.DCGs_list,function(x)length(intersect(x,d5mc_bernstein.Dhml)))
dhmc_bennet_NFT.genes=dhmc_bennet.NFT%>%filter(q.value_<=0.1)%>%pull(Nearest.gene)
dhmc_bennet_NP.genes=dhmc_bennet.NP%>%filter(q.vlaue_<=0.1)%>%pull(Neartest.gene)
msbb_gse84422_DCG_dhmc_NFT.overlap=lapply(msbb_gse84422.DCGs_list,function(x)length(intersect(x,dhmc_bennet_NFT.genes)))
msbb_gse84422_DCG_dhmc_NP.overlap=lapply(msbb_gse84422.DCGs_list,function(x)length(intersect(x,dhmc_bennet_NP.genes)))
msbb_exp_dhmc_NP_DCGs=lapply(lapply(msbb_gse84422.DCGs_list,length),function(x)round(length(dhmc_bennet_NP.genes)/21982*x,digits = 1))
msbb_gse84422_dhmc_NP.fisher_results=msbb_gse84422_dhmc_NFT.fisher_results=msbb_gse84422_d5mc.fisher_results=vector(mode = "list",length = length(msbb_gse84422.DCGs_list))
names(msbb_gse84422_dhmc_NP.fisher_results)=names(msbb_gse84422_dhmc_NFT.fisher_results)=names(msbb_gse84422_d5mc.fisher_results)=names(msbb_gse84422.DCGs_list)
for(m in 1:length(msbb_gse84422.DCGs_list)){
msbb_gse84422_dhmc_NP.fisher_results[[m]]=fisher.test(matrix(c(length(msbb_gse84422_DCG_dhmc_NP.overlap[[m]]),
length(dhmc_bennet_NP.genes)-length(msbb_gse84422_DCG_dhmc_NP.overlap[[m]]),
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_dhmc_NP.overlap[[m]]),
21982-(length(dhmc_bennet_NP.genes)-length(msbb_gse84422_DCG_dhmc_NP.overlap[[m]]))-
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_dhmc_NP.overlap[[m]])),nrow = 2))
}
res.dhmc_NP_DCG=foreach(i=1:length(msbb_gse84422_tanzi_SAGs.overlap))%do%{
data.frame(NPgenes_in_DCGs=unlist(lapply(msbb_gse84422_DCG_dhmc_NP.overlap[i],paste,collapse=",")),
DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length)[[i]]),
Expected_dhmc_NPgenes_in_DCGs=unname(unlist(msbb_exp_DCGs[i])),
Fisher_statistic=unname(unlist(lapply(msbb_gse84422_dhmc_NP.fisher_results[i],function(s)s$estimate))),
pval=unname(unlist(lapply(msbb_gse84422_dhmc_NP.fisher_results[i],function(p)p$p.value))),
stringsAsFactors = F)
}
names(res.dhmc_NP_DCG)=names(msbb_gse84422_DCG_dhmc_NP.overlap)
res.dhmc_NP_DCG_df=data.frame(rbindlist(res.dhmc_NP_DCG),stringsAsFactors = F)
rownames(res.dhmc_NP_DCG_df)=names(msbb_gse84422_DCG_dhmc_NP.overlap)
res.dhmc_NP_DCG_df$adj.p=p.adjust(p = res.dhmc_NP_DCG_df$pval,method = "fdr")
res.dhmc_NP_DCG_df$NPgenes_in_DCGs=as.numeric(res.dhmc_NP_DCG_df$NPgenes_in_DCGs)
res.dhmc_NP_DCG_df=res.dhmc_NP_DCG_df%>%rownames_to_column("Brain-region")%>%mutate(Representation=if_else(NPgenes_in_DCGs>Expected_dhmc_NPgenes_in_DCGs,true = "Over",false = "Under"))
fwrite(res.dhmc_NP_DCG_df,"MSBB_DhMR_NP_DCG_EnrichmentSummary.txt",sep = "\t",col.names = T,row.names = F,quote = F)
for(m in 1:length(msbb_gse84422.DCGs_list)){
msbb_gse84422_dhmc_NFT.fisher_results[[m]]=fisher.test(matrix(c(length(msbb_gse84422_tanzi_SAGs.overlap[[m]]),
length(dhmc_bennet_NFT.genes)-length(msbb_gse84422_DCG_dhmc_NFT.overlap[[m]]),
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_dhmc_NFT.overlap[[m]]),
21982-(length(dhmc_bennet_NFT.genes)-length(msbb_gse84422_DCG_dhmc_NFT.overlap[[m]]))-
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_dhmc_NFT.overlap[[m]])),nrow = 2))
}
res.dhmc_NFT_DCG=foreach(i=1:length(msbb_gse84422_tanzi_SAGs.overlap))%do%{
data.frame(NFTgenes_in_DCGs=unlist(lapply(msbb_gse84422_DCG_dhmc_NFT.overlap[i],paste,collapse=",")),
DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length)[[i]]),
Expected_dhmc_NFTgenes_in_DCGs=unname(unlist(msbb_exp_DCGs[i])),
Fisher_statistic=unname(unlist(lapply(msbb_gse84422_dhmc_NFT.fisher_results[i],function(s)s$estimate))),
pval=unname(unlist(lapply(msbb_gse84422_dhmc_NFT.fisher_results[i],function(p)p$p.value))),
stringsAsFactors = F)
}
names(res.dhmc_NFT_DCG)=names(msbb_gse84422_DCG_dhmc_NFT.overlap)
res.dhmc_NFT_DCG_df=data.frame(rbindlist(res.dhmc_NFT_DCG),stringsAsFactors = F)
rownames(res.dhmc_NFT_DCG_df)=names(msbb_gse84422_DCG_dhmc_NFT.overlap)
res.dhmc_NFT_DCG_df$adj.p=p.adjust(p = res.dhmc_NFT_DCG_df$pval,method = "fdr")
res.dhmc_NFT_DCG_df$NFTgenes_in_DCGs=as.numeric(res.dhmc_NFT_DCG_df$NFTgenes_in_DCGs)
res.dhmc_NFT_DCG_df=res.dhmc_NFT_DCG_df%>%rownames_to_column("Brain-region")%>%mutate(Representation=if_else(NFTgenes_in_DCGs>Expected_dhmc_NFTgenes_in_DCGs,true = "Over",false = "Under"))
fwrite(res.dhmc_NFT_DCG_df,"MSBB_DhMR_NFT_DCG_EnrichmentSummary.txt",sep = "\t",col.names = T,row.names = F,quote = F)
#Brain cell type markers Zhang et al.
zhang_celltype_ADgenes=read.xls('/Users/sandeepamberkar/Work/Data/BrainExpression_Datasets/Zhang_19BrainRegions_Paper/Zhang_BrainCelltype_Markers.xlsx',skip=1,sheet=3,header=T,as.is=T)
zhang_celltype_ADgenes.list=vector(mode = 'list',length = 5)
names(zhang_celltype_ADgenes.list)=sort(unique(zhang_celltype_ADgenes$Cell.type))
zhang_celltype_ADgenes.list$Astrocytes=zhang_celltype_ADgenes$Gene.symbol[grep(pattern = names(zhang_celltype_ADgenes.list)[1],zhang_celltype_ADgenes$Cell.type)]
zhang_celltype_ADgenes.list$Endothelial=zhang_celltype_ADgenes$Gene.symbol[grep(pattern = names(zhang_celltype_ADgenes.list)[2],zhang_celltype_ADgenes$Cell.type)]
zhang_celltype_ADgenes.list$Microglia=zhang_celltype_ADgenes$Gene.symbol[grep(pattern = names(zhang_celltype_ADgenes.list)[3],zhang_celltype_ADgenes$Cell.type)]
zhang_celltype_ADgenes.list$Neurons=zhang_celltype_ADgenes$Gene.symbol[grep(pattern = names(zhang_celltype_ADgenes.list)[4],zhang_celltype_ADgenes$Cell.type)]
zhang_celltype_ADgenes.list$Oligodendrocytes=zhang_celltype_ADgenes$Gene.symbol[grep(pattern = names(zhang_celltype_ADgenes.list)[5],zhang_celltype_ADgenes$Cell.type)]
msbb_gse84422_DCG_Astrocytes.overlap=lapply(msbb_gse84422.DCGs_list,intersect,zhang_celltype_ADgenes.list$Astrocytes)
msbb_gse84422_DCG_Endothelial.overlap=lapply(msbb_gse84422.DCGs_list,intersect,zhang_celltype_ADgenes.list$Endothelial)
msbb_gse84422_DCG_Microglia.overlap=lapply(msbb_gse84422.DCGs_list,intersect,zhang_celltype_ADgenes.list$Microglia)
msbb_gse84422_DCG_Neurons.overlap=lapply(msbb_gse84422.DCGs_list,intersect,zhang_celltype_ADgenes.list$Neurons)
msbb_gse84422_DCG_Oligodendrocytes.overlap=lapply(msbb_gse84422.DCGs_list,intersect,zhang_celltype_ADgenes.list$Oligodendrocytes)
msbb_gse84422.exp_Astrocytes=lapply(lapply(msbb_gse84422.DCGs_list,length),function(x)round(length(zhang_celltype_ADgenes.list$Astrocytes)/21982*x,digits = 1))
msbb_gse84422.exp_Endothelial=lapply(lapply(msbb_gse84422.DCGs_list,length),function(x)round(length(zhang_celltype_ADgenes.list$Endothelial)/21982*x,digits = 1))
msbb_gse84422.exp_Microglia=lapply(lapply(msbb_gse84422.DCGs_list,length),function(x)round(length(zhang_celltype_ADgenes.list$Microglia)/21982*x,digits = 1))
msbb_gse84422.exp_Neurons=lapply(lapply(msbb_gse84422.DCGs_list,length),function(x)round(length(zhang_celltype_ADgenes.list$Neurons)/21982*x,digits = 1))
msbb_gse84422.exp_Oligodendrocytes=lapply(lapply(msbb_gse84422.DCGs_list,length),function(x)round(length(zhang_celltype_ADgenes.list$Oligodendrocytes)/21982*x,digits = 1))
msbb_gse84422_Astrocytes.fisher_results=msbb_gse84422_Endothelial.fisher_results=msbb_gse84422_Microglia.fisher_results=msbb_gse84422_Neurons.fisher_results=msbb_gse84422_Oligodendrocytes.fisher_results=vector(mode = "list",length = length(msbb_gse84422.DCGs_list))
names(msbb_gse84422_Astrocytes.fisher_results)=names(msbb_gse84422_Endothelial.fisher_results)=names(msbb_gse84422_Microglia.fisher_results)=names(msbb_gse84422_Neurons.fisher_results)=names(msbb_gse84422_Oligodendrocytes.fisher_results)=names(msbb_gse84422_total_DCGs)
for(m in 1:length(msbb_gse84422_Astrocytes.fisher_results)){
msbb_gse84422_Astrocytes.fisher_results[[m]]=fisher.test(matrix(c(length(msbb_gse84422.exp_Astrocytes[[m]]),
length(zhang_celltype_ADgenes.list$Astrocytes)-length(msbb_gse84422_DCG_Astrocytes.overlap[[m]]),
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Astrocytes.overlap[[m]]),
21982-(length(zhang_celltype_ADgenes.list$Astrocytes)-length(msbb_gse84422_DCG_Astrocytes.overlap[[m]]))-
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Astrocytes.overlap[[m]])),nrow = 2))
}
for(m in 1:length(msbb_gse84422_Endothelial.fisher_results)){
msbb_gse84422_Endothelial.fisher_results[[m]]=fisher.test(matrix(c(length(msbb_gse84422.exp_Endothelial[[m]]),
length(zhang_celltype_ADgenes.list$Endothelial)-length(msbb_gse84422_DCG_Endothelial.overlap[[m]]),
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Endothelial.overlap[[m]]),
21982-(length(zhang_celltype_ADgenes.list$Endothelial)-length(msbb_gse84422_DCG_Endothelial.overlap[[m]]))-
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Endothelial.overlap[[m]])),nrow = 2))
}
for(m in 1:length(msbb_gse84422_Microglia.fisher_results)){
msbb_gse84422_Microglia.fisher_results[[m]]=fisher.test(matrix(c(length(msbb_gse84422.exp_Microglia[[m]]),
length(zhang_celltype_ADgenes.list$Microglia)-length(msbb_gse84422_DCG_Microglia.overlap[[m]]),
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Microglia.overlap[[m]]),
21982-(length(zhang_celltype_ADgenes.list$Microglia)-length(msbb_gse84422_DCG_Microglia.overlap[[m]]))-
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Microglia.overlap[[m]])),nrow = 2))
}
for(m in 1:length(msbb_gse84422_Neurons.fisher_results)){
msbb_gse84422_Neurons.fisher_results[[m]]=fisher.test(matrix(c(length(msbb_gse84422.exp_Neurons[[m]]),
length(zhang_celltype_ADgenes.list$Neurons)-length(msbb_gse84422_DCG_Neurons.overlap[[m]]),
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Neurons.overlap[[m]]),
21982-(length(zhang_celltype_ADgenes.list$Neurons)-length(msbb_gse84422_DCG_Neurons.overlap[[m]]))-
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Neurons.overlap[[m]])),nrow = 2))
}
for(m in 1:length(msbb_gse84422_Oligodendrocytes.fisher_results)){
msbb_gse84422_Oligodendrocytes.fisher_results[[m]]=fisher.test(matrix(c(length(msbb_gse84422.exp_Oligodendrocytes[[m]]),
length(zhang_celltype_ADgenes.list$Oligodendrocytes)-length(msbb_gse84422_DCG_Oligodendrocytes.overlap[[m]]),
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Oligodendrocytes.overlap[[m]]),
21982-(length(zhang_celltype_ADgenes.list$Oligodendrocytes)-length(msbb_gse84422_DCG_Oligodendrocytes.overlap[[m]]))-
lapply(msbb_gse84422.DCGs_list,length)[[m]]-length(msbb_gse84422_DCG_Oligodendrocytes.overlap[[m]])),nrow = 2))
}
res.DCG_Astrocytes=foreach(i=1:length(msbb_gse84422_DCG_Astrocytes.overlap))%do%{
data.frame(AstrocyteMarkers_in_DCGs=unlist(lapply(msbb_gse84422_DCG_Astrocytes.overlap[i],paste,collapse=",")),
Nr_AstrocyteMarkers_in_DCGs=unlist(lapply(lapply(msbb_gse84422_DCG_Astrocytes.overlap,length)[i],paste,collapse=",")),
DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length)[[i]]),
Expected_AstrocyteMarkers_in_DCGs=unname(unlist(msbb_gse84422.exp_Astrocytes[i])),
Fisher_statistic=unname(unlist(lapply(msbb_gse84422_Astrocytes.fisher_results[i],function(s)s$estimate))),
pval=unname(unlist(lapply(msbb_gse84422_Astrocytes.fisher_results[i],function(p)p$p.value))),
stringsAsFactors = F)
}
names(res.DCG_Astrocytes)=names(msbb_gse84422_DCG_Astrocytes.overlap)
res.DCG_Astrocytes_df=data.frame(rbindlist(res.DCG_Astrocytes),stringsAsFactors = F)
rownames(res.DCG_Astrocytes_df)=names(msbb_gse84422_DCG_Astrocytes.overlap)
res.DCG_Astrocytes_df$adj.p=p.adjust(p = res.DCG_Astrocytes_df$pval,method = "fdr")
res.DCG_Astrocytes_df$Expected_AstrocyteMarkers_in_DCGs=as.numeric(res.DCG_Astrocytes_df$Expected_AstrocyteMarkers_in_DCGs)
res.DCG_Astrocytes_df$Nr_AstrocyteMarkers_in_DCGs=as.numeric(res.DCG_Astrocytes_df$Nr_AstrocyteMarkers_in_DCGs)
res.DCG_Astrocytes_df=res.DCG_Astrocytes_df%>%rownames_to_column("Brain-region")%>%mutate(Representation=if_else(Nr_AstrocyteMarkers_in_DCGs>Expected_AstrocyteMarkers_in_DCGs,true = "Over",false = "Under"))
res.DCG_Endothelial=foreach(i=1:length(msbb_gse84422_DCG_Endothelial.overlap))%do%{
data.frame(EndothelialMarkers_in_DCGs=unlist(lapply(msbb_gse84422_DCG_Endothelial.overlap[i],paste,collapse=",")),
Nr_EndothelialMarkers_in_DCGs=unlist(lapply(lapply(msbb_gse84422_DCG_Endothelial.overlap,length)[i],paste,collapse=",")),
DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length)[[i]]),
Expected_EndothelialMarkers_in_DCGs=unname(unlist(msbb_gse84422.exp_Endothelial[i])),
Fisher_statistic=unname(unlist(lapply(msbb_gse84422_Endothelial.fisher_results[i],function(s)s$estimate))),
pval=unname(unlist(lapply(msbb_gse84422_Endothelial.fisher_results[i],function(p)p$p.value))),
stringsAsFactors = F)
}
names(res.DCG_Endothelial)=names(msbb_gse84422_DCG_Endothelial.overlap)
res.DCG_Endothelial_df=data.frame(rbindlist(res.DCG_Endothelial),stringsAsFactors = F)
rownames(res.DCG_Endothelial_df)=names(msbb_gse84422_DCG_Endothelial.overlap)
res.DCG_Endothelial_df$adj.p=p.adjust(p = res.DCG_Endothelial_df$pval,method = "fdr")
res.DCG_Endothelial_df$Expected_EndothelialMarkers_in_DCGs=as.numeric(res.DCG_Endothelial_df$Expected_EndothelialMarkers_in_DCGs)
res.DCG_Endothelial_df$Nr_EndothelialMarkers_in_DCGs=as.numeric(res.DCG_Endothelial_df$Nr_EndothelialMarkers_in_DCGs)
res.DCG_Endothelial_df=res.DCG_Endothelial_df%>%rownames_to_column("Brain-region")%>%mutate(Representation=if_else(Nr_EndothelialMarkers_in_DCGs>Expected_EndothelialMarkers_in_DCGs,true = "Over",false = "Under"))
res.DCG_Microglia=foreach(i=1:length(msbb_gse84422_DCG_Microglia.overlap))%do%{
data.frame(MicrogliaMarkers_in_DCGs=unlist(lapply(msbb_gse84422_DCG_Microglia.overlap[i],paste,collapse=",")),
Nr_MicrogliaMarkers_in_DCGs=unlist(lapply(lapply(msbb_gse84422_DCG_Microglia.overlap,length)[i],paste,collapse=",")),
DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length)[[i]]),
Expected_MicrogliaMarkers_in_DCGs=unname(unlist(msbb_gse84422.exp_Microglia[i])),
Fisher_statistic=unname(unlist(lapply(msbb_gse84422_Microglia.fisher_results[i],function(s)s$estimate))),
pval=unname(unlist(lapply(msbb_gse84422_Microglia.fisher_results[i],function(p)p$p.value))),
stringsAsFactors = F)
}
names(res.DCG_Microglia)=names(msbb_gse84422_DCG_Microglia.overlap)
res.DCG_Microglia_df=data.frame(rbindlist(res.DCG_Microglia),stringsAsFactors = F)
rownames(res.DCG_Microglia_df)=names(msbb_gse84422_DCG_Microglia.overlap)
res.DCG_Microglia_df$adj.p=p.adjust(p = res.DCG_Microglia_df$pval,method = "fdr")
res.DCG_Microglia_df$Expected_MicrogliaMarkers_in_DCGs=as.numeric(res.DCG_Microglia_df$Expected_MicrogliaMarkers_in_DCGs)
res.DCG_Microglia_df$Nr_MicrogliaMarkers_in_DCGs=as.numeric(res.DCG_Microglia_df$Nr_MicrogliaMarkers_in_DCGs)
res.DCG_Microglia_df=res.DCG_Microglia_df%>%rownames_to_column("Brain-region")%>%mutate(Representation=if_else(Nr_MicrogliaMarkers_in_DCGs>Expected_MicrogliaMarkers_in_DCGs,true = "Over",false = "Under"))
res.DCG_Neurons=foreach(i=1:length(msbb_gse84422_DCG_Neurons.overlap))%do%{
data.frame(NeuronsMarkers_in_DCGs=unlist(lapply(msbb_gse84422_DCG_Neurons.overlap[i],paste,collapse=",")),
Nr_NeuronsMarkers_in_DCGs=unlist(lapply(lapply(msbb_gse84422_DCG_Neurons.overlap,length)[i],paste,collapse=",")),
DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length)[[i]]),
Expected_NeuronsMarkers_in_DCGs=unname(unlist(msbb_gse84422.exp_Neurons[i])),
Fisher_statistic=unname(unlist(lapply(msbb_gse84422_Neurons.fisher_results[i],function(s)s$estimate))),
pval=unname(unlist(lapply(msbb_gse84422_Neurons.fisher_results[i],function(p)p$p.value))),
stringsAsFactors = F)
}
names(res.DCG_Neurons)=names(msbb_gse84422_DCG_Neurons.overlap)
res.DCG_Neurons_df=data.frame(rbindlist(res.DCG_Neurons),stringsAsFactors = F)
rownames(res.DCG_Neurons_df)=names(msbb_gse84422_DCG_Neurons.overlap)
res.DCG_Neurons_df$adj.p=p.adjust(p = res.DCG_Neurons_df$pval,method = "fdr")
res.DCG_Neurons_df$Expected_NeuronsMarkers_in_DCGs=as.numeric(res.DCG_Neurons_df$Expected_NeuronsMarkers_in_DCGs)
res.DCG_Neurons_df$Nr_NeuronsMarkers_in_DCGs=as.numeric(res.DCG_Neurons_df$Nr_NeuronsMarkers_in_DCGs)
res.DCG_Neurons_df=res.DCG_Neurons_df%>%rownames_to_column("Brain-region")%>%mutate(Representation=if_else(Nr_NeuronsMarkers_in_DCGs>Expected_NeuronsMarkers_in_DCGs,true = "Over",false = "Under"))
res.DCG_Oligodendrocytes=foreach(i=1:length(msbb_gse84422_DCG_Oligodendrocytes.overlap))%do%{
data.frame(OligodendrocytesMarkers_in_DCGs=unlist(lapply(msbb_gse84422_DCG_Oligodendrocytes.overlap[i],paste,collapse=",")),
Nr_OligodendrocytesMarkers_in_DCGs=unlist(lapply(lapply(msbb_gse84422_DCG_Oligodendrocytes.overlap,length)[i],paste,collapse=",")),
DCGs=unlist(lapply(msbb_gse84422.DCGs_list,length)[[i]]),
Expected_OligodendrocytesMarkers_in_DCGs=unname(unlist(msbb_gse84422.exp_Oligodendrocytes[i])),
Fisher_statistic=unname(unlist(lapply(msbb_gse84422_Oligodendrocytes.fisher_results[i],function(s)s$estimate))),
pval=unname(unlist(lapply(msbb_gse84422_Oligodendrocytes.fisher_results[i],function(p)p$p.value))),
stringsAsFactors = F)
}
names(res.DCG_Oligodendrocytes)=names(msbb_gse84422_DCG_Oligodendrocytes.overlap)
res.DCG_Oligodendrocytes_df=data.frame(rbindlist(res.DCG_Oligodendrocytes),stringsAsFactors = F)
rownames(res.DCG_Oligodendrocytes_df)=names(msbb_gse84422_DCG_Oligodendrocytes.overlap)
res.DCG_Oligodendrocytes_df$adj.p=p.adjust(p = res.DCG_Oligodendrocytes_df$pval,method = "fdr")
res.DCG_Oligodendrocytes_df$Expected_OligodendrocytesMarkers_in_DCGs=as.numeric(res.DCG_Oligodendrocytes_df$Expected_OligodendrocytesMarkers_in_DCGs)
res.DCG_Oligodendrocytes_df$Nr_OligodendrocytesMarkers_in_DCGs=as.numeric(res.DCG_Oligodendrocytes_df$Nr_OligodendrocytesMarkers_in_DCGs)
res.DCG_Oligodendrocytes_df=res.DCG_Oligodendrocytes_df%>%rownames_to_column("Brain-region")%>%mutate(Representation=if_else(Nr_OligodendrocytesMarkers_in_DCGs>Expected_OligodendrocytesMarkers_in_DCGs,true = "Over",false = "Under"))
write.table(res.DCG_Astrocytes_df,"DCG_Astrocyte_Enrichment.txt",sep = "\t",col.names = T,row.names = F,quote = F)
write.table(res.DCG_Endothelial_df,"DCG_Endothelial_Enrichment.txt",sep = "\t",col.names = T,row.names = F,quote = F)
write.table(res.DCG_Microglia_df,"DCG_Microglia_Enrichment.txt",sep = "\t",col.names = T,row.names = F,quote = F)
write.table(res.DCG_Neurons_df,"DCG_Neurons_Enrichment.txt",sep = "\t",col.names = T,row.names = F,quote = F)
write.table(res.DCG_Oligodendrocytes_df,"DCG_Oligodendrocytes_Enrichment.txt",sep = "\t",col.names = T,row.names = F,quote = F)
#Validation with Berchtold dataset
gse48350_diffcoexp_files=list.files(path = "/Users/sandeepamberkar/Work/Data/AD_GSE48350/",pattern = "*diffcoexp.RDS",full.names = T)
gse48350_brain_regions.diffcoexp=vector(mode = "list",length = 4)
names(gse48350_brain_regions.diffcoexp)=gsub(pattern = "./|diffcoexp.RDS",replacement = "",x = gse48350_diffcoexp_files)%>%gsub(pattern = "/UsersandeepamberkaWorDatAD_GSE4835/",replacement = "")
for(f in 1:4){
gse48350_brain_regions.diffcoexp[[f]]=readRDS(gse48350_diffcoexp_files[f])
}
gse48350.DCGs=lapply(gse48350_brain_regions.diffcoexp,function(x)x$DCGs)
gse48350.DCLs=lapply(gse48350_brain_regions.diffcoexp,function(x)x$DCLs)
#Plot interesting results
foxo_subg_regions=c("Caudate_Nucleus","Frontal_Pole","Precentral_Gyrus","Temporal_Pole")
foxo_subg_list=vector(mode = "list",length = length(foxo_subg_regions))
foxo_subg_colours=brewer.pal(n = length(foxo_subg_regions),name = "YlGnBu")
names(foxo_subg_colours)=foxo_subg_regions
for(r in foxo_subg_regions){
subg=graph.data.frame(msbb_gse84422.DCLs[[r]][c(grep(pattern = paste(msbb_gse84422.DRGs[[r]]$DCG[grep(pattern = "FOXO",x = msbb_gse84422.DRGs[[r]]$Upstream_TFofDCG)]%>%droplevels,collapse = "|"),x = msbb_gse84422.DCLs[[r]]$Gene.1),
grep(pattern = paste(msbb_gse84422.DRGs[[r]]$DCG[grep(pattern = "FOXO",x = msbb_gse84422.DRGs[[r]]$Upstream_TFofDCG)]%>%droplevels,collapse = "|"),x = msbb_gse84422.DCLs[[r]]$Gene.2)),c('Gene.1','Gene.2')],directed = F)
V(subg)$colour=foxo_subg_colours[[r]]
foxo_subg_list[[r]]=subg
}
#Randomisation test to determine significance of top10 enriched pathways
random_enrichedKEGG.list=vector(mode = "list",length = 9)
names(random_enrichedKEGG.list)=c("AMYG","CN","HIPP","IFG","NAc","PCG","FC","PTMN","TP")
random_enrichedKEGG.list$AMYG=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Amygdala),replace = F))
random_enrichedKEGG.list$CN=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Caudate_Nucleus),replace = F))
random_enrichedKEGG.list$HIPP=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Hippocampus),replace = F))
random_enrichedKEGG.list$IFG=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Inferior_Frontal_Gyrus),replace = F))
random_enrichedKEGG.list$NAc=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Nucleus_Accumbens),replace = F))
random_enrichedKEGG.list$PCG=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Precentral_Gyrus),replace = F))
random_enrichedKEGG.list$FC=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Prefrontal_Cortex),replace = F))
random_enrichedKEGG.list$PTMN=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Putamen),replace = F))
random_enrichedKEGG.list$TP=replicate(1000,sample(x = levels(kegg_2016_pathways.df$ont),size = length(msbb_gse84422_DCGs_KEGG.list$Temporal_Pole),replace = F))
random_enrichedKEGG_freq.matrix=matrix(NA,ncol = 8,nrow = 1000)
colnames(random_enrichedKEGG_freq.matrix)=names(random_enrichedKEGG.list)[-9]
for(i in 1:dim(random_enrichedKEGG_freq.matrix)[1]){
random_enrichedKEGG_freq.matrix[i,]=unlist(lapply(random_enrichedKEGG.list[-9],function(y)length(intersect(y[,i],names(table(unlist(lapply(random_enrichedKEGG.list[-9],function(x)x[,i])))[1:10])))))
}
#Compare with DEGs generated for the same dataset
msbb_gse84422_GPL96_97_samplesToAnalyse.exprs=readRDS("msbb_gse84422_GPL96_97_samplesToAnalyse_exprs.RDS")
msbb_gse84422_GPL96_97_samplesToAnalyse=readRDS("msbb_gse84422_GPL96_97_samplesToAnalyse.RDS")
msbb_gse84422.DEGs=vector(mode = "list",length = length(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs))
names(msbb_gse84422.DEGs)=names(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs)
for(i in 1:17){
c_exprs=msbb_gse84422_GPL96_97_samplesToAnalyse.exprs[[i]][,msbb_gse84422_GPL96_97_samplesToAnalyse[[i]]$SampleType=="CONTROL"]
d_exprs=msbb_gse84422_GPL96_97_samplesToAnalyse.exprs[[i]][,msbb_gse84422_GPL96_97_samplesToAnalyse[[i]]$SampleType=="AD"]
group=factor(c(rep("CONTROL",dim(c_exprs)[2]),rep("AD",dim(d_exprs)[2])))
design_df=model.matrix(~0+group)
colnames(design_df)=levels(group)
contrasts_matrix=makeContrasts(CONTROL-AD,levels = design_df)
fit=lmFit(object = cbind.data.frame(c_exprs,d_exprs),design = design_df)
fit2=contrasts.fit(fit,contrasts_matrix)
fit2=eBayes(fit2,trend = T)
msbb_gse84422.DEGs[[i]]=topTable(fit = fit2,coef = 1,number = dim(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs[[i]])[1],adjust.method = "BH",p.value = 0.1)%>%rownames_to_column("Gene")
}
msbb_gse84422.DEGs=Filter(f = function(x)dim(x)[1]>0,x = msbb_gse84422.DEGs)
msbb_gse84422.DEGs$`Inferior Temporal Gyrus`$EntrezID=unname(mapIds(x = org.Hs.eg.db,keys = msbb_gse84422.DEGs$`Inferior Temporal Gyrus`$Gene,column = "ENTREZID",keytype = "SYMBOL"))
msbb_gse84422.DEGs$`Putamen`$EntrezID=unname(mapIds(x = org.Hs.eg.db,keys = msbb_gse84422.DEGs$`Putamen`$Gene,column = "ENTREZID",keytype = "SYMBOL"))