-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmsbb_array_diffcoexp2.R
More file actions
110 lines (87 loc) · 9.27 KB
/
msbb_array_diffcoexp2.R
File metadata and controls
110 lines (87 loc) · 9.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
library(org.Hs.eg.db)
library(dplyr)
library(magrittr)
library(data.table)
library(diffcoexp)
library(GEOquery)
#
#
#
msbb_gse84422_series_matrix.GPL96=getGEO(filename = "../../../GSE84422-GPL96_series_matrix.txt.gz",GSEMatrix = F,AnnotGPL = T)
msbb_gse84422_series_matrix.GPL97=getGEO(filename = "../../../GSE84422-GPL97_series_matrix.txt.gz",GSEMatrix = F,AnnotGPL = T)
msbb_gse84422_series_matrix.GPL570=getGEO(filename = "../../../GSE84422-GPL570_series_matrix.txt.gz",GSEMatrix = F,AnnotGPL = T)
msbb_gse84422.fData=vector(mode = "list",length = 3)
names(msbb_gse84422.fData)=c("GPL96","GPL97","GPL570")
msbb_gse84422.fData$GPL96=fData(msbb_gse84422_series_matrix.GPL96)[,c(1:2,11:13)]
msbb_gse84422.fData$GPL97=fData(msbb_gse84422_series_matrix.GPL97)[,c(1:2,11:13)]
msbb_gse84422.fData$GPL570=fData(msbb_gse84422_series_matrix.GPL570)[,c(1:2,11:13)]
msbb_gse84422.pData=vector(mode = "list",length = 3)
names(msbb_gse84422.pData)=c("GPL96","GPL97","GPL570")
msbb_gse84422.pData$GPL96=pData(phenoData(msbb_gse84422_series_matrix.GPL96))
msbb_gse84422.pData$GPL97=pData(phenoData(msbb_gse84422_series_matrix.GPL97))
msbb_gse84422.pData$GPL570=pData(phenoData(msbb_gse84422_series_matrix.GPL570))
msbb_gse84422.pData$GPL96$pseudoSampleID=msbb_gse84422.pData$GPL97$pseudoSampleID=paste("pSample",1:dim(msbb_gse84422.pData$GPL96)[1],sep = "")
msbb_gse84422.pData=lapply(msbb_gse84422.pData,function(x){x$`brain region:ch1`=gsub(pattern = "^ ",replacement = "",x = x$`brain region:ch1`);x})
msbb_gse84422.pData=lapply(msbb_gse84422.pData,function(x){x$`clinical dementia rating:ch1`=as.numeric(gsub(pattern = "^ ",replacement = "",x = x$`clinical dementia rating:ch1`));x})
msbb_gse84422.pData=lapply(msbb_gse84422.pData,function(x){x$`braak neurofibrillary tangle score:ch1`=as.numeric(gsub(pattern = "^ ",replacement = "",x = x$`braak neurofibrillary tangle score:ch1`));x})
msbb_gse84422.pData=lapply(msbb_gse84422.pData,function(x){x$`average neuritic plaque density:ch1`=as.numeric(gsub(pattern = "^ ",replacement = "",x = x$`average neuritic plaque density:ch1`));x})
msbb_gse84422.pData=lapply(msbb_gse84422.pData,function(x){x$`neuropathological category:ch1`=gsub(pattern = "^ ",replacement = "",x = x$`neuropathological category:ch1`);x})
msbb_gse84422.pData=lapply(msbb_gse84422.pData,function(x){x$`SampleType`="OTHER";x})
msbb_gse84422.pData=lapply(msbb_gse84422.pData,function(x){x$SampleType[which(x$`neuropathological category:ch1`=="Normal"&x$`clinical dementia rating:ch`<=0.5&x$`braak neurofibrillary tangle score:ch1`<=3)]="CONTROL";x})
msbb_gse84422.pData=lapply(msbb_gse84422.pData,function(x){x$SampleType[which((x$`neuropathological category:ch1`=="Probable AD"|x$`neuropathological category:ch1`=="Possible AD"|x$`neuropathological category:ch1`=="definite AD")&x$`clinical dementia rating:ch`>=1&x$`braak neurofibrillary tangle score:ch1`>=4)]="AD";x})
msbb_gse84422_GPL96_97_samples.Control=msbb_gse84422.pData$GPL96$pseudoSampleID[which(msbb_gse84422.pData$GPL96$`neuropathological category:ch1`=="Normal"&msbb_gse84422.pData$GPL96$`clinical dementia rating:ch`<=0.5&msbb_gse84422.pData$GPL96$`braak neurofibrillary tangle score:ch1`<=3)]
msbb_gse84422_GPL96_97_samples.AD=msbb_gse84422.pData$GPL96$pseudoSampleID[which(msbb_gse84422.pData$GPL96$`neuropathological category:ch1`=="Normal"&msbb_gse84422.pData$GPL96$`clinical dementia rating:ch`<=0.5&msbb_gse84422.pData$GPL96$`braak neurofibrillary tangle score:ch1`<=3)]
msbb_gse84422.exprs=vector(mode = "list",length = 3)
names(msbb_gse84422.exprs)=c("GPL96","GPL97","GPL570")
msbb_gse84422.exprs$GPL96=exprs(msbb_gse84422_series_matrix.GPL96)
msbb_gse84422.exprs$GPL97=exprs(msbb_gse84422_series_matrix.GPL97)
msbb_gse84422.exprs$GPL570=data.frame(exprs(msbb_gse84422_series_matrix.GPL570))
colnames(msbb_gse84422.exprs$GPL96)=colnames(msbb_gse84422.exprs$GPL97)=msbb_gse84422.pData$GPL96$pseudoSampleID
msbb_gse84422_exprs.GPL96_97=rbind.data.frame(msbb_gse84422.exprs$GPL96,msbb_gse84422.exprs$GPL97)
msbb_gse84422_exprs.GPL96_97$GeneSymbol=c(msbb_gse84422.fData$GPL96$`Gene Symbol`,msbb_gse84422.fData$GPL97$`Gene Symbol`)
msbb_gse84422_exprs_GPL96_97.agg=aggregate.data.frame(x=msbb_gse84422_exprs.GPL96_97[,-which(colnames(msbb_gse84422_exprs.GPL96_97)=="GeneSymbol")],by=list(symbol=msbb_gse84422_exprs.GPL96_97$GeneSymbol),mean)
rownames(msbb_gse84422_exprs_GPL96_97.agg)=msbb_gse84422_exprs_GPL96_97.agg$symbol
msbb_gse84422_exprs_GPL96_97.agg=msbb_gse84422_exprs_GPL96_97.agg[,-which(colnames(msbb_gse84422_exprs_GPL96_97.agg)=="symbol")]
msbb_gse84422.exprs$GPL570$GeneSymbol=msbb_gse84422.fData$GPL570$`Gene Symbol`
msbb_gse84422.exprs$GPL570=aggregate.data.frame(x=msbb_gse84422.exprs$GPL570[,-which(colnames(msbb_gse84422.exprs$GPL570)=="GeneSymbol")],by=list(symbol=msbb_gse84422.exprs$GPL570$GeneSymbol),mean)
rownames(msbb_gse84422.exprs$GPL570)=msbb_gse84422.exprs$GPL570$symbol
msbb_gse84422.exprs$GPL570=msbb_gse84422.exprs$GPL570[,-which(colnames(msbb_gse84422.exprs$GPL570)=="symbol")]
msbb_gse84422_GPL96_97_byRegion.exprs=vector(mode = "list",length = 17)
msbb_gse84422_GPL570_byRegion.exprs=vector(mode = "list",length = 2)
names(msbb_gse84422_GPL96_97_byRegion.exprs)=unique(msbb_gse84422.pData$GPL96$`brain region:ch1`)
names(msbb_gse84422_GPL570_byRegion.exprs)=unique(msbb_gse84422.pData$GPL570$`brain region:ch1`)
msbb_gse84422_GPL96_97_samplesToAnalyse=lapply(unique(msbb_gse84422.pData$GPL96$`brain region:ch1`),function(y)msbb_gse84422.pData$GPL96%>%filter(`brain region:ch1`==y&SampleType!="OTHER")%>%select(c(SampleType,pseudoSampleID)))
names(msbb_gse84422_GPL96_97_samplesToAnalyse)=unique(msbb_gse84422.pData$GPL96$`brain region:ch1`)
msbb_gse84422_GPL570_samplesToAnalyse=lapply(unique(msbb_gse84422.pData$GPL570$`brain region:ch1`),function(y)msbb_gse84422.pData$GPL570%>%filter(`brain region:ch1`==y&SampleType!="OTHER")%>%select(c(SampleType,geo_accession)))
names(msbb_gse84422_GPL570_samplesToAnalyse)=unique(msbb_gse84422.pData$GPL570$`brain region:ch1`)
msbb_gse84422_GPL96_97_samplesToAnalyse.exprs=lapply(msbb_gse84422_GPL96_97_samplesToAnalyse,function(y)msbb_gse84422_exprs_GPL96_97.agg[,colnames(msbb_gse84422_exprs_GPL96_97.agg)%in%y$pseudoSampleID])
msbb_gse84422_GPL570_samplesToAnalyse.exprs=lapply(msbb_gse84422_GPL570_samplesToAnalyse, function(y)msbb_gse84422.exprs$GPL570[,colnames(msbb_gse84422.exprs$GPL570)%in%y$geo_accession])
# saveRDS(msbb_gse84422_GPL570_samplesToAnalyse,"msbb_gse84422_GPL570_samplesToAnalyse.RDS")
# saveRDS(msbb_gse84422_GPL570_samplesToAnalyse.exprs,"msbb_gse84422_GPL570_samplesToAnalyse_exprs.RDS")
# saveRDS(msbb_gse84422_GPL96_97_samplesToAnalyse,"msbb_gse84422_GPL96_97_samplesToAnalyse.RDS")
# saveRDS(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs,"msbb_gse84422_GPL96_97_samplesToAnalyse_exprs.RDS")
setwd("/shared/hidelab2/user/md4zsa/Work/Data/MSBB_Array19/GSE84422")
msbb_gse84422_GPL96_97_samplesToAnalyse.exprs=readRDS("msbb_gse84422_GPL96_97_samplesToAnalyse_exprs.RDS")
msbb_gse84422_GPL96_97_samplesToAnalyse=readRDS("msbb_gse84422_GPL96_97_samplesToAnalyse.RDS")
msbb_gse84422_GPL570_samplesToAnalyse.exprs=readRDS("msbb_gse84422_GPL570_samplesToAnalyse_exprs.RDS")
msbb_gse84422_GPL570_samplesToAnalyse=readRDS("msbb_gse84422_GPL570_samplesToAnalyse.RDS")
#Diffcoexp analysis
for(i in 1:17){
c_exprs=msbb_gse84422_GPL96_97_samplesToAnalyse.exprs[[i]][,msbb_gse84422_GPL96_97_samplesToAnalyse[[i]]$SampleType=="CONTROL"]
d_exprs=msbb_gse84422_GPL96_97_samplesToAnalyse.exprs[[i]][,msbb_gse84422_GPL96_97_samplesToAnalyse[[i]]$SampleType=="AD"]
diffcoexp_out=diffcoexp(exprs.1 = c_exprs,exprs.2 = d_exprs,r.method="spearman",rth=0.6,q.diffth=0.1,q.dcgth=0.1)
saveRDS(diffcoexp_out,paste(names(msbb_gse84422_GPL96_97_samplesToAnalyse.exprs)[i],"diffcoexp_results.RDS",sep = "_"))
proc.time()
}
for(i in 1:2){
c_exprs=msbb_gse84422_GPL570_samplesToAnalyse.exprs[[i]][,msbb_gse84422_GPL570_samplesToAnalyse[[i]]$SampleType=="CONTROL"]
d_exprs=msbb_gse84422_GPL570_samplesToAnalyse.exprs[[i]][,msbb_gse84422_GPL570_samplesToAnalyse[[i]]$SampleType=="AD"]
diffcoexp_out=diffcoexp(exprs.1 = c_exprs[,],exprs.2 = d_exprs[,],r.method="spearman",rth=0.6,q.diffth=0.1,q.dcgth=0.1)
saveRDS(diffcoexp_out,paste(names(msbb_gse84422_GPL570_samplesToAnalyse.exprs)[i],"diffcoexp_results.RDS",sep = "_"))
}
msbb_gse84422_sample_breakdown_GPL96_97.NP1=lapply(unique(msbb_gse84422.pData$GPL96$`neuropathological category:ch1`),function(x)msbb_gse84422.pData$GPL96%>%filter(`neuropathological category:ch1`==x)%>%select(c(`brain region:ch1`,`average neuritic plaque density:ch1`))%>%table)
msbb_gse84422_sample_breakdown_GPL570.NP1=lapply(unique(msbb_gse84422.pData$GPL570$`neuropathological category:ch1`),function(x)msbb_gse84422.pData$GPL570%>%filter(`neuropathological category:ch1`==x)%>%pull(`brain region:ch1`)%>%table)
msbb_gse84422_sample_breakdown_GPL96_97.Braak=lapply(c(2,4,6),function(x)msbb_gse84422.pData$GPL96%>%filter(`braak neurofibrillary tangle score:ch1`==x)%>%pull(`brain region:ch1`)%>%table)
msbb_gse84422_sample_breakdown_GPL570.Braak=lapply(c(2,4,6),function(x)msbb_gse84422.pData$GPL570%>%filter(`braak neurofibrillary tangle score:ch1`==x)%>%pull(`brain region:ch1`)%>%table)
names(msbb_gse84422_sample_breakdown_GPL96_97.Braak)=names(msbb_gse84422_sample_breakdown_GPL570.Braak)=c(2,4,6)
names(msbb_gse84422_sample_breakdown_GPL96_97.NP1)=names(msbb_gse84422_sample_breakdown_GPL570.NP1)=unique(msbb_gse84422.pData$GPL570$`neuropathological category:ch1`)