-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript_diffEvalue.R
More file actions
executable file
·111 lines (85 loc) · 3.44 KB
/
script_diffEvalue.R
File metadata and controls
executable file
·111 lines (85 loc) · 3.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
###################different blast threshold for E-value####################
load('/home/Yulong/RESEARCH/neuro/Bioinfor/PhyloViz/wholePhyloDataBlast.RData')
thresVec <- c(1e-3, 1e-4, 5e-4)
for (i in thresVec) {
wholeProfile <- apply(wholePhyloData, 1:2, function(x) {
if (x < i) {
return(1)
} else {
return(0)
}
})
hsaNum <- ncol(wholeProfile) + 1
wholeProfile <- cbind(wholeProfile, rep(1, nrow(wholeProfile)))
colnames(wholeProfile)[hsaNum] <- 'hsa'
fileName <- paste0('diffEvalue/wholeProfile', i, '.RData')
save(wholeProfile, file = fileName)
}
############################################################################
#########################generate cor and jaccard matrix##################
library('bigmemory')
library('vegan')
library('compiler')
enableJIT(3)
## file name
thresVec <- c('1e-03', '1e-04', '5e-04')
phyloFileVec <- paste0('diffEvalue/wholeProfile', thresVec, '.RData')
for (i in 1:3) {
## load profile data
load(phyloFileVec[i])
## jaccard similariy
jaccardSim <- 1 - vegdist(wholeProfile[1:10, ], method = 'jaccard', diag = TRUE, upper = TRUE)
jaccardSim <- as.matrix(jaccardSim)
jaccardSim <- diag(1, ncol(jaccardSim), nrow(jaccardSim)) + jaccardSim
simFilePrefix <- paste0('jaccardSim', thresVec[i])
as.big.matrix(jaccardSim, backingfile = paste0(simFilePrefix, '.bin'), descriptorfile=paste0(simFilePrefix, '.desc'), backingpath = 'diffEvalue')
## correlation coefficient
wholeCor <- cor(t(wholeProfile))
corFilePrefix <- paste0('wholeCor', thresVec[i])
as.big.matrix(wholeCor, backingfile = paste0(corFilePrefix, '.bin'), descriptorfile=paste0(corFilePrefix, '.desc'), backingpath = 'diffEvalue')
}
enableJIT(0)
########################################################################
#######################select Jaccard top 500########################
library('bigmemory')
library('foreach')
library('doMC')
registerDoMC(8)
## file name
thresVec <- c('1e-03', '1e-04', '5e-04')
filePath <- 'diffEvalue'
for (thresi in 1:3) {
jaccardDescPath <- paste0(filePath, '/jaccardSim', thresVec[thresi], '.desc')
wholeCorDescPath <- paste0(filePath, '/wholeCor', thresVec[thresi], '.desc')
jaccardSim <- attach.big.matrix(jaccardDescPath)
wholeCor <- attach.big.matrix(wholeCorDescPath)
interList <- foreach(i = 1:ncol(jaccardSim)) %dopar% {
corVec <- wholeCor[, i]
jacSimVec <- jaccardSim[, i]
corSetNum <- which(corVec >= 0)
jacSimVec <- jacSimVec[corSetNum]
## delete self nodes
jacSimVec <- jacSimVec[names(jacSimVec) != rownames(jaccardSim)[i]]
jacSimVec <- sort(jacSimVec, decreasing = TRUE)
eachTop500 <- jacSimVec[1:500]
}
interNames <- sapply(interList, names)
colnames(interNames) <- rownames(jaccardSim)
interJac <- sapply(interList, unname)
colnames(interJac) <- rownames(jaccardSim)
## get cor
interCor <- matrix(ncol = ncol(interNames), nrow = nrow(interNames))
for (i in 1:ncol(interNames)) {
corIdx <- match(interNames[, i], rownames(wholeCor))
interCor[, i] <- wholeCor[corIdx, i]
}
colnames(interCor) <- rownames(jaccardSim)
## round jaccard similarity and cor
interJac <- round(interJac, 2)
interCor <- round(interCor, 2)
top500List <- list(interNames = interNames,
interJac = interJac,
interCor = interCor)
save(top500List, file = paste0(filePath, '/top500List', thresVec[thresi], '.RData'))
}
#####################################################################