forked from raylim/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrbind.R
More file actions
executable file
·109 lines (97 loc) · 3.72 KB
/
rbind.R
File metadata and controls
executable file
·109 lines (97 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env Rscript
# rbinds together tab-delimited tables and outputs to STDOUT
suppressPackageStartupMessages(library("optparse"));
# Session behaviour: warnings are silenced (warn = -1); on any error a
# traceback is printed and the script quits with exit status 1.
options(
  warn = -1,
  error = quote({
    traceback()
    q("no", status = 1)
  })
)

# Command-line switches. Each one is a boolean flag that is off unless given.
option_specs <- list(
  make_option(
    "--sampleName",
    action = "store_true",
    default = FALSE,
    help = "add samplename column [default %default]"
  ),
  make_option(
    "--normalLast",
    action = "store_true",
    default = FALSE,
    help = "normal sample last [default %default]"
  ),
  make_option(
    "--tumorNormal",
    action = "store_true",
    default = FALSE,
    help = "add tumor-normal samplename column [default %default]"
  )
)

# Parse flags and keep the remaining positional arguments.
parser <- OptionParser(
  usage = "%prog [options] vcf.file",
  option_list = option_specs
)
parsed <- parse_args(parser, positional_arguments = TRUE)

# `opt` holds the flag values; `files` holds the positional arguments, which
# the rest of the script reads as input table paths.
opt <- parsed$options
files <- parsed$args
# Derive the sample name for an input path: the file's basename up to (not
# including) its first dot. The directory part is removed *first* so that dots
# in directory names (e.g. "./calls/T_N.txt", "/data/v1.2/T_N.txt") cannot
# truncate or blank out the name.
sample_name_from_path <- function(path) {
  sub("\\..*", "", basename(path))
}

# In each column name, replace the first literal occurrence of "<sample>."
# with "<label>.". Matching is literal (fixed = TRUE) because sample names come
# from file names and are not regular expressions.
relabel_sample_columns <- function(cols, sample, label) {
  sub(paste0(sample, "."), paste0(label, "."), cols, fixed = TRUE)
}

# Read every input table and collect the annotated copies in `Data`
# (a named list of data frames that is merged further down the script).
#
# For each file:
#   * the first row is taken as the header (first '#' in each name removed);
#   * files with no data rows are skipped;
#   * --sampleName  adds a SAMPLE column and renames "<sample>." columns
#     to "SAMPLE.";
#   * --tumorNormal splits the sample name at '_' into tumor/normal, adds
#     TUMOR_SAMPLE / NORMAL_SAMPLE and renames the per-sample columns to
#     "TUMOR." / "NORMAL.";
#   * --normalLast  takes the text after the last '_' as the normal sample,
#     discovers the tumor samples from the column names and emits one table
#     per tumor (columns of the other tumors removed);
#   * with no flag at all the table is collected unchanged, so the script
#     still concatenates its inputs instead of printing nothing.
Data <- list()
for (f in files) {
  X <- read.table(
    file = f, sep = "\t", as.is = TRUE, comment.char = "", quote = ""
  )
  # Row 1 is the header, so <= 1 rows means there is no data to contribute.
  if (nrow(X) <= 1) {
    next
  }

  h <- sub("#", "", as.character(X[1, , drop = FALSE]))
  colnames(X) <- h
  # drop = FALSE keeps a single-column table a data frame.
  X <- X[-1, , drop = FALSE]

  sname <- sample_name_from_path(f)

  if (opt$sampleName) {
    X[, "SAMPLE"] <- sname
    h <- relabel_sample_columns(c(h, "SAMPLE"), sname, "SAMPLE")
    colnames(X) <- h
    # NOTE(review): keyed by sample name only, so a later file that yields the
    # same name replaces this entry -- confirm that is intended.
    Data[[sname]] <- X
  }

  if (opt$tumorNormal) {
    tumor <- sub("_.*", "", sname)   # text before the first '_'
    normal <- sub(".*_", "", sname)  # text after the last '_'
    X[, "TUMOR_SAMPLE"] <- tumor
    X[, "NORMAL_SAMPLE"] <- normal
    h <- c(h, "TUMOR_SAMPLE", "NORMAL_SAMPLE")
    h <- relabel_sample_columns(h, tumor, "TUMOR")
    h <- relabel_sample_columns(h, normal, "NORMAL")
    colnames(X) <- h
    Data[[sname]] <- X
  }

  if (opt$normalLast) {
    normal <- sub(".*_", "", sname)

    # Columns that mention "<normal>." tell us which per-sample fields exist.
    normFields <- grep(paste0(normal, "."), h, fixed = TRUE, value = TRUE)
    if (length(normFields) == 0) {
      # Previously this fell through to an obscure "replacement has 0 rows"
      # failure; fail with an explicit message instead.
      stop(
        "no columns found for normal sample '", normal, "' in ", f,
        call. = FALSE
      )
    }
    fields <- sub(".*\\.", "", normFields)

    # Every column ending in ".<first field>" belongs to one sample; the text
    # before the first dot is that sample's name.
    x <- grep(paste0("\\.", fields[1], "$"), h, perl = TRUE, value = TRUE)
    samples <- sub("\\..*", "", x)

    # Logical filtering (rather than -which(...)) stays correct when nothing
    # matches; unique() stops a repeated name from being treated as "another"
    # tumor and dropping its own columns.
    tumorSamples <- unique(samples[samples != normal])

    # Iterating over the values is a no-op when there are no tumor samples.
    for (tumor in tumorSamples) {
      # Mark the columns of all *other* tumor samples (literal prefix match).
      is_other <- rep(FALSE, length(h))
      for (otherTumor in setdiff(tumorSamples, tumor)) {
        prefix <- paste0(otherTumor, ".")
        is_other <- is_other | (substr(h, 1, nchar(prefix)) == prefix)
      }
      hh <- h[!is_other]
      XX <- X[, !is_other, drop = FALSE]

      XX[, "TUMOR_SAMPLE"] <- tumor
      XX[, "NORMAL_SAMPLE"] <- normal
      hh <- c(hh, "TUMOR_SAMPLE", "NORMAL_SAMPLE")
      hh <- relabel_sample_columns(hh, tumor, "TUMOR")
      hh <- relabel_sample_columns(hh, normal, "NORMAL")
      colnames(XX) <- hh
      # NOTE(review): keyed by tumor name only; the same tumor name in a later
      # file replaces this entry -- confirm that is intended.
      Data[[tumor]] <- XX
    }
  }

  # No annotation flag given: keep the table as read so that plain
  # concatenation works. Keyed by path, which is non-empty and distinct per
  # input file.
  if (!(opt$sampleName || opt$tumorNormal || opt$normalLast)) {
    Data[[f]] <- X
  }
}
# Move the named columns to the front of a data frame, keeping the relative
# order of both the moved and the remaining columns. Names that are not
# present are ignored. drop = FALSE keeps the result a data frame even when
# only one column is left.
move_columns_first <- function(tbl, cols) {
  is_front <- colnames(tbl) %in% cols
  tbl[, c(which(is_front), which(!is_front)), drop = FALSE]
}

# Nothing was collected: exit successfully without writing any output.
if (length(Data) == 0) {
  quit(save = "no", status = 0)
}

# Union of all column names, in order of first appearance across the tables.
fields <- unique(unlist(lapply(Data, colnames)))

# Bring every table to the same columns in the same order, filling columns a
# table lacks with NA. The list is walked by position (lapply) rather than by
# name, so entries whose name is empty or repeated are still processed.
Data <- lapply(Data, function(tbl) {
  miss <- setdiff(fields, colnames(tbl))
  if (length(miss) > 0) {
    tbl[, miss] <- NA
  }
  tbl[, fields, drop = FALSE]
})

# Stack the tables. unname() stops list names from being passed to rbind() as
# argument names; row names are discarded afterwards.
table.merged <- do.call(rbind, unname(Data))
rownames(table.merged) <- NULL

# Put the annotation columns first. When --sampleName is combined with one of
# the tumor/normal flags, TUMOR_SAMPLE/NORMAL_SAMPLE end up ahead of SAMPLE,
# because the second move is applied after the first.
if (opt$sampleName) {
  table.merged <- move_columns_first(table.merged, "SAMPLE")
}
if (opt$tumorNormal || opt$normalLast) {
  table.merged <- move_columns_first(
    table.merged, c("TUMOR_SAMPLE", "NORMAL_SAMPLE")
  )
}

# Tab-delimited output on STDOUT, with a header row, no row names, no quoting.
write.table(table.merged, sep = "\t", row.names = FALSE, quote = FALSE)