forked from raylim/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecurVcf.R
More file actions
48 lines (39 loc) · 1.74 KB
/
recurVcf.R
File metadata and controls
48 lines (39 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env Rscript
# output positions that occur in more than one vcf file in bed format
suppressPackageStartupMessages(library("optparse"));
suppressPackageStartupMessages(library("plyr"));
suppressPackageStartupMessages(library("VariantAnnotation"));
#options(warn = -1, error = quote({ traceback(2); q('no', status = 1) }))
#options(error = recover)
options(error = quote(dump.frames("testdump", TRUE)))
optList <- list(
make_option("--genome", default = 'hg19', help = "genome build [default %default]"),
make_option("--tumor", default = NULL, help = "tumor sample"),
make_option("--outFile", default = NULL, help = "output file [default %default]"))
parser <- OptionParser(usage = "%prog vcf.files", option_list = optList);
arguments <- parse_args(parser, positional_arguments = T);
opt <- arguments$options;
if (is.null(opt$outFile)) {
cat("Need output file\n");
print_help(parser);
stop();
} else if (length(arguments$args) <= 1) {
cat("Need vcf files\n");
print_help(parser);
stop();
}
files <- arguments$args;
vcfs <- list()
for (f in files) {
vcf <- readVcf(f, genome = opt$genome)
tum <- ifelse(opt$tumor %in% colnames(geno(vcf)$GT), opt$tumor, "TUMOR")
gt <- geno(vcf)$GT[, tum]
vcf <- vcf[gt != "./." & gt != "0/0" & gt != "0", ]
vcfs <- append(vcfs, vcf)
}
all <- do.call('rbind', lapply(vcfs, function(x) as.data.frame(subset(rowData(x), FILTER == "PASS"))))
all <- all[, c("seqnames", "start", "end")]
cnt <- ddply(all, .(seqnames, start, end), nrow)
cnt <- subset(cnt, V1 > 1)
write(paste(cnt[,1], ":", cnt[,2], "-", cnt[,3], sep = ''), file = opt$outFile)
#write.table(cnt[,c(1:3)], file = opt$outFile, sep = '\t', quote = F, row.names = F, col.names = F)