-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathpyroHMMVcf.pl
More file actions
107 lines (89 loc) · 2.86 KB
/
pyroHMMVcf.pl
File metadata and controls
107 lines (89 loc) · 2.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env perl
# convert pyrohmm results to vcf
use strict;
use warnings;
use Getopt::Std;
use List::MoreUtils qw(uniq);
my %opt;
getopts('hf:n:', \%opt);
my $usage = <<ENDL;
perl pyroHMMVcf.pl -f [ref fasta] -n [sample name]
ENDL
sub HELP_MESSAGE {
print STDERR $usage;
exit(1);
}
HELP_MESSAGE if $opt{h};
HELP_MESSAGE unless $opt{n};
HELP_MESSAGE unless $opt{f};
my $now = localtime;
my $sample = $opt{n};
open REF, $opt{f} . ".fai" or die "Unable to open reference index file\n";
my @contigs = <REF>;
my $vcfHeader = <<ENDL;
##fileformat=VCFv4.1
##fileDate=$now
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=WMQ,Number=1,Type=Integer,Description="Median mapping quality within window">
##FORMAT=<ID=WBQ,Number=1,Type=Integer,Description="Median base quality within window">
ENDL
for my $contig (@contigs) {
my @F = split /\t/, $contig;
$vcfHeader .= "##contig=<ID=$F[0],length=$F[1]>\n";
}
$vcfHeader .= "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t$sample\n";
print $vcfHeader;
my $format = "GT:WMQ:WBQ";
while (<>) {
chomp;
my @F = split / /;
my ($chrom, $pos, $call, $ref, $alt, $prob, $qual, $winMQ, $winBQ) = @F;
my @alts = split /,/, $alt;
@alts = uniq(@alts);
if ($ref eq "-") {
$pos--;
#print STDERR "querying $chrom:$pos-$pos\n";
open(SAMTOOLS, "samtools faidx $opt{f} $chrom:$pos-$pos |");
<SAMTOOLS>;
my $seq = <SAMTOOLS>;
chomp $seq;
$ref = $seq;
close(SAMTOOLS);
for $alt (@alts) {
$alt = $ref . $alt;
$alt =~ s/-//;
}
}
my @nonRefAlts;
for my $i (@alts) {
push @nonRefAlts, $i if ($i ne $ref);
}
if (grep /^-$/, @nonRefAlts) {
$pos--;
#print STDERR "querying $chrom:$pos-$pos\n";
open(SAMTOOLS, "samtools faidx $opt{f} $chrom:$pos-$pos |");
<SAMTOOLS>;
my $seq = <SAMTOOLS>;
chomp $seq;
close(SAMTOOLS);
$ref = $seq . $ref;
for $alt (@nonRefAlts) {
$alt = $seq . $alt;
$alt =~ s/-//;
}
#$alts =~ s/-/./;
}
my $alts = join ',', @nonRefAlts;
my $GT;
$GT = "0/1" if ($call eq "het" && scalar(@nonRefAlts) == 1);
$GT = "1/2" if ($call eq "het" && scalar(@nonRefAlts) == 2);
$GT = "0/0" if ($call eq "hom" && $ref eq $alts);
$GT = "1/1" if ($call eq "hom" && $ref ne $alts);
$GT = "0/1" if ($call eq "del" && scalar(@alts) == 2);
$GT = "1/1" if ($call eq "del" && scalar(@alts) == 1);
$GT = "0/1" if ($call eq "ins" && scalar(@alts) == 2 && grep /^$ref$/, @alts);
$GT = "1/1" if ($call eq "ins" && scalar(@alts) == 1);
$GT = "1/2" if ($call eq "mix" && scalar(@alts) == 2 && !grep /^$ref$/, @alts);
my $sampleFormat = "$GT:$winMQ:$winBQ";
print "$chrom\t$pos\t.\t$ref\t$alts\t$qual\tPASS\t.\t$format\t$sampleFormat\n";
}