diff --git a/Degner/Bam/bamproc.degner.sh b/Degner/Bam/bamproc.degner.sh new file mode 100644 index 0000000..5974dfe --- /dev/null +++ b/Degner/Bam/bamproc.degner.sh @@ -0,0 +1,90 @@ +#Processing bam files +#DEP: rmdups.pl +#DEP: mergebams.pl + + +### +#rmDups +### +email='alva.rani@scilifelab.se' +projid='b2012046' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/bam' +sbatchdir='/proj/b2012046/rani/scripts/degner/bamscripts' +bamfile='/proj/b2012046/rani/analysis/degner' +cd $sbatchdir +find $bamfile -name 'accepted_hits.bam' | awk -F'/' -v execdir=$execdir -v sbatchdir=$sbatchdir -v projid=$projid -v email=$email '{print "perl", execdir"/rmdups.pl", $0, $7, sbatchdir, projid, email;}' >rmdups.sh +sh rmdups.sh +find . -name '*.sbatch' | xargs -I% sbatch % +#### +find . -name '*.sorted' | wc -l + +#Check: +find $bamfile -name '*.bam.sorted.nodup' | xargs -I% ls -lrth % +find $bamfile -name '*.rmdups.stderr' | xargs -I% grep -i 'err' +find $bamfile -name '*.rmdups.stderr' | xargs -I% grep -i 'warn' + +#rm unsorted bams +find $bamfile -name '*.bam' | xargs -I% rm % + +#rename bamfiles +find -name '*.bam.sorted' | xargs -I% echo rename '.bam.sorted' '.sorted.bam' % >cmds.sh +sh cmds.sh + +#rm unsorted bams +find $bamdir -name 'accepted_hits.bam' | grep -v 'mmseq' | xargs -I% rm % + +#rename bamfiles +find $bamdir -name '*.bam.sorted.nodup' | xargs -I% echo rename '.bam.sorted.nodup' '.sorted.nodup.bam' % >cmds.sh +sh cmds.sh +#### + +### +#Merge and sort bam files +### +projid='b2012046' +email='alva.rani@scilifelab.se' +sbatchdir='/proj/b2012046/rani/scripts/degner/mergebamScripts' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/bam' +bamsort='/proj/b2012046/rani/analysis/degner/' +outdir='/proj/b2012046/rani/analysis/degner/mergebam' +time='11:00:00' +cd $sbatchdir +find $bamsort -name '*.sorted.nodup.bam' | sed 's/\..*//' | sort -u >samples.list +cat samples.list | xargs -I% basename % | xargs -I% echo perl ${execdir}'/mergebams.pl' % $bamsort $outdir $sbatchdir $projid $email $time >cmds.sh +sh cmds.sh +find $sbatchdir -name '*.mergesams.sbatch' | xargs -I% sbatch % + + + +## Submitted +#Check: +find ${bamfile}/info/*.stderr | xargs -I% grep -i 'err' % +find ${bamfile}/info/*.mergesams.*.stderr | xargs -I% grep -i 'warn' % +#### + +### +#Create indexes +### +bamsort='/proj/b2012046/rani/analysis/degner/mergebam' +find $bamsort -maxdepth 1 -name '*.bam' | xargs -I% echo samtools index % > cmd.sh +#Manually add sbatch header to the cmds.sh script +(cat <bamindex.sbatchheader +cat bamindex.sbatchheader cmd.sh >bamindex.sbatch +sbatch bamindex.sbatch + +############################ +## Done all steps here without errors + diff --git a/Degner/Bam/mergebams.pl b/Degner/Bam/mergebams.pl new file mode 100644 index 0000000..d42669f --- /dev/null +++ b/Degner/Bam/mergebams.pl @@ -0,0 +1,64 @@ +#!/usr/bin/perl + +#find '/proj/b2011075/analysis/hs_pipe/outdata/run1' -name 'accepted_hits.bam' | awk -F'/' '{print "perl /bubo/home/h26/edsgard/glob/code/ngs_pipes/hs_pipe/rmdups.pl", $0, $10, "/proj/b2011075/analysis/hs_pipe/outscripts/run1 b2010035 daniel.edsgard@scilifelab.se";}' >rmdups.sh + +use strict; + +#my ($sampleid, $bamdir, $outdir, $sbatch_dir, $aid, $em) = @ARGV; + +mergesams(@ARGV); + +sub mergesams { + + use File::Spec::Functions; + use File::Basename; + + my $sampleid = shift @_; + my $bamdir = shift @_; + my $outdata_dir = shift @_; + my $sbatch_dir = shift @_; + my $aid = shift @_; + my $em = shift @_; + my $time = shift @_; + + if(!defined($time)){ + $time = '23:59:00'; + } + + my $sbatch_file = catfile($sbatch_dir, "$sampleid.mergesams.sbatch"); + my $outdata_infodir = catfile($outdata_dir, 'info'); + + #get bamfiles to be merged + my @bamfiles = `find $bamdir -name '*.bam' | grep $sampleid`; + chomp(@bamfiles); + + #set outfile + my $bamfile_merged = catfile($outdata_dir, $sampleid . '.merged.bam'); + + #make dirs + `mkdir -p $outdata_infodir;`; #Creates the data and info dir + `mkdir -p $sbatch_dir;`; #Creates the sbatch-script dir + + open (SBATCH, '>', $sbatch_file) or die("Can't write to $sbatch_file: $!\n"); + + #print sbatch vars + print SBATCH "#!/bin/bash -l", "\n"; + print SBATCH "#SBATCH -A ", $aid, "\n"; + print SBATCH "#SBATCH -t ", $time, "\n"; + print SBATCH "#SBATCH -J mergesams_", $sampleid, "\n"; + print SBATCH "#SBATCH -p core -n 1", "\n"; + print SBATCH "#SBATCH -e $outdata_infodir/$sampleid" . '.mergesams.jid_%.stderr' . "\n"; + print SBATCH "#SBATCH -o $outdata_infodir/$sampleid" . '.mergesams.jid_%.stdout' . "\n"; + unless ($em eq 0) { + print SBATCH "#SBATCH --mail-type=All", "\n"; + print SBATCH "#SBATCH --mail-user=$em", "\n\n"; + } + + #print vars + print SBATCH 'module load bioinfo-tools' . "\n"; + print SBATCH 'module load samtools/0.1.18' . "\n"; + + #print cmd + print SBATCH 'samtools merge'. ' ' OUTPUT=' . $bamfile_merged . ' INPUT=' . join(' INPUT=', @bamfiles) ' . "\n"; + close(SBATCH); +} diff --git a/Degner/Basecount/basecount.degner.sh b/Degner/Basecount/basecount.degner.sh new file mode 100644 index 0000000..4163659 --- /dev/null +++ b/Degner/Basecount/basecount.degner.sh @@ -0,0 +1,145 @@ +#Basecount (allelic count of each allele) +#DEP: dphetfilter.R +#DEP: mpileup.allelecount.pl +#DEP: vcfI162basecount.sh + + +######## +#DP and HET filter +######## + +vcfdir='/proj/b2012046/rani/data/degvarcall' + +## +#VCF to tab format +## +PATH=${PATH}:/bubo/home/h26/edsgard/opt/vcftools_0.1.7/bin; export PATH +PERL5LIB=/bubo/home/h26/edsgard/opt/vcftools_0.1.7/lib; export PERL5LIB +vcftools --vcf allbams.vcf --extract-FORMAT-info DP --out genome +vcftools --vcf allbams.vcf --extract-FORMAT-info GT --out genome + +## +#Filter +## +#OUT: *.het.pos +mindp=10 +execdir='/bubo/home/h26/edsgard/glob/code/ase/sim' +Rscript ${execdir}/dphetfilter.R genome.DP.FORMAT genome.GT.FORMAT $mindp & +ls -1 *.het.pos | xargs -I% echo cat % '|' sed "'s/\./ /' >"%.hetvars >cmds.sh +rename genome.DP.FORMAT.het.pos. . *.hetvars + +#Dump "chr,pos,ref,alt" for these vars. (Used as input in snv.ase.R since the alt allele is seldom biallelic when running mpileup.allelecount.pl below.) +find $vcdir -maxdepth 1 -name '*.vcf' | grep -v 'allbams' | xargs -I% echo "awk -F'\t' '{print \$1\".\"\$2, \$1, \$2, \$4, \$5;}' % | grep -v '^#' | sort -k 1,1 >"%.pos >cmds.sh +sh cmds.sh +ls -1 *.het.pos | xargs -I% echo sort % '>'%.sorted >cmds.sh +sh cmds.sh +ls -1 *.het.pos.sorted | sed 's/.genome.DP.FORMAT.het.pos.sorted//' >samples.list +cat samples.list | xargs -I% echo join -j1 %.genome.DP.FORMAT.het.pos.sorted %.genome.DP.FORMAT.het.pos '>' %.hetvars.alleles >cmds.sh +srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh & +ls -1 *.hetvars.alleles | xargs -I% echo cut -d"' '" -f2-5 % "| tr ' ' '\t' >"%.tmp >cmds.sh +rename .hetvars.alleles.tmp .hetvars.alleles *.hetvars.alleles.tmp + + +################################################# +#BASECOUNT by MPILEUP (no varcall). +#First four of I16 == DP4 +################################################# +bamdir='/proj/b2012046/rani/analysis/degner/mergebam' +vcfdir='/proj/b2012046/rani/data/degvarcall' +bcdir='/proj/b2012046/rani/data/degnerbasecount' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' +scriptdir='/proj/b2012046/rani/scripts/degner/basecount' +projid='b2012046' +email='alva.rani@scilifelab.se' +time='10:00:00' +ref='/bubo/home/h24/alvaj/glob/annotation/degner/reference/Homo_sapiens.maskedRefe.GRCh37.57.fa' + +cd $scriptdir +find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list + +cd $scriptdir +find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list +cat samples.list | xargs -I% echo 'perl' ${execdir}'/mpileup.allelecount.pl' ${bamdir}/%.bam ${vcfdir}/%.hetvars $ref $scriptdir $projid $email $time $bcdir >cmds.sh +sh cmds.sh +find $scriptdir -name '*mpileup.basecount*' | xargs -I% sbatch % + +bcdir='/proj/b2012046/rani/data/basecount' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' +find $bcdir -name '*nocall.vcf' >nocall.vcf.list +cat nocall.vcf.list | xargs -I% echo 'sh' ${execdir}/vcfI162basecount.sh % '>' %.basecount >cmds.sh +srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh & + + +[alvaj@kalkyl4 basecount]$ nano basecount.sh +[alvaj@kalkyl4 basecount]$ cat basecount.sh +#Basecount (allelic count of each allele) +#DEP: dphetfilter.R +#DEP: mpileup.allelecount.pl +#DEP: vcfI162basecount.sh + + +######## +#DP and HET filter +######## + +vcfdir='/proj/b2012046/rani/data/varcalls' + +## +#VCF to tab format +## +PATH=${PATH}:/bubo/home/h26/edsgard/opt/vcftools_0.1.7/bin; export PATH +PERL5LIB=/bubo/home/h26/edsgard/opt/vcftools_0.1.7/lib; export PERL5LIB +vcftools --vcf allbams.vcf --extract-FORMAT-info DP --out genome +vcftools --vcf allbams.vcf --extract-FORMAT-info GT --out genome + +## +#Filter +## +#OUT: *.het.pos +mindp=10 +execdir='/bubo/home/h26/edsgard/glob/code/ase/sim' +Rscript ${execdir}/dphetfilter.R genome.DP.FORMAT genome.GT.FORMAT $mindp & +ls -1 *.het.pos | xargs -I% echo cat % '|' sed "'s/\./ /' >"%.hetvars >cmds.sh +rename genome.DP.FORMAT.het.pos. . *.hetvars + +#Dump "chr,pos,ref,alt" for these vars. (Used as input in snv.ase.R since the alt allele is seldom biallelic when running mpileup.allelecount.pl below.) +find $vcdir -maxdepth 1 -name '*.vcf' | grep -v 'allbams' | xargs -I% echo "awk -F'\t' '{print \$1\".\"\$2, \$1, \$2, \$4, \$5;}' % | grep -v '^#' | sort -k 1,1 >"%.pos >cmds.sh +sh cmds.sh +ls -1 *.het.pos | xargs -I% echo sort % '>'%.sorted >cmds.sh +sh cmds.sh +ls -1 *.het.pos.sorted | sed 's/.genome.DP.FORMAT.het.pos.sorted//' >samples.list +cat samples.list | xargs -I% echo join -j1 %.genome.DP.FORMAT.het.pos.sorted %.genome.DP.FORMAT.het.pos '>' %.hetvars.alleles >cmds.sh +srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh & +ls -1 *.hetvars.alleles | xargs -I% echo cut -d"' '" -f2-5 % "| tr ' ' '\t' >"%.tmp >cmds.sh +rename .hetvars.alleles.tmp .hetvars.alleles *.hetvars.alleles.tmp + + +################################################# +#BASECOUNT by MPILEUP (no varcall). +#First four of I16 == DP4 +################################################# +bamdir='/proj/b2012046/rani/analysis/gsnap/mergebam' +vcfdir='/proj/b2012046/rani/data/varcalls' +bcdir='/proj/b2012046/rani/data/gsnapbasecount' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' +scriptdir='/proj/b2012046/rani/scripts/basecount' +projid='b2012046' +email='alva.rani@scilifelab.se' +time='10:00:00' +ref='/bubo/home/h24/alvaj/glob/annotation/gsnap/reference/reference.genome' + +cd $scriptdir +find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list + +cd $scriptdir +find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list +cat samples.list | xargs -I% echo 'perl' ${execdir}'/mpileup.allelecount.pl' ${bamdir}/%.bam ${vcfdir}/%.hetvars $ref $scriptdir $projid $email $time $bcdir >cmds.sh +sh cmds.sh +find $scriptdir -name '*mpileup.basecount*' | xargs -I% sbatch % + +bcdir='/proj/b2012046/rani/data/basecount' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' +find $bcdir -name '*nocall.vcf' >nocall.vcf.list +cat nocall.vcf.list | xargs -I% echo 'sh' ${execdir}/vcfI162basecount.sh % '>' %.basecount >cmds.sh +srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh & + diff --git a/Degner/VArcall/degner.varcall.sh b/Degner/VArcall/degner.varcall.sh new file mode 100644 index 0000000..e57d11f --- /dev/null +++ b/Degner/VArcall/degner.varcall.sh @@ -0,0 +1,57 @@ +#Varcalling with SAMtools +#DEP: mpileup_multisample.pl + + +### +#VarCall (SAMtools) +### + +projid='b2012046' +email='alva.rani@scilifelab.se' +sbatchdir='/proj/b2012046/rani/scripts/degner/varscripts' +bamfile='/proj/b2012046/rani/analysis/degner/mergebam' +vcfdir='/proj/b2012046/rani/data/degvarcall' +ref='/bubo/home/h24/alvaj/glob/annotation/degner/reference/Homo_sapiens.maskedRefe.GRCh37.57.fa' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/varcall' + +cd $sbatchdir +# need to redo it again with merged bam files +#create file listing the bam files +allbamsfile=${vcfdir}/allbams.list +find $bamfile -maxdepth 1 -name '*.bam' >$allbamsfile + +#create region files +module load bioinfo-tools +module load samtools/0.1.18 +srun -p devel -A b2012046 -t 1:00:00 samtools faidx $ref & +vcfutils.pl splitchr -l 6600000 ${ref}.fai >${vcfdir}/genome.480.regs + + +#Run mpileup +cat ${vcfdir}/genome.480.regs | xargs -I% echo 'perl' ${execdir}/mpileup_multisample.pl $allbamsfile % $sbatchdir $projid $email $ref >cmds.sh +sh cmds.sh +find $sbatchdir -name '*.mpileup.sbatch' | xargs -I% sbatch % + + +#check status: +cd $vcfdir +lt *.stderr >allerrfiles.tmp +squeue -u alvaj | grep -v JOBID | awk '{print $1;}' | xargs -I% grep % ${vcfdir}/info/allerrfiles.tmp + +#Catenate all region-specific vcfs +find $vcfdir -name 'allbams.*.vcf' | xargs cat | grep -v '^#' >${vcfdir}/allbams.vcf.tmp +cat allbams.list.MT:1-16569.vcf | grep '^#' >header.tmp +cat header.tmp allbams.vcf.tmp >allbams.vcf +rm header.tmp allbams.vcf.tmp +wc -l allbams.vcf +#26 + +#Rm slashes ilon vcf sample header +vcfdir='/proj/b2012046/rani/data/varcalls' +cd ${vcfdir} +cat allbams.vcf | grep '^#' >header.tmp +cat header.tmp | sed 's/\/proj\/b2012046\/rani\/analysis\/degner\/mergebam\///g' | sed 's/\.bam//g' >file.tmp +mv file.tmp header.tmp +grep -v '^#' allbams.*.vcf >allbams.vcf.noheader +cat header.tmp allbams.vcf.noheader >allbams.vcf +rm allbams.vcf.noheader header.tmp diff --git a/Degner/VArcall/mpileup_multisample.pl b/Degner/VArcall/mpileup_multisample.pl new file mode 100644 index 0000000..4f24dd6 --- /dev/null +++ b/Degner/VArcall/mpileup_multisample.pl @@ -0,0 +1,73 @@ +#!/usr/bin/perl + +#find '/proj/b2011075/analysis/hs_pipe/outdata/run1/varcalls' -name 'allbams.chr_*' | xargs -I% echo 'perl /bubo/home/h26/edsgard/glob/code/ngs_pipes/hs_pipe/mpileup_multisample.pl' % '/proj/b2011075/analysis/hs_pipe/outscripts/run1/varcalls b2010035 daniel.edsgard@scilifelab.se' >cmds.sh + +#-q1 mapQ filter to get "unique" alignments. +#No max dp varFilter since RNA-seq data. + +use strict; + +my ($bamfilelist, $region, $ods, $aid, $em, $ref, $time, $partition) = @ARGV; + +if(!(defined($ref))){ + $ref = '/bubo/home/h26/edsgard/glob/annotation/human/Homo_sapiens.GRCh37.57.dna.concat.fa'; +} +if(!(defined($time))){ + $time = '23:00:00'; +} +if(!(defined($partition))){ + $partition = 'node'; +} + +mpileup(($bamfilelist, $region, $ods, $aid, $em, $ref, $time, $partition)); + +sub mpileup { + + use File::Spec::Functions; + use File::Basename; + + my $bamfilelist = shift @_; + my $region = shift @_; + my $ods = shift @_; + my $aid = shift @_; + my $em = shift @_; + my $ref = shift @_; + my $time = shift @_; + my $partition = shift @_; + + my $bamfiles_base = basename($bamfilelist); + my $sbatch_dir = $ods; + my $sbatch_file = catfile($sbatch_dir, $bamfiles_base . '.' . $region . '.mpileup.sbatch'); + my $outdata_dir = dirname($bamfilelist); + my $outdata_infodir = catfile($outdata_dir, 'info'); + + #make dirs + `mkdir -p $outdata_infodir;`; #Creates the data and info dir + `mkdir -p $sbatch_dir;`; #Creates the sbatch-script dir + + open (SBATCH, '>', $sbatch_file) or die("Can't write to $sbatch_file: $!\n"); + + #print sbatch vars + print SBATCH "#!/bin/bash -l", "\n"; + print SBATCH "#SBATCH -A ", $aid, "\n"; + print SBATCH "#SBATCH -t ", $time, "\n"; + print SBATCH "#SBATCH -J multimpileup", "\n"; + print SBATCH '#SBATCH -p ', $partition, ' -n 1', "\n"; + print SBATCH "#SBATCH -e $outdata_infodir/$bamfiles_base.$region.multimpileup" . '.jid_%j.stderr', "\n"; + print SBATCH "#SBATCH -o $outdata_infodir/$bamfiles_base.$region.multimpileup" . '.jid_%j.stdout', "\n"; + unless ($em eq 0) { + print SBATCH "#SBATCH --mail-type=All", "\n"; + print SBATCH "#SBATCH --mail-user=$em", "\n\n"; + } + + #print vars + print SBATCH 'module load bioinfo-tools' . "\n"; + print SBATCH 'module load samtools/0.1.18' . "\n"; + print SBATCH 'echo "Running on: $(hostname)"',"\n\n"; + + #print cmd + my $vcffile = $bamfilelist . '.' . $region . '.vcf'; + print SBATCH 'samtools mpileup -q1 -d10000 -L10000 -DSugf ' . $ref . ' -r ' . $region . ' -b ' . $bamfilelist. ' | bcftools view -vcg - | vcfutils.pl varFilter >' . $vcffile . "\n"; + + close(SBATCH); +} diff --git a/Degner/map/masked.degner.pl b/Degner/map/masked.degner.pl new file mode 100644 index 0000000..cff287a --- /dev/null +++ b/Degner/map/masked.degner.pl @@ -0,0 +1,88 @@ +#!/bin/perl +use strict; +use warnings; +no warnings ('uninitialized', 'substr'); + + +my $snpfile = $ARGV[0]; +my $sequencefile = $ARGV[1]; +my $chromosomename=$ARGV[2]; +#my $chr; + +open(SNP, "$snpfile"); +open(SEQUENCE,"genome.fa"); +open(NRSEQ, ">$chromosomename"); + +print_chromosome(); + +sub print_chromosome { +my @line; +my $line; +my $plusbase; +my $otherbase; +my $test_string; +my $NRsequence; +my $chr; my $ky; +my $seq; my $chr_snp; +my %seq; +#; +while (){ +chomp; +$NRsequence = $_; + $chr = $1 if ($NRsequence =~ />(\d+)\s(.*)/) || ($NRsequence =~ />(\w+)\s(.*)/ ); +$seq{$chr}.=$NRsequence unless ($NRsequence =~ />/); +} + +#close(SEQUENCE); +my $a = keys %seq; +print "keys:",$a; +#print $_,"\n\n" for keys %seq; + +#; +while($line=){ +@line = split(/\t/, $line); +#print @line, "\n"; +#if($line[3] eq "+"){ +$chr_snp =$line[0]; +$plusbase=$line[4]; +$otherbase=$line[3]; + +print "Ref:$plusbase,Alt:$otherbase\n"; + +#} +#elsif($line[3] eq "-"){ +#$plusbase = $line[4]; +#$otherbase=$line[5]; + +#$plusbase =~ tr/ATGCYRKMBDHVatgcyrkmbdhv/TACGRYMKVHDBtacgrymkvhdb/; +#$otherbase =~ tr/ATGCYRKMBDHVatgcyrkmbdhv/TACGRYMKVHDBtacgrymkvhdb/; +#} + +#print "$line[2]\n"; # +print(substr($seq{$chr_snp}, $line[2]-5, 10), "\n"); +$test_string = substr($seq{$chr_snp}, $line[2]-1, 1); +if( uc($plusbase) eq uc($test_string) ) { +print "OK\n"; +if(uc($plusbase) ne "C" && uc($otherbase) ne "C"){ +substr $seq{$chr_snp}, $line[2]-1, 1, "C"; +} elsif(uc($plusbase) ne "G" && uc($otherbase) ne "G"){ +substr $seq{$chr_snp}, $line[2]-1, 1, "G"; +} elsif(uc($plusbase) ne "A" && uc($otherbase) ne "A"){ +substr $seq{$chr_snp}, $line[2]-1, 1, "A"; +} elsif(uc($plusbase) ne "T" && uc($otherbase) ne "T"){ +substr $seq{$chr_snp}, $line[2]-1, 1, "T"; +} +} elsif($test_string eq "N"){ +print "Base was N in reference\n"; +} +else { +die "THE BASE GIVEN WAS NOT THE REFERENCE BASE, ABORTING MISSION!"; +print "THE BASE GIVEN WAS NOT THE REFERENCE BASE, ABORTING MISSION!"; +} +print(substr($seq{$chr_snp}, $line[2]-5, 10),"\n\n"); +} +#print NRSEQ (">chr\n$seq{$chr_snp}\n"); +#print NRSEQ ">chr",$_,"\n",$seq{$_},"\n" for each %seq; +foreach $ky (sort keys %seq) {print NRSEQ ">chr$ky\n$seq{$ky}\n";} +} + diff --git a/map/run.degner.sh b/Degner/map/run.degner.sh similarity index 100% rename from map/run.degner.sh rename to Degner/map/run.degner.sh diff --git a/map/tophat.gensbatch.pl b/Degner/map/tophat.gensbatch.pl similarity index 100% rename from map/tophat.gensbatch.pl rename to Degner/map/tophat.gensbatch.pl diff --git a/map/tophat.singleend.gensbatch.pl b/Degner/map/tophat.singleend.gensbatch.pl similarity index 100% rename from map/tophat.singleend.gensbatch.pl rename to Degner/map/tophat.singleend.gensbatch.pl diff --git a/GSNAP/bam/bamerror.run.sh b/GSNAP/bam/bamerror.run.sh new file mode 100644 index 0000000..ef60e50 --- /dev/null +++ b/GSNAP/bam/bamerror.run.sh @@ -0,0 +1,31 @@ + +###Test for 19 sam file +#created a errorlist of 19 files +# taking the the reads appreaed more than twice and taking it in a order +cat errorfiles.list | xargs -I% echo "samtools view "%" | awk '{print \$1;}' | sort | uniq -c >"%.reads2nmapped >cmds.sh + + + +errordir='/proj/b2012046/rani/analysis/gsnap/bamsort/errortest' + +ls -1 ${errordir}/*.reads2nmapped >reads2nmapped.files + +cat reads2nmapped.files | xargs -I% basename % | xargs -I% echo "awk '\$1 == 2 {print \$0;}'" ${errordir}/% " | wc -l >" %.n.uniq >cmds.sh +cat reads2nmapped.files | xargs -I% basename % | xargs -I% echo "awk '\$1 != 2 {print \$0;}'" ${errordir}/% " | wc -l >" %.n.nonuniq >cmds2.sh + + +bamfiles='/proj/b2012046/rani/analysis/gsnap/bamsort/errortest/errorbamfiles.list' +errordir='/proj/b2012046/rani/analysis/gsnap/bamsort/errortest' + +#Extract multimapped reads +ls -1 ${errordir}/*.reads2nmapped >reads2nmapped.files +cat reads2nmapped.files | xargs -I% basename % | xargs -I% echo "awk '\$1 != 2 {print \$0;}'" ${errordir}/% "| awk '{print \$2;}'>" %.nonuniq >cmds.sh +sh cmds.sh & + +#TBD: Convert all bams to sams +bamdir='/proj/b2012046/rani/analysis/gsnap/bamindex' +sam=${bamdir}/2_LPS.part_2ab.sam + +#Rm multimapped reads +ls -1 *d.nonuniq | awk -F'.' -v OFS='.' '{print $1, $2;}' >errorfiles.prefix.list +cat errorfiles.prefix.list | xargs -I% echo "perl rm.multimappedreads.pl " %.sam %.fastq.bam.reads2nmapped.nonuniq ">"%.uniqmapped.sam >cmds.sh diff --git a/bam/bamproc.sh b/GSNAP/bam/bamproc.sh similarity index 100% rename from bam/bamproc.sh rename to GSNAP/bam/bamproc.sh diff --git a/bam/bamstats.sh b/GSNAP/bam/bamstats.sh similarity index 100% rename from bam/bamstats.sh rename to GSNAP/bam/bamstats.sh diff --git a/bam/bedtoolshist2pdf.R b/GSNAP/bam/bedtoolshist2pdf.R similarity index 100% rename from bam/bedtoolshist2pdf.R rename to GSNAP/bam/bedtoolshist2pdf.R diff --git a/bam/coverage.pl b/GSNAP/bam/coverage.pl similarity index 100% rename from bam/coverage.pl rename to GSNAP/bam/coverage.pl diff --git a/bam/mergebams.pl b/GSNAP/bam/mergebams.pl similarity index 100% rename from bam/mergebams.pl rename to GSNAP/bam/mergebams.pl diff --git a/bam/rmdups.pl b/GSNAP/bam/rmdups.pl similarity index 100% rename from bam/rmdups.pl rename to GSNAP/bam/rmdups.pl diff --git a/GSNAP/basecount/basecount.sh b/GSNAP/basecount/basecount.sh new file mode 100644 index 0000000..93b655d --- /dev/null +++ b/GSNAP/basecount/basecount.sh @@ -0,0 +1,147 @@ +#Basecount (allelic count of each allele) +#DEP: dphetfilter.R +#DEP: mpileup.allelecount.pl +#DEP: vcfI162basecount.sh + + +######## +#DP and HET filter +######## + +vcfdir='/proj/b2012046/rani/data/varcalls' + +## +#VCF to tab format +## +PATH=${PATH}:/bubo/home/h26/edsgard/opt/vcftools_0.1.7/bin; export PATH +PERL5LIB=/bubo/home/h26/edsgard/opt/vcftools_0.1.7/lib; export PERL5LIB +vcftools --vcf allbams.vcf --extract-FORMAT-info DP --out genome +vcftools --vcf allbams.vcf --extract-FORMAT-info GT --out genome + +## +#Filter +## +#OUT: *.het.pos +mindp=10 +execdir='/bubo/home/h26/edsgard/glob/code/ase/sim' +Rscript ${execdir}/dphetfilter.R genome.DP.FORMAT genome.GT.FORMAT $mindp & +ls -1 *.het.pos | xargs -I% echo cat % '|' sed "'s/\./ /' >"%.hetvars >cmds.sh +rename genome.DP.FORMAT.het.pos. . *.hetvars + +#Dump "chr,pos,ref,alt" for these vars. (Used as input in snv.ase.R since the alt allele is seldom biallelic when running mpileup.allelecount.pl below.) +#find $vcdir -maxdepth 1 -name '*.vcf' | grep -v 'allbams' | xargs -I% echo "awk -F'\t' '{print \$1\".\"\$2, \$1, \$2, \$4, \$5;}' % | grep -v '^#' | sort -k 1,1 >"%.pos >cmds.sh +find $vcdir -maxdepth 1 -name 'allbams.vcf' | xargs -I% echo "awk -F'\t' '{print \$1\".\"\$2, \$1, \$2, \$4, \$5;}' % | grep -v '^#' | sort -k 1,1 >"%.pos >cmds.sh +sh cmds.sh +ls -1 *.het.pos | xargs -I% echo sort % '>'%.sorted >cmds.sh +sh cmds.sh +ls -1 *.het.pos.sorted | sed 's/.genome.DP.FORMAT.het.pos.sorted//' >samples.list +cat samples.list | xargs -I% echo join -j1 %.genome.DP.FORMAT.het.pos.sorted allbams.vcf.pos '>' %.hetvars.alleles >cmds.sh +srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh & +ls -1 *.hetvars.alleles | xargs -I% echo cut -d"' '" -f2-5 % "| tr ' ' '\t' >"%.tmp >cmds.sh +sh cmds.sh +rename .hetvars.alleles.tmp .hetvars.alleles *.hetvars.alleles.tmp + + +################################################# +#BASECOUNT by MPILEUP (no varcall). +#First four of I16 == DP4 +################################################# +bamdir='/proj/b2012046/rani/analysis/gsnap/mergebam' +vcfdir='/proj/b2012046/rani/data/varcalls' +bcdir='/proj/b2012046/rani/data/gsnapbasecount' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' +scriptdir='/proj/b2012046/rani/scripts/basecount' +projid='b2012046' +email='alva.rani@scilifelab.se' +time='4:00:00' +ref='/bubo/home/h24/alvaj/glob/annotation/gsnap/reference/reference.genome' + +cd $scriptdir +find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list + +cd $scriptdir +find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list +cat samples.list | xargs -I% echo 'perl' ${execdir}'/mpileup.allelecount.pl' ${bamdir}/%.bam ${vcfdir}/%.hetvars $ref $scriptdir $projid $email $time $bcdir >cmds.sh +sh cmds.sh +find $scriptdir -name '*mpileup.basecount*' | xargs -I% sbatch % + +bcdir='/proj/b2012046/rani/data/basecount' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' +find $bcdir -name '*nocall.vcf' >nocall.vcf.list +cat nocall.vcf.list | xargs -I% echo 'sh' ${execdir}/vcfI162basecount.sh % '>' %.basecount >cmds.sh +srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh & + + +[alvaj@kalkyl4 basecount]$ nano basecount.sh +[alvaj@kalkyl4 basecount]$ cat basecount.sh +#Basecount (allelic count of each allele) +#DEP: dphetfilter.R +#DEP: mpileup.allelecount.pl +#DEP: vcfI162basecount.sh + + +######## +#DP and HET filter +######## + +vcfdir='/proj/b2012046/rani/data/varcalls' + +## +#VCF to tab format +## +PATH=${PATH}:/bubo/home/h26/edsgard/opt/vcftools_0.1.7/bin; export PATH +PERL5LIB=/bubo/home/h26/edsgard/opt/vcftools_0.1.7/lib; export PERL5LIB +vcftools --vcf allbams.vcf --extract-FORMAT-info DP --out genome +vcftools --vcf allbams.vcf --extract-FORMAT-info GT --out genome + +## +#Filter +## +#OUT: *.het.pos +mindp=10 +execdir='/bubo/home/h26/edsgard/glob/code/ase/sim' +Rscript ${execdir}/dphetfilter.R genome.DP.FORMAT genome.GT.FORMAT $mindp & +ls -1 *.het.pos | xargs -I% echo cat % '|' sed "'s/\./ /' >"%.hetvars >cmds.sh +rename genome.DP.FORMAT.het.pos. . *.hetvars + +#Dump "chr,pos,ref,alt" for these vars. (Used as input in snv.ase.R since the alt allele is seldom biallelic when running mpileup.allelecount.pl below.) +find $vcdir -maxdepth 1 -name '*.vcf' | grep -v 'allbams' | xargs -I% echo "awk -F'\t' '{print \$1\".\"\$2, \$1, \$2, \$4, \$5;}' % | grep -v '^#' | sort -k 1,1 >"%.pos >cmds.sh +sh cmds.sh +ls -1 *.het.pos | xargs -I% echo sort % '>'%.sorted >cmds.sh +sh cmds.sh +ls -1 *.het.pos.sorted | sed 's/.genome.DP.FORMAT.het.pos.sorted//' >samples.list +cat samples.list | xargs -I% echo join -j1 %.genome.DP.FORMAT.het.pos.sorted %.genome.DP.FORMAT.het.pos '>' %.hetvars.alleles >cmds.sh +srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh & +ls -1 *.hetvars.alleles | xargs -I% echo cut -d"' '" -f2-5 % "| tr ' ' '\t' >"%.tmp >cmds.sh +rename .hetvars.alleles.tmp .hetvars.alleles *.hetvars.alleles.tmp + + +################################################# +#BASECOUNT by MPILEUP (no varcall). +#First four of I16 == DP4 +################################################# +bamdir='/proj/b2012046/rani/analysis/gsnap/mergebam' +vcfdir='/proj/b2012046/rani/data/varcalls' +bcdir='/proj/b2012046/rani/data/gsnapbasecount' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' +scriptdir='/proj/b2012046/rani/scripts/basecount' +projid='b2012046' +email='alva.rani@scilifelab.se' +time='10:00:00' +ref='/bubo/home/h24/alvaj/glob/annotation/gsnap/reference/reference.genome' + +cd $scriptdir +find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list + +cd $scriptdir +find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list +cat samples.list | xargs -I% echo 'perl' ${execdir}'/mpileup.allelecount.pl' ${bamdir}/%.bam ${vcfdir}/%.hetvars $ref $scriptdir $projid $email $time $bcdir >cmds.sh +sh cmds.sh +find $scriptdir -name '*mpileup.basecount*' | xargs -I% sbatch % + +bcdir='/proj/b2012046/rani/data/basecount' +execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' +find $bcdir -name '*nocall.vcf' >nocall.vcf.list +cat nocall.vcf.list | xargs -I% echo 'sh' ${execdir}/vcfI162basecount.sh % '>' %.basecount >cmds.sh +srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh & + diff --git a/basecount/dphetfilter.R b/GSNAP/basecount/dphetfilter.R similarity index 100% rename from basecount/dphetfilter.R rename to GSNAP/basecount/dphetfilter.R diff --git a/basecount/mpileup.allelecount.pl b/GSNAP/basecount/mpileup.allelecount.pl similarity index 100% rename from basecount/mpileup.allelecount.pl rename to GSNAP/basecount/mpileup.allelecount.pl diff --git a/basecount/vcfI162basecount.sh b/GSNAP/basecount/vcfI162basecount.sh similarity index 100% rename from basecount/vcfI162basecount.sh rename to GSNAP/basecount/vcfI162basecount.sh diff --git a/run.gsnap.sh b/GSNAP/map/run.gsnap.sh similarity index 100% rename from run.gsnap.sh rename to GSNAP/map/run.gsnap.sh diff --git a/varcall/mpileup_multisample.pl b/GSNAP/varcall/varcall/mpileup_multisample.pl similarity index 100% rename from varcall/mpileup_multisample.pl rename to GSNAP/varcall/varcall/mpileup_multisample.pl diff --git a/varcall/varcall.sh b/GSNAP/varcall/varcall/varcall.sh similarity index 96% rename from varcall/varcall.sh rename to GSNAP/varcall/varcall/varcall.sh index 750a34b..3ea6f2d 100644 --- a/varcall/varcall.sh +++ b/GSNAP/varcall/varcall/varcall.sh @@ -40,7 +40,7 @@ squeue -u alvaj | grep -v JOBID | awk '{print $1;}' | xargs -I% grep % ${vcfdir} #Catenate all region-specific vcfs find $vcfdir -name 'allbams.*.vcf' | xargs cat | grep -v '^#' >${vcfdir}/allbams.vcf.tmp -cat allbams.list.MT:1-16569.vcf | grep '^#' >header.tmp +cat allbams.list.chr15:1-6600000.vcf | grep '^#' >header.tmp cat header.tmp allbams.vcf.tmp >allbams.vcf rm header.tmp allbams.vcf.tmp wc -l allbams.vcf diff --git a/map/run.tophat.sh b/Monte/run.Monte.sh similarity index 100% rename from map/run.tophat.sh rename to Monte/run.Monte.sh diff --git a/degner/run.degner.sh b/Monte/run.monteWorkflow.sh similarity index 100% rename from degner/run.degner.sh rename to Monte/run.monteWorkflow.sh diff --git a/basecount/basecount.sh b/basecount/basecount.sh deleted file mode 100644 index e9ed246..0000000 --- a/basecount/basecount.sh +++ /dev/null @@ -1,70 +0,0 @@ -#Basecount (allelic count of each allele) -#DEP: dphetfilter.R -#DEP: mpileup.allelecount.pl -#DEP: vcfI162basecount.sh - - -######## -#DP and HET filter -######## - -vcfdir='/proj/b2012046/edsgard/ase/sim/data/varcalls' - -## -#VCF to tab format -## -PATH=${PATH}:/bubo/home/h26/edsgard/opt/vcftools_0.1.7/bin; export PATH -PERL5LIB=/bubo/home/h26/edsgard/opt/vcftools_0.1.7/lib; export PERL5LIB -vcftools --vcf allbams.vcf --extract-FORMAT-info DP --out genome -vcftools --vcf allbams.vcf --extract-FORMAT-info GT --out genome - -## -#Filter -## -#OUT: *.het.pos -mindp=10 -execdir='/bubo/home/h26/edsgard/glob/code/ase/sim' -Rscript ${execdir}/dphetfilter.R genome.DP.FORMAT genome.GT.FORMAT $mindp & -ls -1 *.het.pos | xargs -I% echo cat % '|' sed "'s/\./ /' >"%.hetvars >cmds.sh -rename genome.DP.FORMAT.het.pos. . *.hetvars - -#Dump "chr,pos,ref,alt" for these vars. (Used as input in snv.ase.R since the alt allele is seldom biallelic when running mpileup.allelecount.pl below.) -find $vcdir -maxdepth 1 -name '*.vcf' | grep -v 'allbams' | xargs -I% echo "awk -F'\t' '{print \$1\".\"\$2, \$1, \$2, \$4, \$5;}' % | grep -v '^#' | sort -k 1,1 >"%.pos >cmds.sh -sh cmds.sh -ls -1 *.het.pos | xargs -I% echo sort % '>'%.sorted >cmds.sh -sh cmds.sh -ls -1 *.het.pos.sorted | sed 's/.genome.DP.FORMAT.het.pos.sorted//' >samples.list -cat samples.list | xargs -I% echo join -j1 %.genome.DP.FORMAT.het.pos.sorted %.vcf.pos '>' %.hetvars.alleles >cmds.sh -srun -p devel -t 1:00:00 -A b2010035 sh cmds.sh & -ls -1 *.hetvars.alleles | xargs -I% echo cut -d"' '" -f2-5 % "| tr ' ' '\t' >"%.tmp >cmds.sh -rename .hetvars.alleles.tmp .hetvars.alleles *.hetvars.alleles.tmp - - -################################################# -#BASECOUNT by MPILEUP (no varcall). -#First four of I16 == DP4 -################################################# -bamdir='/proj/b2012046/rani/analysis/gsnap' -vcfdir='/proj/b2012046/edsgard/ase/sim/data/varcalls' -bcdir='/proj/b2012046/rani/data/basecount' -execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' -scriptdir='/proj/b2012046/rani/scripts/basecount' -projid='b2012046' -email='alva.rani@scilifelab.se' -time='5:00:00' -ref='/bubo/home/h26/edsgard/glob/annotation/human/Homo_sapiens.GRCh37.57.dna.concat.fa' - -cd $scriptdir -find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list -cat samples.list | xargs -I% echo 'perl' ${execdir}'/mpileup.allelecount.pl' ${bamdir}/%.bam12 ${vcfdir}/%.hetvars $ref $scriptdir $projid $email $time $bcdir >cmds.sh -sh cmds.sh -find $scriptdir -name '*mpileup.basecount*' | xargs -I% sbatch % - -#Extract basecounts from vcf files (I16 field) -bcdir='/proj/b2012046/rani/data/basecount' -execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount' -find $bcdir -name '*nocall.vcf' >nocall.vcf.list -cat nocall.vcf.list | xargs -I% echo 'sh' ${execdir}/vcfI162basecount.sh % '>' %.basecount >cmds.sh -srun -p devel -t 1:00:00 -A b2010035 sh cmds.sh & - - diff --git a/degner/get.overlaps.v3.perl b/degner/get.overlaps.v3.perl deleted file mode 100644 index adead9b..0000000 --- a/degner/get.overlaps.v3.perl +++ /dev/null @@ -1,92 +0,0 @@ -#!/bin/perl - -use strict; - -my $snpfile = shift @ARGV; -my $genomepath = shift @ARGV; -my $readlength = shift @ARGV; -my $readquality = shift @ARGV; -my $seqfile; -my @chroms = (1..22, 'X', 'Y'); -foreach my $chr (@chroms){ - read_files(($chr, $snpfile, $seqfile)); - print_overlaps($chr); -} - -sub read_files { - my $chr = shift @_; - my $snpfile = shift @_; - my $seqfile = shift @_; - open(SNP, '<', $snpfile) or die "cannot open snpfile"; - open(SEQUENCE, "cat $genomepath/chr.$chr.fa |") or die "cannot open seqfile"; -#DE: chr-formatted ref (UCSC): open(SEQUENCE, "zcat $genomepath/chr$chr.fa.gz |") or die "cannot open seqfile"; -} - -sub print_overlaps { - - my $chr = shift @_; - my @sequence_arr = ; - chomp(@sequence_arr); - shift(@sequence_arr); #remove fasta header - my $sequence = join("", @sequence_arr); -#DE: UCSC formatted: $sequence =~ s/>chr$chr//g; - close(SEQUENCE); - - #DE: removes header: ; - while (defined(my $line = )) { - chomp($line); - my ($snpid, $thischr, $pos, $strand, $ref, $alt) = split(/ /, $line); - - my $plusbase; -#DE: chr-formatted ref: if ($thischr eq "chr$chr") { - if ($thischr eq $chr) { - - ### - #Print short read identical to reference seq - ### - if ($strand eq "+") { - $plusbase = $ref - } elsif ($strand eq "-") { - $plusbase = $ref; - $plusbase =~ tr/ATGCYRKMBDHVatgcyrkmbdhv/TACGRYMKVHDBtacgrymkvhdb/; - } - - for (my $jbase=0; $jbase<$readlength; $jbase++) { - my $short = substr($sequence, ($pos-($readlength-1-$jbase)-1), $readlength-1-$jbase) . $plusbase . substr($sequence, ($pos), $jbase); - my $RC = $short; - $RC =~ tr/ATGCYRKMBDHVatgcyrkmbdhv/TACGRYMKVHDBtacgrymkvhdb/; - $RC = reverse($RC); - my $readname = join('_', ($snpid, $thischr, $pos, 'REF_+', $plusbase, $jbase)); - print('@', $readname, "\n", $short, "\n", '+', $readname, "\n", $readquality, "\n"); - $readname = join('_', ($snpid, $thischr, $pos, 'REF_-', $plusbase, $jbase)); - print('@', $readname, "\n", $RC, "\n", '+', $readname, "\n", $readquality, "\n"); -# print('@', "$snpid_$thischr_$pos_REF_+_$plusbase", "_$jbase", "\n$short\n", '+', "$snpid_$thischr_$pos_REF_+_$plusbase", "_$jbase", "\n$readquality\n"); -# print('@', "$snpid_$thischr_$pos_REF_-_$plusbase", "_$jbase", "\n$RC\n", '+', "$snpid_$thischr_$pos_REF_-_$plusbase", "_$jbase", "\n$readquality\n"); - } - - ### - #Print short read with alternate allele - ### - if ($strand eq "+") { - $plusbase = $alt - } elsif ($strand eq "-") { - $plusbase = $alt; - $plusbase =~ tr/ATGCYRKMBDHVatgcyrkmbdhv/TACGRYMKVHDBtacgrymkvhdb/; - } - - for (my $jbase=0; $jbase<$readlength; $jbase++) { - my $short = substr($sequence, ($pos-($readlength-1-$jbase)-1), $readlength-1-$jbase) . $plusbase . substr($sequence, ($pos), $jbase); - my $RC = $short; - $RC =~ tr/ATGCYRKMBDHVatgcyrkmbdhv/TACGRYMKVHDBtacgrymkvhdb/; - $RC = reverse($RC); - my $readname = join('_', ($snpid, $thischr, $pos, 'NONREF_+', $plusbase, $jbase)); - print('@', $readname, "\n", $short, "\n", '+', $readname, "\n", $readquality, "\n"); - $readname = join('_', ($snpid, $thischr, $pos, 'NONREF_-', $plusbase, $jbase)); - print('@', $readname, "\n", $RC, "\n", '+', $readname, "\n", $readquality, "\n"); -# print('@', "$snpid_$thischr_$pos_NONREF_+_$plusbase", "_$jbase", "\n$short\n", '+', "$snpid_$thischr_$pos_NONREF_+_$plusbase", "_$jbase", "\n$readquality\n"); -# print('@', "$snpid_$thischr_$pos_NONREF_-_$plusbase", "_$jbase", "\n$RC\n", '+', "$snpid_$thischr_$pos_NONREF_-_$plusbase", "_$jbase", "\n$readquality\n"); - } - } - } - close(SNP); -}