Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions Degner/Bam/bamproc.degner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#Processing bam files
#DEP: rmdups.pl
#DEP: mergebams.pl


###
#rmDups
###
email='alva.rani@scilifelab.se'
projid='b2012046'
execdir='/bubo/home/h24/alvaj/glob/code/ASE/bam'
sbatchdir='/proj/b2012046/rani/scripts/degner/bamscripts'
bamfile='/proj/b2012046/rani/analysis/degner'
cd $sbatchdir
find $bamfile -name 'accepted_hits.bam' | awk -F'/' -v execdir=$execdir -v sbatchdir=$sbatchdir -v projid=$projid -v email=$email '{print "perl", execdir"/rmdups.pl", $0, $7, sbatchdir, projid, email;}' >rmdups.sh
sh rmdups.sh
find . -name '*.sbatch' | xargs -I% sbatch %
####
find . -name '*.sorted' | wc -l

#Check:
find $bamfile -name '*.bam.sorted.nodup' | xargs -I% ls -lrth %
find $bamfile -name '*.rmdups.stderr' | xargs -I% grep -i 'err'
find $bamfile -name '*.rmdups.stderr' | xargs -I% grep -i 'warn'

#rm unsorted bams
find $bamfile -name '*.bam' | xargs -I% rm %

#rename bamfiles
find -name '*.bam.sorted' | xargs -I% echo rename '.bam.sorted' '.sorted.bam' % >cmds.sh
sh cmds.sh

#rm unsorted bams
find $bamdir -name 'accepted_hits.bam' | grep -v 'mmseq' | xargs -I% rm %

#rename bamfiles
find $bamdir -name '*.bam.sorted.nodup' | xargs -I% echo rename '.bam.sorted.nodup' '.sorted.nodup.bam' % >cmds.sh
sh cmds.sh
####

###
#Merge and sort bam files
###
projid='b2012046'
email='alva.rani@scilifelab.se'
sbatchdir='/proj/b2012046/rani/scripts/degner/mergebamScripts'
execdir='/bubo/home/h24/alvaj/glob/code/ASE/bam'
bamsort='/proj/b2012046/rani/analysis/degner/'
outdir='/proj/b2012046/rani/analysis/degner/mergebam'
time='11:00:00'
cd $sbatchdir
find $bamsort -name '*.sorted.nodup.bam' | sed 's/\..*//' | sort -u >samples.list
cat samples.list | xargs -I% basename % | xargs -I% echo perl ${execdir}'/mergebams.pl' % $bamsort $outdir $sbatchdir $projid $email $time >cmds.sh
sh cmds.sh
find $sbatchdir -name '*.mergesams.sbatch' | xargs -I% sbatch %



## Submitted
#Check:
find ${bamfile}/info/*.stderr | xargs -I% grep -i 'err' %
find ${bamfile}/info/*.mergesams.*.stderr | xargs -I% grep -i 'warn' %
####

###
#Create indexes
###
bamsort='/proj/b2012046/rani/analysis/degner/mergebam'
find $bamsort -maxdepth 1 -name '*.bam' | xargs -I% echo samtools index % > cmd.sh
#Manually add sbatch header to the cmds.sh script
(cat <<EOF
#!/bin/bash -l
#SBATCH -A $projid
#SBATCH -t 3:00:00
#SBATCH -J indexbam
#SBATCH -p core -n 1
#SBATCH -e ${bamsort}/info/indexbam.jid_%j.stderr
#SBATCH -o ${bamsort}/info/indexbam.jid_%j.stdout
#SBATCH --mail-type=All
#SBATCH --mail-user=$email
module load bioinfo-tools
module load samtools/0.1.18
EOF
) >bamindex.sbatchheader
cat bamindex.sbatchheader cmd.sh >bamindex.sbatch
sbatch bamindex.sbatch

############################
## Done all steps here without errors

64 changes: 64 additions & 0 deletions Degner/Bam/mergebams.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/perl

#find '/proj/b2011075/analysis/hs_pipe/outdata/run1' -name 'accepted_hits.bam' | awk -F'/' '{print "perl /bubo/home/h26/edsgard/glob/code/ngs_pipes/hs_pipe/rmdups.pl", $0, $10, "/proj/b2011075/analysis/hs_pipe/outscripts/run1 b2010035 daniel.edsgard@scilifelab.se";}' >rmdups.sh

use strict;

#my ($sampleid, $bamdir, $outdir, $sbatch_dir, $aid, $em) = @ARGV;

mergesams(@ARGV);

sub mergesams {

use File::Spec::Functions;
use File::Basename;

my $sampleid = shift @_;
my $bamdir = shift @_;
my $outdata_dir = shift @_;
my $sbatch_dir = shift @_;
my $aid = shift @_;
my $em = shift @_;
my $time = shift @_;

if(!defined($time)){
$time = '23:59:00';
}

my $sbatch_file = catfile($sbatch_dir, "$sampleid.mergesams.sbatch");
my $outdata_infodir = catfile($outdata_dir, 'info');

#get bamfiles to be merged
my @bamfiles = `find $bamdir -name '*.bam' | grep $sampleid`;
chomp(@bamfiles);

#set outfile
my $bamfile_merged = catfile($outdata_dir, $sampleid . '.merged.bam');

#make dirs
`mkdir -p $outdata_infodir;`; #Creates the data and info dir
`mkdir -p $sbatch_dir;`; #Creates the sbatch-script dir

open (SBATCH, '>', $sbatch_file) or die("Can't write to $sbatch_file: $!\n");

#print sbatch vars
print SBATCH "#!/bin/bash -l", "\n";
print SBATCH "#SBATCH -A ", $aid, "\n";
print SBATCH "#SBATCH -t ", $time, "\n";
print SBATCH "#SBATCH -J mergesams_", $sampleid, "\n";
print SBATCH "#SBATCH -p core -n 1", "\n";
print SBATCH "#SBATCH -e $outdata_infodir/$sampleid" . '.mergesams.jid_%.stderr' . "\n";
print SBATCH "#SBATCH -o $outdata_infodir/$sampleid" . '.mergesams.jid_%.stdout' . "\n";
unless ($em eq 0) {
print SBATCH "#SBATCH --mail-type=All", "\n";
print SBATCH "#SBATCH --mail-user=$em", "\n\n";
}

#print vars
print SBATCH 'module load bioinfo-tools' . "\n";
print SBATCH 'module load samtools/0.1.18' . "\n";

#print cmd
print SBATCH 'samtools merge'. ' ' OUTPUT=' . $bamfile_merged . ' INPUT=' . join(' INPUT=', @bamfiles) ' . "\n";
close(SBATCH);
}
145 changes: 145 additions & 0 deletions Degner/Basecount/basecount.degner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#Basecount (allelic count of each allele)
#DEP: dphetfilter.R
#DEP: mpileup.allelecount.pl
#DEP: vcfI162basecount.sh


########
#DP and HET filter
########

vcfdir='/proj/b2012046/rani/data/degvarcall'

##
#VCF to tab format
##
PATH=${PATH}:/bubo/home/h26/edsgard/opt/vcftools_0.1.7/bin; export PATH
PERL5LIB=/bubo/home/h26/edsgard/opt/vcftools_0.1.7/lib; export PERL5LIB
vcftools --vcf allbams.vcf --extract-FORMAT-info DP --out genome
vcftools --vcf allbams.vcf --extract-FORMAT-info GT --out genome

##
#Filter
##
#OUT: *.het.pos
mindp=10
execdir='/bubo/home/h26/edsgard/glob/code/ase/sim'
Rscript ${execdir}/dphetfilter.R genome.DP.FORMAT genome.GT.FORMAT $mindp &
ls -1 *.het.pos | xargs -I% echo cat % '|' sed "'s/\./ /' >"%.hetvars >cmds.sh
rename genome.DP.FORMAT.het.pos. . *.hetvars

#Dump "chr,pos,ref,alt" for these vars. (Used as input in snv.ase.R since the alt allele is seldom biallelic when running mpileup.allelecount.pl below.)
find $vcdir -maxdepth 1 -name '*.vcf' | grep -v 'allbams' | xargs -I% echo "awk -F'\t' '{print \$1\".\"\$2, \$1, \$2, \$4, \$5;}' % | grep -v '^#' | sort -k 1,1 >"%.pos >cmds.sh
sh cmds.sh
ls -1 *.het.pos | xargs -I% echo sort % '>'%.sorted >cmds.sh
sh cmds.sh
ls -1 *.het.pos.sorted | sed 's/.genome.DP.FORMAT.het.pos.sorted//' >samples.list
cat samples.list | xargs -I% echo join -j1 %.genome.DP.FORMAT.het.pos.sorted %.genome.DP.FORMAT.het.pos '>' %.hetvars.alleles >cmds.sh
srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh &
ls -1 *.hetvars.alleles | xargs -I% echo cut -d"' '" -f2-5 % "| tr ' ' '\t' >"%.tmp >cmds.sh
rename .hetvars.alleles.tmp .hetvars.alleles *.hetvars.alleles.tmp


#################################################
#BASECOUNT by MPILEUP (no varcall).
#First four of I16 == DP4
#################################################
bamdir='/proj/b2012046/rani/analysis/degner/mergebam'
vcfdir='/proj/b2012046/rani/data/degvarcall'
bcdir='/proj/b2012046/rani/data/degnerbasecount'
execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount'
scriptdir='/proj/b2012046/rani/scripts/degner/basecount'
projid='b2012046'
email='alva.rani@scilifelab.se'
time='10:00:00'
ref='/bubo/home/h24/alvaj/glob/annotation/degner/reference/Homo_sapiens.maskedRefe.GRCh37.57.fa'

cd $scriptdir
find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list

cd $scriptdir
find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list
cat samples.list | xargs -I% echo 'perl' ${execdir}'/mpileup.allelecount.pl' ${bamdir}/%.bam ${vcfdir}/%.hetvars $ref $scriptdir $projid $email $time $bcdir >cmds.sh
sh cmds.sh
find $scriptdir -name '*mpileup.basecount*' | xargs -I% sbatch %

bcdir='/proj/b2012046/rani/data/basecount'
execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount'
find $bcdir -name '*nocall.vcf' >nocall.vcf.list
cat nocall.vcf.list | xargs -I% echo 'sh' ${execdir}/vcfI162basecount.sh % '>' %.basecount >cmds.sh
srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh &


[alvaj@kalkyl4 basecount]$ nano basecount.sh
[alvaj@kalkyl4 basecount]$ cat basecount.sh
#Basecount (allelic count of each allele)
#DEP: dphetfilter.R
#DEP: mpileup.allelecount.pl
#DEP: vcfI162basecount.sh


########
#DP and HET filter
########

vcfdir='/proj/b2012046/rani/data/varcalls'

##
#VCF to tab format
##
PATH=${PATH}:/bubo/home/h26/edsgard/opt/vcftools_0.1.7/bin; export PATH
PERL5LIB=/bubo/home/h26/edsgard/opt/vcftools_0.1.7/lib; export PERL5LIB
vcftools --vcf allbams.vcf --extract-FORMAT-info DP --out genome
vcftools --vcf allbams.vcf --extract-FORMAT-info GT --out genome

##
#Filter
##
#OUT: *.het.pos
mindp=10
execdir='/bubo/home/h26/edsgard/glob/code/ase/sim'
Rscript ${execdir}/dphetfilter.R genome.DP.FORMAT genome.GT.FORMAT $mindp &
ls -1 *.het.pos | xargs -I% echo cat % '|' sed "'s/\./ /' >"%.hetvars >cmds.sh
rename genome.DP.FORMAT.het.pos. . *.hetvars

#Dump "chr,pos,ref,alt" for these vars. (Used as input in snv.ase.R since the alt allele is seldom biallelic when running mpileup.allelecount.pl below.)
find $vcdir -maxdepth 1 -name '*.vcf' | grep -v 'allbams' | xargs -I% echo "awk -F'\t' '{print \$1\".\"\$2, \$1, \$2, \$4, \$5;}' % | grep -v '^#' | sort -k 1,1 >"%.pos >cmds.sh
sh cmds.sh
ls -1 *.het.pos | xargs -I% echo sort % '>'%.sorted >cmds.sh
sh cmds.sh
ls -1 *.het.pos.sorted | sed 's/.genome.DP.FORMAT.het.pos.sorted//' >samples.list
cat samples.list | xargs -I% echo join -j1 %.genome.DP.FORMAT.het.pos.sorted %.genome.DP.FORMAT.het.pos '>' %.hetvars.alleles >cmds.sh
srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh &
ls -1 *.hetvars.alleles | xargs -I% echo cut -d"' '" -f2-5 % "| tr ' ' '\t' >"%.tmp >cmds.sh
rename .hetvars.alleles.tmp .hetvars.alleles *.hetvars.alleles.tmp


#################################################
#BASECOUNT by MPILEUP (no varcall).
#First four of I16 == DP4
#################################################
bamdir='/proj/b2012046/rani/analysis/gsnap/mergebam'
vcfdir='/proj/b2012046/rani/data/varcalls'
bcdir='/proj/b2012046/rani/data/gsnapbasecount'
execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount'
scriptdir='/proj/b2012046/rani/scripts/basecount'
projid='b2012046'
email='alva.rani@scilifelab.se'
time='10:00:00'
ref='/bubo/home/h24/alvaj/glob/annotation/gsnap/reference/reference.genome'

cd $scriptdir
find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list

cd $scriptdir
find $bamdir -maxdepth 1 -name '*.bam' | xargs -I% basename % | sed 's/\.bam//' >samples.list
cat samples.list | xargs -I% echo 'perl' ${execdir}'/mpileup.allelecount.pl' ${bamdir}/%.bam ${vcfdir}/%.hetvars $ref $scriptdir $projid $email $time $bcdir >cmds.sh
sh cmds.sh
find $scriptdir -name '*mpileup.basecount*' | xargs -I% sbatch %

bcdir='/proj/b2012046/rani/data/basecount'
execdir='/bubo/home/h24/alvaj/glob/code/ASE/basecount'
find $bcdir -name '*nocall.vcf' >nocall.vcf.list
cat nocall.vcf.list | xargs -I% echo 'sh' ${execdir}/vcfI162basecount.sh % '>' %.basecount >cmds.sh
srun -p devel -t 1:00:00 -A b2012046 sh cmds.sh &

57 changes: 57 additions & 0 deletions Degner/VArcall/degner.varcall.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#Varcalling with SAMtools
#DEP: mpileup_multisample.pl


###
#VarCall (SAMtools)
###

projid='b2012046'
email='alva.rani@scilifelab.se'
sbatchdir='/proj/b2012046/rani/scripts/degner/varscripts'
bamfile='/proj/b2012046/rani/analysis/degner/mergebam'
vcfdir='/proj/b2012046/rani/data/degvarcall'
ref='/bubo/home/h24/alvaj/glob/annotation/degner/reference/Homo_sapiens.maskedRefe.GRCh37.57.fa'
execdir='/bubo/home/h24/alvaj/glob/code/ASE/varcall'

cd $sbatchdir
# need to redo it again with merged bam files
#create file listing the bam files
allbamsfile=${vcfdir}/allbams.list
find $bamfile -maxdepth 1 -name '*.bam' >$allbamsfile

#create region files
module load bioinfo-tools
module load samtools/0.1.18
srun -p devel -A b2012046 -t 1:00:00 samtools faidx $ref &
vcfutils.pl splitchr -l 6600000 ${ref}.fai >${vcfdir}/genome.480.regs


#Run mpileup
cat ${vcfdir}/genome.480.regs | xargs -I% echo 'perl' ${execdir}/mpileup_multisample.pl $allbamsfile % $sbatchdir $projid $email $ref >cmds.sh
sh cmds.sh
find $sbatchdir -name '*.mpileup.sbatch' | xargs -I% sbatch %


#check status:
cd $vcfdir
lt *.stderr >allerrfiles.tmp
squeue -u alvaj | grep -v JOBID | awk '{print $1;}' | xargs -I% grep % ${vcfdir}/info/allerrfiles.tmp

#Catenate all region-specific vcfs
find $vcfdir -name 'allbams.*.vcf' | xargs cat | grep -v '^#' >${vcfdir}/allbams.vcf.tmp
cat allbams.list.MT:1-16569.vcf | grep '^#' >header.tmp
cat header.tmp allbams.vcf.tmp >allbams.vcf
rm header.tmp allbams.vcf.tmp
wc -l allbams.vcf
#26

#Rm slashes ilon vcf sample header
vcfdir='/proj/b2012046/rani/data/varcalls'
cd ${vcfdir}
cat allbams.vcf | grep '^#' >header.tmp
cat header.tmp | sed 's/\/proj\/b2012046\/rani\/analysis\/degner\/mergebam\///g' | sed 's/\.bam//g' >file.tmp
mv file.tmp header.tmp
grep -v '^#' allbams.*.vcf >allbams.vcf.noheader
cat header.tmp allbams.vcf.noheader >allbams.vcf
rm allbams.vcf.noheader header.tmp
Loading