diff --git a/dDocent b/dDocent index f76051f..ae02de7 100755 --- a/dDocent +++ b/dDocent @@ -3,7 +3,7 @@ export LC_ALL=en_US.UTF-8 export SHELL=bash ##########dDocent########## -VERSION='2.9.5'; export VERSION +VERSION='2.9.6'; export VERSION #This script serves as an interactive bash wrapper to QC, assemble, map, and call SNPs from double digest RAD (SE or PE), ezRAD (SE or PE) data, or SE RAD data. #It requires that your raw data are split up by tagged individual and follow the naming convention of: @@ -152,7 +152,7 @@ if [ "$TEST" -gt 0 ]; then fi #Count number of individuals in current directory -NumInd=$(ls *.F.fq.gz 2> /dev/null | wc -l) +NumInd=$(find . -maxdepth 1 -type f -name "*.F.fq.gz" | wc -l) NumInd=$(($NumInd - 0)) #Test for file limits for current user and reset if necessary @@ -168,7 +168,7 @@ if [ "$Flimit" != "unlimited" ]; then fi #Create list of sample names -ls *.F.fq.gz > namelist 2> /dev/null +find . -maxdepth 1 -type f -name "*.F.fq.gz" -printf "%f\n" > namelist sed -i'' -e 's/.F.fq.gz//g' namelist #Create an array of sample names NUMNAMES=$(mawk '/_/' namelist | wc -l) @@ -416,7 +416,7 @@ fi if [ "$MAP" != "no" ]; then echo -e "\nCreating alignment intervals" - ls *-RG.bam >bamlist.list + find . -maxdepth 1 -type f -name "*-RG.bam" -printf "%f\n" > bamlist.list CreateIntervals fi @@ -424,7 +424,7 @@ fi if [ "$SNP" != "no" ]; then #Create list of BAM files - ls *-RG.bam >bamlist.list + find . -maxdepth 1 -type f -name "*-RG.bam" -printf "%f\n" > bamlist.list #If mapping is not being performed, but intervals do not exist they are created if [[ "$MAP" == "no" && ! -f "cat-RRG.bam" ]]; then CreateIntervals @@ -435,12 +435,11 @@ if [ "$SNP" != "no" ]; then fi #Check to make sure interval files have been created if [[ "$MAP" == "no" && ! -f "mapped.bed" ]]; then - #bedtools bamtobed -i cat-RRG.bam > cat-RRG.bed + #bedtools bamtobed -i cat-RRG.bam > cat-RRG.bed #bedops --merge cat-RRG.bed > mapped.bed cat namelist | parallel -j $FB2 "bedtools bamtobed -i {}-RG.bam | sort-bed - | bedops --merge - > {}-RG.bed" bedops --merge *-RG.bed | bedtools sort -i - -faidx reference.fasta.fai > mapped.bed - rm *-RG.bed - + find . -maxdepth 1 -type f -name "*-RG.bed" -delete fi #This code estimates the coverage of reference intervals and removes intervals in 0.01% of depth #This allows genotyping to be more effecient and eliminates extreme copy number loci from the data @@ -563,8 +562,9 @@ if [ "$SNP" != "no" ]; then len=$3-$2;lc=len*$4;cov = cov + lc if ( cov < cutoff) {x="mapped."i".bed";print $1"\t"$2"\t"$3 > x} else {i=i+1; x="mapped."i".bed"; print $1"\t"$2"\t"$3 > x; cov=0} - }' - ls mapped.*.bed | sed -e 's/mapped.//g' | sed -e 's/.bed//g' | shuf | parallel --bar --halt now,fail=1 --env call_genos2 -j 4 --no-notice "call_genos2 {} " + }' + + find . -maxdepth 1 -type f -name "mapped.*.bed" -printf "%f\n" | sed -e 's/mapped.//g' | sed -e 's/.bed//g' | shuf | parallel --bar --halt now,fail=1 --env call_genos2 -j 4 --no-notice "call_genos2 {} " if [ -f "freebayes.error" ]; then echo -e "\n\n\nFreeBayes has failed when trying to finish a previously failed instance. Memory and processor settings need to be drastically reconfigured" @@ -588,8 +588,7 @@ if [ "$SNP" != "no" ]; then export -f finish_genos rm freebayes.error freebayes.log &> /dev/null - - ls mapped.*.bed | sed -e 's/mapped.//g' | sed -e 's/.bed//g' | shuf | parallel --bar --halt now,fail=10 --env call_genos -j $NUMProc --no-notice --joblog ./par.log "call_genos {} " + find . -maxdepth 1 -type f -name "mapped.*.bed" -printf "%f\n" | sed -e 's/mapped.//g' | sed -e 's/.bed//g' | shuf | parallel --bar --halt now,fail=10 --env call_genos -j $NUMProc --no-notice --joblog ./par.log "call_genos {} " if [ -f "freebayes.error" ]; then FAILS=$( cat freebayes.error | wc -l ) @@ -601,7 +600,7 @@ if [ "$SNP" != "no" ]; then rm freebayes.redos else echo -e "\nmultiple instances of freebayes failed. dDocent will now recalibrate run parameters to use less memory.\n" - rm mapped.*.bed + find . -maxdepth 1 -type f -name "mapped.*.bed" -delete rm freebayes.error LIM=$(( $SNPNUMProc * 4 )) if head -1 reference.fasta | grep -e 'dDocent' 1>/dev/null; then @@ -628,7 +627,7 @@ if [ "$SNP" != "no" ]; then echo "Using FreeBayes to call SNPs again" NumP=$(( $NUMProc / 4 )) NumP=$(( $NumP * 3 )) - ls mapped.*.bed | sed -e 's/mapped.//g' | sed -e 's/.bed//g' | shuf | parallel --bar --halt now,fail=10 --env call_genos -j $NumP --no-notice --joblog ./par2.log "call_genos {} " + find . -maxdepth 1 -type f -name "mapped.*.bed" -printf "%f\n" | sed -e 's/mapped.//g' | sed -e 's/.bed//g' | shuf | parallel --bar --halt now,fail=10 --env call_genos -j $NumP --no-notice --joblog ./par2.log "call_genos {} " fi fi @@ -666,7 +665,7 @@ if [ "$SNP" != "no" ]; then NumP=$(( $NumP / 4 )) NumP=$(( $NumP * 3 )) echo "Using FreeBayes to call SNPs again" - ls mapped.*.bed | sed -e 's/mapped.//g' | sed -e 's/.bed//g' | shuf | parallel --bar --halt now,fail=10 --env call_genos -j $NumP --joblog ./par3.log --no-notice "call_genos {}" + find . -maxdepth 1 -type f -name "mapped.*.bed" -printf "%f\n" | sed -e 's/mapped.//g' | sed -e 's/.bed//g' | shuf | parallel --bar --halt now,fail=10 --env call_genos -j $NumP --joblog ./par3.log --no-notice "call_genos {}" fi fi @@ -677,9 +676,8 @@ if [ "$SNP" != "no" ]; then else ERROR3=0 export ERROR3 - fi - - rm mapped.*.bed + fi + find . -maxdepth 1 -type f -name "mapped.*.bed" -delete mv raw.1.vcf raw.01.vcf 2>/dev/null mv raw.2.vcf raw.02.vcf 2>/dev/null @@ -768,7 +766,7 @@ TrimReads () { if [ $STACKS -gt 0 ]; then echo "Removing the _1 character and replacing with /1 in the name of every sequence" cat namelist | parallel -j $FB1 --no-notice "gunzip -c {}.F.fq.gz | sed -e 's:_1$:/1:g' > {}.F.fq" - rm -f *.F.fq.gz + find . -maxdepth 1 -type f -name "*.F.fq.gz" -delete cat namelist | parallel -j $FB1 --no-notice "gzip {}.F.fq" fi @@ -778,7 +776,7 @@ TrimReads () { if [ $STACKS -gt 0 ]; then echo "Removing the _2 character and replacing with /2 in the name of every sequence" cat namelist | parallel -j $FB1 --no-notice "gunzip -c {}.R.fq.gz | sed -e 's:_2$:/2:g' > {}.R.fq" - rm -f *.R.fq.gz + find . -maxdepth 1 -type f -name "*.R.fq.gz" -delete cat namelist | parallel -j $FB1 --no-notice "gzip {}.R.fq" fi fi @@ -1205,7 +1203,7 @@ wait #bedops --merge cat-RRG.bed > mapped.bed cat namelist | parallel -j $FB2 "bedtools bamtobed -i {}-RG.bam | sort-bed - | bedops --merge - > {}-RG.bed" bedops --merge *-RG.bed | bedtools sort -i - -faidx reference.fasta.fai > mapped.bed - rm *-RG.bed + find . -maxdepth 1 -type f -name "*-RG.bed" -delete } #This checks that dDocent has detected the proper number of individuals and exits if incorrect