From 5a8b658874dbf1bbba6ba09f40d9c8b4dba2fbff Mon Sep 17 00:00:00 2001 From: elcortegano <57492899+elcortegano@users.noreply.github.com> Date: Thu, 25 Mar 2021 22:58:34 +0000 Subject: [PATCH 1/2] Check insertion and deletion blast files In many instances, no deletions or insertions are present in blast files, resulting in errors reported by `cat` and `rm`. Not insertion and deletion blast files are only sorted (and later removed) when they exist. --- mumandco_v2.4.2.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mumandco_v2.4.2.sh b/mumandco_v2.4.2.sh index 6a7358d..402102b 100644 --- a/mumandco_v2.4.2.sh +++ b/mumandco_v2.4.2.sh @@ -1082,7 +1082,13 @@ then cat $prefix.filteredcalls | awk '{if($6!="insertion" && $6!="deletion") print $0}' > $prefix.noINDEL echo "ref_chr query_chr ref_start ref_stop size SV_type query_start query_stop" > $prefix.SVs_all.tsv - cat $prefix.insertion_blast $prefix.deletion_blast $prefix.noINDEL | sort -k1,1 -k3V >> $prefix.SVs_all.tsv + if [ -f $prefix.insertion_blast ]; then + sort -k1,1 -k3V $prefix.insertion_blast >> $prefix.SVs_all.tsv + fi + if [ -f $prefix.deletion_blast ]; then + sort -k1,1 -k3V $prefix.deletion_blast >> $prefix.SVs_all.tsv + fi + sort -k1,1 -k3V $prefix.noINDEL >> $prefix.SVs_all.tsv rm query_segment.fa rm ref_segment.fa @@ -1172,8 +1178,8 @@ then rm $prefix.deletions rm $prefix.blast_in rm $prefix.blast_del - rm $prefix.deletion_blast - rm $prefix.insertion_blast + rm -f $prefix.deletion_blast + rm -f $prefix.insertion_blast fi fi From a5b93827651fc67ee7d98f77f57846261437045d Mon Sep 17 00:00:00 2001 From: elcortegano <57492899+elcortegano@users.noreply.github.com> Date: Thu, 25 Mar 2021 23:46:20 +0000 Subject: [PATCH 2/2] awk only divides when NR is not NULL The END block of `awk` does not check whether the NR variable is NULL or not, which sometimes results in "fatal: division by zero attempted" errors. The code changes prevent this error. --- mumandco_v2.4.2.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mumandco_v2.4.2.sh b/mumandco_v2.4.2.sh index 402102b..7ebc07b 100644 --- a/mumandco_v2.4.2.sh +++ b/mumandco_v2.4.2.sh @@ -162,7 +162,7 @@ echo "" # > ""$prefix"_query".coordsg_matched ##get average quality -average_quality=$(tail -n +5 "$alignments_folder/""$prefix"_ref".delta_filter.coordsg" | awk '{sum+=$7} END{print sum/NR}') +average_quality=$(tail -n +5 "$alignments_folder/""$prefix"_ref".delta_filter.coordsg" | awk '{sum+=$7} END{if(NR){print sum/NR}}') ### filter for alignment less than 10kb (limit on translocation size) ### filter for any alignment overlapping another with more than 50bp and with lower quality @@ -244,7 +244,7 @@ tail -n +5 "$alignments_folder/""$prefix"_ref".delta_filter.coordsg" |\ # ##filter any remaining alignments with less than (1.25 x standard deviation) less than the average of the remaining alignments -average_quality=$(cat ""$prefix"_pre_list.txt" | awk '{sum+=$7} END{print sum/NR}') +average_quality=$(cat ""$prefix"_pre_list.txt" | awk '{sum+=$7} END{if(NR){print sum/NR}}') if [[ "$average_quality" == "100" ]] then @@ -256,7 +256,7 @@ if [[ "$average_quality" == "100" ]] else - stdev125=$(cat ""$prefix"_pre_list.txt" | awk '{sum=sum+$7 ; sumX2+=(($7)^2)} END{print 1.25*(sqrt(sumX2/(NR) - ((sum/NR)^2)))}') + stdev125=$(cat ""$prefix"_pre_list.txt" | awk '{sum=sum+$7 ; sumX2+=(($7)^2)} END{if(NR){print 1.25*(sqrt(sumX2/(NR) - ((sum/NR)^2)))}}') quality_filt=$(echo $average_quality - $stdev125 | bc) cat ""$prefix"_pre_list.txt" | awk -v quality_filt="$quality_filt" '{if($7 > quality_filt) print $14"\t"$15}' | sort | uniq > ""$prefix"_chromosome_pairs.txt" @@ -1116,7 +1116,7 @@ echo "" total=$(cat $prefix.filteredcalls | awk '{print $0}' | wc -l) total_del=$(cat $prefix.filteredcalls | awk '/deletion/{print $0}' | wc -l) total_ins=$(cat $prefix.filteredcalls | awk '/insertion/{print $0}' | wc -l) -#total_INDELmean=$(cat $prefix.filteredcalls | awk 'BEGIN{sum=0} !/double/&&/deletion/||/insertion/{sum+=$5} END{print sum/NR}') +#total_INDELmean=$(cat $prefix.filteredcalls | awk 'BEGIN{sum=0} !/double/&&/deletion/||/insertion/{sum+=$5} END{if(NR){print sum/NR}}') total_dup=$(cat $prefix.filteredcalls | awk '/duplication/{print $0}' | wc -l) total_inv=$(cat $prefix.filteredcalls | awk '/inversion/{print $0}' | wc -l) total_trans=$(cat $prefix.filteredcalls | awk '!/complicated/&&/transloc/{print $0}' | wc -l)