mapgd-parallel/mapgd-original.pbs at master · LynchLab/mapgd-parallel · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
#PBS -N mapgd-origin-Bwa
#PBS -k o
#PBS -l nodes=1:ppn=16,walltime=200:00:00
#PBS -l vmem=100gb
#PBS -M ouqd@hotmail.com
#PBS -m abe
#PBS -j oe

set +x
echo PBS -N mapgd-PA2013-Bwa
echo PBS -k o
echo PBS -l nodes=1:ppn=16,walltime=200:00:00
echo PBS -l vmem=100gb
echo PBS -M ouqd@hotmail.com
echo PBS -m abe
echo PBS -j oe
echo Updated on 05/28/2018
echo module rm gcc
echo module load gcc
echo module load gsl
echo module load samtools
module rm gcc
module load gcc
module load gsl
module load samtools

export SampleID=PA2013
export DATA_DIR=/N/dc2/scratch/xw63/$SampleID/Bwa
export HeaderFile=$DATA_DIR/PA42.header

echo SampleID=PA2013
echo DATA_DIR=/N/dc2/scratch/xw63/$SampleID/Bwa
echo HeaderFile=$DATA_DIR/PA42.header

set -x
cd $DATA_DIR
set +x
echo ===============================================================
echo 0. Make a header file
echo ===============================================================
set -x
time samtools view -H $DATA_DIR/$SampleID-001-RG_Sorted_dedup_realigned_Clipped.bam > $HeaderFile
set +x
echo ===============================================================
echo 1. Make a pro file of nucleotide-read quartets -counts of A, C, G, and T, from the mpileup files of the clones.
echo ===============================================================
set -x
time mapgd proview -i *.mpileup -H $HeaderFile > $SampleID.pro.txt
set +x

echo ===============================================================
echo 2. Exclude mtDNA data from the pro file.
echo ===============================================================
set -x

time grep -v '^PA42_mt_genome' $SampleID.pro.txt > $SampleID-Nuc.pro.txt
set +x

echo ===============================================================
echo 3. Run the allele command to estimate allele and genotype frequencies from the pro file.
echo ===============================================================
set -x
time mapgd allele -i $SampleID-Nuc.pro.txt -o $SampleID.map -p $SampleID.clean
set +x

echo ===============================================================
echo 4. Run the filter command to filter the map file of ML estimates of the parameters.
echo ===============================================================
set -x
time mapgd filter -i $SampleID.map.map -p 20 -q 0.05 -Q 0.45 -c 800 -C 2400 -o $SampleID-filtered.map
#-p: minimum value of the likelihood-ratio test statistic for polymorphism
#-q: minimum minor-allele frequency estimate
#-Q: maximum minor-allele frequency estimate
#-c: minimum population coverage
#-C: maximum population coverage
set +x

echo ===============================================================
echo 5. Run the genotype command to generate a file of genotype likelihoods
echo ===============================================================
set -x
time mapgd genotype -p $SampleID.clean.pro -m $SampleID-filtered.map.map > $SampleID.genotype
set +x

echo ===============================================================
echo 6-1 Remove the unnecessary header and footer
echo ===============================================================
set -x
time awk '{if ($3 != "MN_FREQ" && $3 >= 0.0 && $3 <= 1.0) print}' $SampleID.genotype > $SampleID-F.genotype
set +x
echo ===============================================================
echo 6-2 Randomly pick a specified number, 100,000 of SNPs from the file of genotype likelihoods
===============================================================

set -x
time /N/dc2/projects/daphpops/Software/MAPGD-0.4.26/extras/sub_sample.py $SampleID-F.genotype -N 200000 > $SampleID-200K_F.genotype
set +x
echo ===============================================================
echo 6-3 Extract the header from the file of genotype likelihoods
echo ===============================================================

set -x

time head -n -1 $SampleID.genotype | awk '{if ($3 == NULL || $1 ~ /^@/) print}' > $SampleID-header.genotype
set +x
echo ===============================================================
echo 6-4 Extract the footer from the file of genotype likelihoods
echo ===============================================================

set -x
time tail -n 1 $SampleID.genotype > $SampleID-footer.genotype
set +x
echo ===============================================================
echo 6-5 Add the header and footer to the sub-sample of the file of genotype likelihoods
echo ===============================================================
set -x
time cat $SampleID-header.genotype $SampleID-200K_F.genotype $SampleID-footer.genotype > $SampleID-wh_wf_200K_F.genotype

set +x
echo ===============================================================
echo 7. Run the relatedness command
echo ===============================================================

set -x
time mapgd relatedness -i $SampleID-wh_wf_200K_F.genotype -o $SampleID-200K_rel.out

set +x
echo ===============================================================
echo =============Task completed.===================
echo ===============================================================