-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Description
needLR currently has to be run from inside its script directory, which makes it difficult to use in Nextflow workflows. I made the following changes to finish implementing the WD_PREFIX parameter (which appears to be only half implemented) and to add an OUT_DIR parameter.
needLR_3.3.sh
#!/bin/bash
# needLR_3.3.sh -- environment setup and command-line parsing.
#
# Options:
#   -f FILE  text file listing the query VCF paths      -> QUERY_VCFS
#   -g FILE  reference genome                           -> REF_GENOME
#   -s DIR   directory that contains backend_files/     -> WD_PREFIX (default ".")
#   -o DIR   directory in which needLR_output/ is made  -> OUT_DIR   (default ".")
export PATH="/gsc/software/linux-x86_64-centos7/bcftools-1.19/bin:${PATH}"
export PATH="/gsc/software/linux-x86_64-centos7/bedtools-2.27.1/bin:${PATH}"

##Without -s and -o both directories default to ".", i.e. run from inside needLR_local

# Print a one-line usage summary to stderr.
needlr_usage() {
  printf 'Usage: %s -f QUERY_VCFS -g REF_GENOME [-s WD_PREFIX] [-o OUT_DIR]\n' \
    "${0##*/}" >&2
}

# Parse the given arguments into the globals QUERY_VCFS, REF_GENOME,
# WD_PREFIX and OUT_DIR. An unknown option or a missing option argument
# prints the usage line and exits with status 2 instead of being ignored.
needlr_parse_args() {
  local flag
  # Restart option scanning so the function can be called more than once.
  OPTIND=1
  while getopts f:g:s:o: flag; do
    case "${flag}" in
      f) QUERY_VCFS="${OPTARG}" ;;
      g) REF_GENOME="${OPTARG}" ;;
      s) WD_PREFIX="${OPTARG}" ;;
      o) OUT_DIR="${OPTARG}" ;;
      *) needlr_usage; exit 2 ;;
    esac
  done
}

needlr_parse_args "$@"

# An empty prefix would turn "${WD_PREFIX}/backend_files" and
# "${OUT_DIR}/needLR_output" into paths at the filesystem root, so fall
# back to the current directory.
WD_PREFIX="${WD_PREFIX:-.}"
OUT_DIR="${OUT_DIR:-.}"

# Quoted so values containing spaces or glob characters are printed verbatim.
printf '%s\n' "${QUERY_VCFS-}" "${REF_GENOME-}" "${WD_PREFIX}" "${OUT_DIR}"
##This starts the loop for all of the files in QUERY_VCFS
while IFS= read -r QUERY_FILE_PATH; do
##This will be the basename for the output files
QUERY_FILE_NAME=$(basename "$QUERY_FILE_PATH" .vcf.gz)_needLR_3.3
##This is the output directory specific to each query VCF
UNSOLVED_DIR="${OUT_DIR}/needLR_output/$QUERY_FILE_NAME"
mkdir -p $UNSOLVED_DIR
##This is the pre-bcftools merged background vcf
ONTUW150_BCFTOOLS_MERGED="${WD_PREFIX}/backend_files/UWONT150_truvari.vcf.gz"
##This is the list of the 1KGP sample names in the order they will be merged in Jasmine
KGP_SAMPLE_NAMES="${WD_PREFIX}/backend_files/ID_order.txt"
##These are the bed files used for annotating the SVs
GENES="${WD_PREFIX}/backend_files/bed_files/PROTEIN_CODING_GENE_gencode.v45.annotation_1kb_slop.bed"
CODING_REGIONS="${WD_PREFIX}/backend_files/bed_files/ENSEMBL_CANONICAL_EXON_in_PROTEIN_CODING_GENE_gencode.v45.annotation.bed"
OMIM_GENES="${WD_PREFIX}/backend_files/bed_files/OMIM_gene_phen_hg38.bed"
CENTROMERES="${WD_PREFIX}/backend_files/bed_files/hg38_centromeres_endtoend.bed"
PERICENTROMERES="${WD_PREFIX}/backend_files/bed_files/hg38_pericentromeres_5Mb.bed"
TELOMERES="${WD_PREFIX}/backend_files/bed_files/hg38_telomeres_5Mb.bed"
DEFRABB_HICONF="${WD_PREFIX}/backend_files/bed_files/GRCh38_HG002-T2TQ100-V1.0_stvar.benchmark.bed"
STR="${WD_PREFIX}/backend_files/bed_files/STR_original_motifs.set148.bed"
VNTR="${WD_PREFIX}/backend_files/bed_files/VNTR_original_motifs.set148.bed"
SEGDUPS="${WD_PREFIX}/backend_files/bed_files/SEGDUPS_GIAB_v3.3.bed"
REPEAT_MASKER="${WD_PREFIX}/backend_files/bed_files/UCSC_hg38_Repeats_RepeatMasker.bed"
GAPS="${WD_PREFIX}/backend_files/bed_files/UCSC_hg38_Mapping_and_Sequencing_Gap.bed"
I then call it from the Nextflow process with:
# Write the query VCF path(s) to a text file: the script's -f option is
# stored as QUERY_VCFS, which the script treats as a list of files to loop over.
ls ${VCF} > vcfs.txt
# Run the single-sample needLR script in the "needLR" environment
# (presumably via "conda run" -- confirm what params.conda points to).
#   -g  reference genome
#   -s  WD_PREFIX: directory that contains backend_files/
#   -o  OUT_DIR: needLR_output/ is created under it (here the current directory)
${params.conda} run -n needLR \
bash ${params.needlr_single} \
-f vcfs.txt \
-g ${reference_fasta} \
-s ${params.needlr_script_dir} \
-o ./
needLR_trio_3.3.sh
#!/bin/bash
# needLR_trio_3.3.sh -- environment setup and command-line parsing.
#
# Options:
#   -p FILE  proband VCF                                -> PROBAND_VCF
#   -m FILE  maternal VCF                               -> MATERNAL_VCF
#   -f FILE  paternal VCF                               -> PATERNAL_VCF
#   -g FILE  reference genome                           -> REF_GENOME
#   -s DIR   directory that contains backend_files/     -> WD_PREFIX (default ".")
#   -o DIR   directory in which needLR_output/ is made  -> OUT_DIR   (default ".")
export PATH="/gsc/software/linux-x86_64-centos7/bcftools-1.19/bin:${PATH}"
export PATH="/gsc/software/linux-x86_64-centos7/bedtools-2.27.1/bin:${PATH}"

##Without -s and -o both directories default to ".", i.e. run from inside needLR_local

# Print a one-line usage summary to stderr.
needlr_trio_usage() {
  printf 'Usage: %s -p PROBAND_VCF -m MATERNAL_VCF -f PATERNAL_VCF -g REF_GENOME [-s WD_PREFIX] [-o OUT_DIR]\n' \
    "${0##*/}" >&2
}

# Parse the given arguments into the globals PROBAND_VCF, MATERNAL_VCF,
# PATERNAL_VCF, REF_GENOME, WD_PREFIX and OUT_DIR. An unknown option or a
# missing option argument prints the usage line and exits with status 2
# instead of being ignored.
needlr_trio_parse_args() {
  local flag
  # Restart option scanning so the function can be called more than once.
  OPTIND=1
  while getopts p:m:f:g:s:o: flag; do
    case "${flag}" in
      p) PROBAND_VCF="${OPTARG}" ;;
      m) MATERNAL_VCF="${OPTARG}" ;;
      f) PATERNAL_VCF="${OPTARG}" ;;
      g) REF_GENOME="${OPTARG}" ;;
      s) WD_PREFIX="${OPTARG}" ;;
      o) OUT_DIR="${OPTARG}" ;;
      *) needlr_trio_usage; exit 2 ;;
    esac
  done
}

needlr_trio_parse_args "$@"

# An empty prefix would turn "${WD_PREFIX}/backend_files" and
# "${OUT_DIR}/needLR_output" into paths at the filesystem root, so fall
# back to the current directory.
WD_PREFIX="${WD_PREFIX:-.}"
OUT_DIR="${OUT_DIR:-.}"

printf '%s\n' \
  "${PROBAND_VCF-}" \
  "${MATERNAL_VCF-}" \
  "${PATERNAL_VCF-}" \
  "${REF_GENOME-}" \
  "${WD_PREFIX}" \
  "${OUT_DIR}"
##Sample prefixes: each VCF's file name with the directory and .vcf.gz suffix removed
PROBAND_PREFIX=$(basename "$PROBAND_VCF" .vcf.gz)
MATERNAL_PREFIX=$(basename "$MATERNAL_VCF" .vcf.gz)
PATERNAL_PREFIX=$(basename "$PATERNAL_VCF" .vcf.gz)
##Name of this run's output directory, derived from the proband prefix
QUERY_FILE_NAME="${PROBAND_PREFIX}_needLR-3.3_trio_UWONT150"
##REF_GENOME is supplied with -g; earlier hard-coded examples kept for reference
#REF_GENOME="/n/users/jgust1/reference_files/hg38_reference_genome/hg38.no_alt.fa"
#REF_GENOME="/projects/alignment_references/Homo_sapiens/hg38_no_alt/genome/fasta/hg38_no_alt.fa"
##Output directory for this trio; an empty OUT_DIR falls back to "." so the
##path can never resolve to /needLR_output at the filesystem root
UNSOLVED_DIR="${OUT_DIR:-.}/needLR_output/${QUERY_FILE_NAME}"
##Quoted so a path with spaces stays one argument; stop here if it cannot be
##created, since nothing can be written to a missing output directory
mkdir -p -- "${UNSOLVED_DIR}" || {
  printf 'needLR: cannot create output directory: %s\n' "${UNSOLVED_DIR}" >&2
  exit 1
}
##All backend resources live under ${WD_PREFIX}/backend_files
backend_dir="${WD_PREFIX}/backend_files"
bed_dir="${backend_dir}/bed_files"

##Background VCF (pre-bcftools merged)
ONTUW150_BCFTOOLS_MERGED="${backend_dir}/UWONT150_truvari.vcf.gz"

##1KGP sample names, listed in the order they will be merged in Jasmine
KGP_SAMPLE_NAMES="${backend_dir}/ID_order.txt"

##BED files used for annotating the SVs
GENES="${bed_dir}/PROTEIN_CODING_GENE_gencode.v45.annotation_1kb_slop.bed"
CODING_REGIONS="${bed_dir}/ENSEMBL_CANONICAL_EXON_in_PROTEIN_CODING_GENE_gencode.v45.annotation.bed"
OMIM_GENES="${bed_dir}/OMIM_gene_phen_hg38.bed"
CENTROMERES="${bed_dir}/hg38_centromeres_endtoend.bed"
PERICENTROMERES="${bed_dir}/hg38_pericentromeres_5Mb.bed"
TELOMERES="${bed_dir}/hg38_telomeres_5Mb.bed"
DEFRABB_HICONF="${bed_dir}/GRCh38_HG002-T2TQ100-V1.0_stvar.benchmark.bed"
STR="${bed_dir}/STR_original_motifs.set148.bed"
VNTR="${bed_dir}/VNTR_original_motifs.set148.bed"
SEGDUPS="${bed_dir}/SEGDUPS_GIAB_v3.3.bed"
REPEAT_MASKER="${bed_dir}/UCSC_hg38_Repeats_RepeatMasker.bed"
GAPS="${bed_dir}/UCSC_hg38_Mapping_and_Sequencing_Gap.bed"
I then call it from the Nextflow process with:
# Run the trio needLR script in the "needLR" environment
# (presumably via "conda run" -- confirm what params.conda points to).
#   -p / -m / -f  proband, maternal and paternal VCFs
#   -g            reference genome
#   -s            WD_PREFIX: directory that contains backend_files/
#   -o            OUT_DIR: needLR_output/ is created under it (here the current directory)
# NOTE(review): the three globs are unquoted and expanded by the shell. If a
# pattern matches more than one file, getopts stops at the first extra file
# name and the remaining options are never read -- confirm that each pattern
# matches exactly one staged file.
${params.conda} run -n needLR \
bash ${params.needlr_trio} \
-p *proband*.vcf.gz \
-m *mother*.vcf.gz \
-f *father*.vcf.gz \
-g ${reference_fasta} \
-s ${params.needlr_script_dir} \
-o ./
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels