Skip to content

required running from script directory #6

@mulderdt

Description

@mulderdt

Having to run needLR from its script directory makes it difficult to use in Nextflow workflows. I made the following changes to finish implementing the WD_PREFIX parameter (which appears to be only half implemented) and to add an OUT_DIR parameter.

needLR_3.3.sh

#!/bin/bash

export PATH="/gsc/software/linux-x86_64-centos7/bcftools-1.19/bin:${PATH}"
export PATH="/gsc/software/linux-x86_64-centos7/bedtools-2.27.1/bin:${PATH}"

##needLR_3.3.sh can be launched from any directory.
##Usage: needLR_3.3.sh -f <vcf_list.txt> -g <reference.fa> [-s <needLR_dir>] [-o <out_dir>]
##  -f  text file with the query VCF paths (read line by line further down)
##  -g  reference genome FASTA
##  -s  directory that contains backend_files/ (default: the directory of this script)
##  -o  directory in which needLR_output/ is created (default: current directory)

while getopts f:g:s:o: flag
do
    case "${flag}" in
        f) QUERY_VCFS="${OPTARG}" ;;
        g) REF_GENOME="${OPTARG}" ;;
        s) WD_PREFIX="${OPTARG}" ;;
        o) OUT_DIR="${OPTARG}" ;;
        ##getopts reports the bad flag itself; stop here instead of running half-configured
        *) echo "Usage: $(basename -- "$0") -f <vcf_list.txt> -g <reference.fa> [-s <needLR_dir>] [-o <out_dir>]" >&2
           exit 2 ;;
    esac
done

##Without these defaults an omitted -s/-o would resolve to /backend_files and /needLR_output
WD_PREFIX="${WD_PREFIX:-$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)}"
OUT_DIR="${OUT_DIR:-.}"

##Quoted so that paths containing spaces or glob characters are printed verbatim
printf '%s\n' "${QUERY_VCFS}" "${REF_GENOME}" "${WD_PREFIX}" "${OUT_DIR}"

##This starts the loop for all of the files in QUERY_VCFS
while IFS= read -r QUERY_FILE_PATH; do

##This will be the basename for the output files
QUERY_FILE_NAME="$(basename -- "$QUERY_FILE_PATH" .vcf.gz)_needLR_3.3"

##This is the output directory specific to each query VCF.
##Falls back to the current directory when OUT_DIR is empty so that an omitted -o
##never resolves to /needLR_output; quoted so paths with spaces stay one argument.
UNSOLVED_DIR="${OUT_DIR:-.}/needLR_output/${QUERY_FILE_NAME}"
mkdir -p -- "$UNSOLVED_DIR"

##All reference inputs sit in backend_files/ (and its bed_files/ subfolder) below WD_PREFIX
NEEDLR_BACKEND_DIR="${WD_PREFIX}/backend_files"
NEEDLR_BED_DIR="${NEEDLR_BACKEND_DIR}/bed_files"

##Background VCF, prior to the bcftools merge
ONTUW150_BCFTOOLS_MERGED="${NEEDLR_BACKEND_DIR}/UWONT150_truvari.vcf.gz"

##1KGP sample names, listed in the order Jasmine will merge them
KGP_SAMPLE_NAMES="${NEEDLR_BACKEND_DIR}/ID_order.txt"

##BED files used to annotate the SVs
GENES="${NEEDLR_BED_DIR}/PROTEIN_CODING_GENE_gencode.v45.annotation_1kb_slop.bed"
CODING_REGIONS="${NEEDLR_BED_DIR}/ENSEMBL_CANONICAL_EXON_in_PROTEIN_CODING_GENE_gencode.v45.annotation.bed"
OMIM_GENES="${NEEDLR_BED_DIR}/OMIM_gene_phen_hg38.bed"
CENTROMERES="${NEEDLR_BED_DIR}/hg38_centromeres_endtoend.bed"
PERICENTROMERES="${NEEDLR_BED_DIR}/hg38_pericentromeres_5Mb.bed"
TELOMERES="${NEEDLR_BED_DIR}/hg38_telomeres_5Mb.bed"
DEFRABB_HICONF="${NEEDLR_BED_DIR}/GRCh38_HG002-T2TQ100-V1.0_stvar.benchmark.bed"
STR="${NEEDLR_BED_DIR}/STR_original_motifs.set148.bed"
VNTR="${NEEDLR_BED_DIR}/VNTR_original_motifs.set148.bed"
SEGDUPS="${NEEDLR_BED_DIR}/SEGDUPS_GIAB_v3.3.bed"
REPEAT_MASKER="${NEEDLR_BED_DIR}/UCSC_hg38_Repeats_RepeatMasker.bed"
GAPS="${NEEDLR_BED_DIR}/UCSC_hg38_Mapping_and_Sequencing_Gap.bed"

I then call with

                #Body of a Nextflow process script: the brace placeholders are substituted
                #before bash runs (assumed from the Nextflow context - confirm in the workflow).
                #put the vcfs into a txt file
                ls ${VCF} > vcfs.txt
                #run needlr
                #-f is the list written above, -g the reference FASTA,
                #-s the directory holding backend_files, -o the directory that receives needLR_output
                ${params.conda} run -n needLR \
                bash ${params.needlr_single} \
                -f vcfs.txt \
                -g ${reference_fasta} \
                -s ${params.needlr_script_dir} \
                -o ./

needLR_trio_3.3.sh

#!/bin/bash

export PATH="/gsc/software/linux-x86_64-centos7/bcftools-1.19/bin:${PATH}"
export PATH="/gsc/software/linux-x86_64-centos7/bedtools-2.27.1/bin:${PATH}"

##needLR_trio_3.3.sh can be launched from any directory.
##Usage: needLR_trio_3.3.sh -p <proband.vcf.gz> -m <mother.vcf.gz> -f <father.vcf.gz>
##                          -g <reference.fa> [-s <needLR_dir>] [-o <out_dir>]
##  -p/-m/-f  proband, maternal and paternal VCFs
##  -g        reference genome FASTA
##  -s        directory that contains backend_files/ (default: the directory of this script)
##  -o        directory in which needLR_output/ is created (default: current directory)

while getopts p:m:f:g:s:o: flag
do
    case "${flag}" in
        p) PROBAND_VCF="${OPTARG}" ;;
        m) MATERNAL_VCF="${OPTARG}" ;;
        f) PATERNAL_VCF="${OPTARG}" ;;
        g) REF_GENOME="${OPTARG}" ;;
        s) WD_PREFIX="${OPTARG}" ;;
        o) OUT_DIR="${OPTARG}" ;;
        ##getopts reports the bad flag itself; stop here instead of running half-configured
        *) echo "Usage: $(basename -- "$0") -p <proband.vcf.gz> -m <mother.vcf.gz> -f <father.vcf.gz> -g <reference.fa> [-s <needLR_dir>] [-o <out_dir>]" >&2
           exit 2 ;;
    esac
done

##Without these defaults an omitted -s/-o would resolve to /backend_files and /needLR_output
WD_PREFIX="${WD_PREFIX:-$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)}"
OUT_DIR="${OUT_DIR:-.}"

##Echo the effective settings, one per line
printf '%s\n' \
    "${PROBAND_VCF}" \
    "${MATERNAL_VCF}" \
    "${PATERNAL_VCF}" \
    "${REF_GENOME}" \
    "${WD_PREFIX}" \
    "${OUT_DIR}"

##Sample prefixes: the VCF file names without directory and without the .vcf.gz suffix
PROBAND_PREFIX=$(basename -- "$PROBAND_VCF" .vcf.gz)
MATERNAL_PREFIX=$(basename -- "$MATERNAL_VCF" .vcf.gz)
PATERNAL_PREFIX=$(basename -- "$PATERNAL_VCF" .vcf.gz)

##Basename for the output files, derived from the proband prefix computed above
QUERY_FILE_NAME="${PROBAND_PREFIX}_needLR-3.3_trio_UWONT150"

##This is a filepath to an hg38 reference FASTA - USER ADDS
#REF_GENOME="/n/users/jgust1/reference_files/hg38_reference_genome/hg38.no_alt.fa"
#REF_GENOME="/projects/alignment_references/Homo_sapiens/hg38_no_alt/genome/fasta/hg38_no_alt.fa"

##Output directory for this trio.
##Falls back to the current directory when OUT_DIR is empty so that an omitted -o
##never resolves to /needLR_output; quoted so paths with spaces stay one argument.
UNSOLVED_DIR="${OUT_DIR:-.}/needLR_output/${QUERY_FILE_NAME}"
mkdir -p -- "${UNSOLVED_DIR}"

##All reference inputs sit in backend_files/ (and its bed_files/ subfolder) below WD_PREFIX
NEEDLR_BACKEND_DIR="${WD_PREFIX}/backend_files"
NEEDLR_BED_DIR="${NEEDLR_BACKEND_DIR}/bed_files"

##Background VCF, prior to the bcftools merge
ONTUW150_BCFTOOLS_MERGED="${NEEDLR_BACKEND_DIR}/UWONT150_truvari.vcf.gz"

##1KGP sample names, listed in the order Jasmine will merge them
KGP_SAMPLE_NAMES="${NEEDLR_BACKEND_DIR}/ID_order.txt"

##BED files used to annotate the SVs
GENES="${NEEDLR_BED_DIR}/PROTEIN_CODING_GENE_gencode.v45.annotation_1kb_slop.bed"
CODING_REGIONS="${NEEDLR_BED_DIR}/ENSEMBL_CANONICAL_EXON_in_PROTEIN_CODING_GENE_gencode.v45.annotation.bed"
OMIM_GENES="${NEEDLR_BED_DIR}/OMIM_gene_phen_hg38.bed"
CENTROMERES="${NEEDLR_BED_DIR}/hg38_centromeres_endtoend.bed"
PERICENTROMERES="${NEEDLR_BED_DIR}/hg38_pericentromeres_5Mb.bed"
TELOMERES="${NEEDLR_BED_DIR}/hg38_telomeres_5Mb.bed"
DEFRABB_HICONF="${NEEDLR_BED_DIR}/GRCh38_HG002-T2TQ100-V1.0_stvar.benchmark.bed"
STR="${NEEDLR_BED_DIR}/STR_original_motifs.set148.bed"
VNTR="${NEEDLR_BED_DIR}/VNTR_original_motifs.set148.bed"
SEGDUPS="${NEEDLR_BED_DIR}/SEGDUPS_GIAB_v3.3.bed"
REPEAT_MASKER="${NEEDLR_BED_DIR}/UCSC_hg38_Repeats_RepeatMasker.bed"
GAPS="${NEEDLR_BED_DIR}/UCSC_hg38_Mapping_and_Sequencing_Gap.bed"

I then call with

                #Body of a Nextflow process script: the brace placeholders are substituted
                #before bash runs (assumed from the Nextflow context - confirm in the workflow).
                #-p/-m/-f are the proband, maternal and paternal VCFs, -g the reference FASTA,
                #-s the directory holding backend_files, -o the directory that receives needLR_output.
                #NOTE(review): each glob is assumed to match exactly one staged file; a second
                #match would be passed as an extra argument - confirm how the inputs are staged.
                ${params.conda} run -n needLR \
                bash ${params.needlr_trio} \
                -p *proband*.vcf.gz \
                -m *mother*.vcf.gz \
                -f *father*.vcf.gz \
                -g ${reference_fasta} \
                -s ${params.needlr_script_dir} \
                -o ./

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions