Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 0 additions & 63 deletions cluster.yaml

This file was deleted.

2 changes: 0 additions & 2 deletions lsf.yaml

This file was deleted.

110 changes: 79 additions & 31 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,23 @@ if config["qc"]["rsq2"] and config["qc"]["rsq2"] != 'NA':
qualfilt += " || (R2 >= {R2} && MAF < {MAF})".format(
R2=config["qc"]["rsq2"], MAF=config["qc"]["maf"])


# Pick the PLINK output path templates. When the optional "fix_fam" config
# entry is set to a truthy value, the templates carry a "_fixed" suffix;
# otherwise the plain filtered names are used.
if not config.get("fix_fam"):
    path_plink_bycohort = "{impute_dir}/data/{cohort}_chrall_filtered.{ext}"
    path_plink_merged = "{impute_dir}/data/all_chrall_filtered.{ext}"
else:
    path_plink_bycohort = "{impute_dir}/data/{cohort}_chrall_filtered_fixed.{ext}"
    path_plink_merged = "{impute_dir}/data/all_chrall_filtered_fixed.{ext}"


# Output path templates, keyed by output type. The {impute_dir}, {cohort} and
# {ext} placeholders are left unformatted here.
outs = dict(
    stat_report="{impute_dir}/stats/{cohort}_impStats.html",
    vcf_bycohort="{impute_dir}/data/{cohort}_chrall_filtered.vcf.gz",
    vcf_merged="{impute_dir}/data/all_chrall_filtered.vcf.gz",
    bgen_bycohort="{impute_dir}/data/{cohort}_chrall_filtered.bgen",
    bgen_merged="{impute_dir}/data/merged/merged_chrall_filtered.bgen",
    # Each PLINK key is given exactly once (a repeated keyword argument is a
    # SyntaxError); the value is the template chosen from the "fix_fam"
    # setting earlier in this file.
    plink_bycohort=path_plink_bycohort,
    plink_merged=path_plink_merged)


def expand_outs(out):
Expand Down Expand Up @@ -172,7 +181,7 @@ if zipped:
conda: 'envs/p7z.yaml'
threads: 4
resources:
mem_mb = 4000,
mem_mb = 16000,
time_min = 5
shell:
r'''
Expand Down Expand Up @@ -217,8 +226,8 @@ rule stats:
conda: "envs/r_stats.yaml"
threads: 22
resources:
mem_mb = 8000,
walltime = '8:00'
mem = "256GB",
runtime = "8h"
script: "scripts/Post_imputation.Rmd"

# Sample filtering rules
Expand All @@ -244,7 +253,7 @@ rule fixheaders:
threads: 1
resources:
mem_mb = 2048,
walltime = '24:00'
runtime = "24h"
conda: "envs/bcftools.yaml"
shell:
r"""
Expand All @@ -270,7 +279,7 @@ if minimac_version == 'guess':
threads: 1
resources:
mem_mb = 2048,
walltime = '24:00'
runtime = "24h"
script: 'scripts/rule_detect_minimac.py'
else:
rule detect_minimac:
Expand All @@ -282,7 +291,7 @@ else:
localrule: True
resources:
mem_mb = 2048,
walltime = '1:00'
runtime = "1h"
shell: 'echo {params.ver} > {output}'


Expand All @@ -299,8 +308,8 @@ if sampfilt and minimac_version in ['guess', '3']:
sf = sampfilt
threads: 8
resources:
mem_mb = 256,
walltime = '24:00'
mem_mb = 4096,
runtime = "24h"
conda: "envs/bcftools.yaml"
shell:
r'''
Expand Down Expand Up @@ -332,8 +341,8 @@ elif minimac_version in ['guess', '3']:
filt = qualfilt
threads: 8
resources:
mem_mb = 256,
walltime = '24:00'
mem_mb = 4096,
runtime = "24h"
conda: "envs/bcftools.yaml"
shell:
r'''
Expand Down Expand Up @@ -364,8 +373,8 @@ elif sampfilt:
sf = sampfilt
threads: 8
resources:
mem_mb = 256,
walltime = '24:00'
mem_mb = 4096,
runtime = "24h"
conda: "envs/bcftools.yaml"
shell:
r'''
Expand All @@ -385,8 +394,8 @@ else:
filt = qualfilt
threads: 8
resources:
mem_mb = 256,
walltime = '24:00'
mem_mb = 4096,
runtime = "24h"
conda: "envs/bcftools.yaml"
shell:
r'''
Expand Down Expand Up @@ -429,7 +438,7 @@ rule rename:
conda: "envs/bcftools.yaml"
threads: 2
resources:
mem_mb = 1024,
mem_mb = 2048,
time_min = 60
shell:
'''
Expand Down Expand Up @@ -460,7 +469,7 @@ rule renameAuto:
conda: "envs/bcftools.yaml"
threads: 2
resources:
mem_mb = 1024,
mem_mb = 2048,
time_min = 60
shell:
'''
Expand All @@ -473,8 +482,8 @@ rule concat_chroms_samp:
output: "{impute_dir}/data/{cohort}_chrall_filtered.vcf.gz"
threads: 8
resources:
mem_mb = 512,
walltime = '24:00'
mem_mb = 4096,
runtime = "24h"
conda: "envs/bcftools.yaml"
shell: "bcftools concat --threads 8 {input} | bcftools norm -d none -o {output} -Oz"

Expand All @@ -484,7 +493,7 @@ rule index_samples_chrom:
conda: "envs/bcftools.yaml"
threads: 1
resources:
mem_mb = 256,
mem_mb = 1024,
time_min = 120
shell: "bcftools index -t {input}"

Expand All @@ -495,18 +504,31 @@ rule merge_samples_chrom:
output: "{impute_dir}/data/by_chrom/all_chr{chrom}_filtered.vcf.gz"
threads: 8
resources:
mem_mb = 2000,
walltime = "36:00"
mem_mb = 16000,
runtime = "36h"
conda: "envs/bcftools.yaml"
shell: "bcftools merge -m none --threads 8 {input.vcf} | bcftools norm -d none -o {output} -Oz"
shell:
"""
if [ $(echo {input.vcf} | wc -w) -eq 1 ]; then
cp {input.vcf} {output}
else
bcftools merge -m none --threads 8 {input.vcf} | bcftools norm -d none -o {output} -Oz
fi
"""

def tempif_cca(path):
    """Return ``temp(path)`` when the ``temp_ccs`` config flag is truthy.

    When the flag is missing or falsy, *path* is returned unchanged.
    """
    # NOTE(review): the config key is spelled "temp_ccs" while this helper is
    # named "..._cca" -- confirm the key spelling is the intended one.
    return temp(path) if config.get("temp_ccs", False) else path

rule concat_chroms_all:
input: expand("{{impute_dir}}/data/by_chrom/all_chr{chrom}_filtered.vcf.gz", chrom=CHROM)
output: "{impute_dir}/data/all_chrall_filtered.vcf.gz"
output: tempif_cca("{impute_dir}/data/all_chrall_filtered.vcf.gz")
threads: 8
resources:
mem_mb = 256,
walltime = "24:00"
mem_mb = 2048,
runtime = "24h"
conda: "envs/bcftools.yaml"
shell: "bcftools concat -o {output} -Oz --threads 8 {input}"

Expand All @@ -518,8 +540,8 @@ rule make_plink_all:
ID = "--id-delim" if automap_tf else "--double-id"
threads: 10
resources:
mem_mb = 3000,
walltime = "96:00"
mem_mb = 30000,
runtime = "96h"
conda: "envs/plink.yaml"
shell:
"plink --keep-allele-order --vcf {input} {params.ID} --memory 20000 --threads 10 --make-bed "
Expand All @@ -533,12 +555,38 @@ rule make_plink_samp:
ID = "--id-delim" if automap_tf else "--double-id "
threads: 10
resources:
mem_mb = 2000,
walltime = "2:00"
mem_mb = 20000,
runtime = "2h"
conda: "envs/plink.yaml"
shell:
"plink --keep-allele-order --vcf {input} {params.ID} --memory 20000 --threads 10 --make-bed "
"--out {params.out_plink}"

# If bgen outputs are requested
include: "rules/bgen.smk"

if "fix_fam" in config and config["fix_fam"]:
rule fix_fam:
input:
oldfam = config["fix_fam"],
newfam = '{impute_dir}/data/{cohorts}_chrall_filtered.fam'
output: "{impute_dir}/data/{cohorts}_chrall_filtered_fixed.fam"
threads: 1
resources:
mem_mb = 1024,
time_min = 180
conda: "envs/r.yaml"
script: 'scripts/fix_fam.R'

rule link_fix_bedbim:
input:
bed = '{impute_dir}/data/{cohorts}_chrall_filtered.bed',
bim = '{impute_dir}/data/{cohorts}_chrall_filtered.bim'
output:
bed = '{impute_dir}/data/{cohorts}_chrall_filtered_fixed.bed',
bim = '{impute_dir}/data/{cohorts}_chrall_filtered_fixed.bim'
localrule: True
shell: '''
ln -rs {input.bed} {output.bed}
ln -rs {input.bim} {output.bim}
'''
3 changes: 2 additions & 1 deletion workflow/envs/detect_minimac.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
channels:
  - conda-forge
  - bioconda
dependencies:
  - python=3.11
  # pysam is pinned exactly once; two different pins for the same package
  # cannot both be satisfied by the solver.
  - pysam=0.23.1
10 changes: 5 additions & 5 deletions workflow/rules/bgen.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ rule make_bgen:
threads: 1
resources:
mem_mb = 4000,
walltime = "24:00"
runtime = "24h"
container: 'docker://befh/bgen:v1.1.7'
shell:
"""
Expand All @@ -26,7 +26,7 @@ rule cat_bgen_samp:
threads: 1
resources:
mem_mb = 5000,
walltime = "120:00"
runtime = "120h"
container: 'docker://befh/bgen:v1.1.7'
shell:
"""
Expand All @@ -47,8 +47,8 @@ rule make_bgen_allsamp:
args = " ".join(["-g {} -s {}".format(gen, samp) for gen, samp in zip(bga_gen, bga_samp)])
threads: 10
resources:
mem_mb = 4000,
walltime = "24:00"
mem_mb = 40000,
runtime = "24h"
container: 'docker://befh/bgen:v1.1.7'
shell:
"""
Expand All @@ -65,6 +65,6 @@ rule cat_bgen_allsamp:
threads: 1
resources:
mem_mb = 5000,
walltime = "120:00"
runtime = "120h"
container: 'docker://befh/bgen:v1.1.7'
shell: "cat-bgen -g {input.gen} -og {output.gen}; cp {input.samp} {output.samp}"
Loading