From 47045940d4ad11b832cfa27f183dd63b98b9a0b2 Mon Sep 17 00:00:00 2001 From: PollyTikhonova Date: Fri, 11 Sep 2020 18:44:42 +0300 Subject: [PATCH 1/2] single-end --- .../trimmomatic/workflow.smk | 34 ++++++++++++++++++- .../trimmomatic/wrapper.py | 27 ++++++++++----- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/assnake_core_preprocessing/trimmomatic/workflow.smk b/assnake_core_preprocessing/trimmomatic/workflow.smk index ba289d3..d7d8ed0 100644 --- a/assnake_core_preprocessing/trimmomatic/workflow.smk +++ b/assnake_core_preprocessing/trimmomatic/workflow.smk @@ -1,3 +1,17 @@ +from assnake.core.dataset import Dataset + +rule check_for_trimmomatic: + wildcard_constraints: + df="[\w\d_-]+" + input: df_name = '{df}' + run: + if Dataset(input.df_name).dataset_type == 'paired-end': + print('paired-end') + ruleorder: tmtic > tmtic_single + else: + print('single-end') + ruleorder: tmtic_single > tmtic + rule tmtic: input: first=wc_config['fastq_gz_R1_wc'], @@ -9,7 +23,25 @@ rule tmtic: u =wc_config['fastq_gz_tmtic_S_wc'] params: u1=wc_config['fastq_gz_tmtic_unpaired1_wc'], - u2=wc_config['fastq_gz_tmtic_unpaired2_wc'] + u2=wc_config['fastq_gz_tmtic_unpaired2_wc'], + dataset_type="paired-end" + log: "{fs_prefix}/{df}/reads/{preproc}__tmtic_{preset}/{df_sample}.log" + threads: 8#config['assnake-core-preprocessing']['results']['trimmomatic']['threads'] + wildcard_constraints: + params="[\w\d_-]+", + conda: 'env_0.38.yaml' + wrapper: "file://"+os.path.join(config['assnake-core-preprocessing']['install_dir'], 'trimmomatic/wrapper.py') + + + +rule tmtic_single: + input: + first=wc_config['fastq_gz_R1_wc'], + params=os.path.join(config['assnake_db'], "presets/trimmomatic/{preset}.json") + output: + r1=wc_config['fastq_gz_tmtic_R1_wc'] + params: + dataset_type="single-end" log: "{fs_prefix}/{df}/reads/{preproc}__tmtic_{preset}/{df_sample}.log" threads: 8#config['assnake-core-preprocessing']['results']['trimmomatic']['threads'] wildcard_constraints: diff --git a/assnake_core_preprocessing/trimmomatic/wrapper.py b/assnake_core_preprocessing/trimmomatic/wrapper.py index 5995265..491e98a 100644 --- a/assnake_core_preprocessing/trimmomatic/wrapper.py +++ b/assnake_core_preprocessing/trimmomatic/wrapper.py @@ -1,6 +1,7 @@ from snakemake.shell import shell import json, os + def tmtic_params(params_loc): params_str = '' params_dict = {} @@ -46,17 +47,27 @@ def tmtic_params(params_loc): return params_str param_str = tmtic_params(snakemake.input.params) - -shell('''echo "start installing tmmtic" - trimmomatic PE -phred33 \ + +if '{snakemake.params.dataset_type}' == 'paired-end': + shell('''echo "start installing tmmtic" + trimmomatic PE -phred33 \ + -threads {snakemake.threads} \ + {snakemake.input.first} {snakemake.input.second} \ + {snakemake.output.r1} {snakemake.params.u1} \ + {snakemake.output.r2} {snakemake.params.u2} \ + {param_str} \ + >{snakemake.log} 2>&1 && \ + cat {snakemake.params.u1} {snakemake.params.u2} | gzip > {snakemake.output.u} 2>>{snakemake.log} && \ + rm {snakemake.params.u1} {snakemake.params.u2} 2>>{snakemake.log}''') +else: + shell('''echo "start installing tmmtic" + trimmomatic SE -phred33 \ -threads {snakemake.threads} \ - {snakemake.input.first} {snakemake.input.second} \ - {snakemake.output.r1} {snakemake.params.u1} \ - {snakemake.output.r2} {snakemake.params.u2} \ + {snakemake.input.first} \ + {snakemake.output.r1} \ {param_str} \ >{snakemake.log} 2>&1 && \ - cat {snakemake.params.u1} {snakemake.params.u2} | gzip > {snakemake.output.u} 2>>{snakemake.log} && \ - rm {snakemake.params.u1} {snakemake.params.u2} 2>>{snakemake.log}''') + 2>>{snakemake.log}''') if 'task_id' in snakemake.config.keys(): save_to_db(config['task_id'], 'tmtic', str(input), str(log), 'RUN SUCCESSFUL') \ No newline at end of file From 29cf97f128d78aac65e42f5c6ebb604a34960ce0 Mon Sep 17 00:00:00 2001 From: Polina Tikhonova Date: Wed, 30 Sep 2020 13:50:13 +0300 Subject: [PATCH 2/2] severe edits --- .../trimmomatic/workflow.smk | 56 +++++++------------ .../trimmomatic/wrapper.py | 12 ++-- 2 files changed, 26 insertions(+), 42 deletions(-) diff --git a/assnake_core_preprocessing/trimmomatic/workflow.smk b/assnake_core_preprocessing/trimmomatic/workflow.smk index d7d8ed0..53dd3a8 100644 --- a/assnake_core_preprocessing/trimmomatic/workflow.smk +++ b/assnake_core_preprocessing/trimmomatic/workflow.smk @@ -1,30 +1,30 @@ from assnake.core.dataset import Dataset +import os -rule check_for_trimmomatic: - wildcard_constraints: - df="[\w\d_-]+" - input: df_name = '{df}' - run: - if Dataset(input.df_name).dataset_type == 'paired-end': - print('paired-end') - ruleorder: tmtic > tmtic_single - else: - print('single-end') - ruleorder: tmtic_single > tmtic +def inputs(wildcards): + if Dataset(wildcards.df).dataset_type == 'paired-end': + return [ + wc_config['fastq_gz_R1_wc'], + wc_config['fastq_gz_R2_wc'], + os.path.join(config['assnake_db'], "presets/trimmomatic/{preset}.json"), + ] + else: + return [ + wc_config['fastq_gz_R1_wc'], + os.path.join(config['assnake_db'], "presets/trimmomatic/{preset}.json"), + ] rule tmtic: - input: - first=wc_config['fastq_gz_R1_wc'], - second=wc_config['fastq_gz_R2_wc'], - params=os.path.join(config['assnake_db'], "presets/trimmomatic/{preset}.json") - output: - r1=wc_config['fastq_gz_tmtic_R1_wc'], + input: inputs + output: + r1=wc_config['fastq_gz_tmtic_R1_wc'] + params: r2=wc_config['fastq_gz_tmtic_R2_wc'], - u =wc_config['fastq_gz_tmtic_S_wc'] - params: + u =wc_config['fastq_gz_tmtic_S_wc'], u1=wc_config['fastq_gz_tmtic_unpaired1_wc'], u2=wc_config['fastq_gz_tmtic_unpaired2_wc'], - dataset_type="paired-end" + dataset_type=lambda wildcards: Dataset(wildcards.df).dataset_type + log: "{fs_prefix}/{df}/reads/{preproc}__tmtic_{preset}/{df_sample}.log" threads: 8#config['assnake-core-preprocessing']['results']['trimmomatic']['threads'] wildcard_constraints: @@ -32,19 +32,3 @@ rule tmtic: conda: 'env_0.38.yaml' wrapper: "file://"+os.path.join(config['assnake-core-preprocessing']['install_dir'], 'trimmomatic/wrapper.py') - - -rule tmtic_single: - input: - first=wc_config['fastq_gz_R1_wc'], - params=os.path.join(config['assnake_db'], "presets/trimmomatic/{preset}.json") - output: - r1=wc_config['fastq_gz_tmtic_R1_wc'] - params: - dataset_type="single-end" - log: "{fs_prefix}/{df}/reads/{preproc}__tmtic_{preset}/{df_sample}.log" - threads: 8#config['assnake-core-preprocessing']['results']['trimmomatic']['threads'] - wildcard_constraints: - params="[\w\d_-]+", - conda: 'env_0.38.yaml' - wrapper: "file://"+os.path.join(config['assnake-core-preprocessing']['install_dir'], 'trimmomatic/wrapper.py') \ No newline at end of file diff --git a/assnake_core_preprocessing/trimmomatic/wrapper.py b/assnake_core_preprocessing/trimmomatic/wrapper.py index 491e98a..f3ed6a3 100644 --- a/assnake_core_preprocessing/trimmomatic/wrapper.py +++ b/assnake_core_preprocessing/trimmomatic/wrapper.py @@ -46,24 +46,24 @@ def tmtic_params(params_loc): return params_str -param_str = tmtic_params(snakemake.input.params) +param_str = tmtic_params(snakemake.input[-1]) -if '{snakemake.params.dataset_type}' == 'paired-end': +if snakemake.params.dataset_type == 'paired-end': shell('''echo "start installing tmmtic" trimmomatic PE -phred33 \ -threads {snakemake.threads} \ - {snakemake.input.first} {snakemake.input.second} \ + {snakemake.input[0]} {snakemake.input[1]} \ {snakemake.output.r1} {snakemake.params.u1} \ - {snakemake.output.r2} {snakemake.params.u2} \ + {snakemake.params.r2} {snakemake.params.u2} \ {param_str} \ >{snakemake.log} 2>&1 && \ - cat {snakemake.params.u1} {snakemake.params.u2} | gzip > {snakemake.output.u} 2>>{snakemake.log} && \ + cat {snakemake.params.u1} {snakemake.params.u2} | gzip > {snakemake.params.u} 2>>{snakemake.log} && \ rm {snakemake.params.u1} {snakemake.params.u2} 2>>{snakemake.log}''') else: shell('''echo "start installing tmmtic" trimmomatic SE -phred33 \ -threads {snakemake.threads} \ - {snakemake.input.first} \ + {snakemake.input[0]} \ {snakemake.output.r1} \ {param_str} \ >{snakemake.log} 2>&1 && \