diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 93addc2..0000000 --- a/.travis.yml +++ /dev/null @@ -1,36 +0,0 @@ -sudo: required -language: python -jdk: openjdk8 -services: docker -python: "3.6" -cache: pip -matrix: - fast_finish: true - -before_install: - # PRs to master are only ok if coming from dev branch - - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' - # Pull the docker image first so the test doesn't wait for this - - docker pull nfcore/deepvariant - # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/deepvariant nfcore/deepvariant:1.0 - -install: - # Install Nextflow - - mkdir /tmp/nextflow && cd /tmp/nextflow - - wget -qO- get.nextflow.io | bash - - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow - # Install nf-core/tools - - pip install nf-core - # Reset - - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests - -env: - - NXF_VER='18.10.1' # Specify a minimum NF version that should be tested and work - - NXF_VER='' # Plus: get the latest NF version and check that it works - -script: - # Lint the pipeline code - - nf-core lint ${TRAVIS_BUILD_DIR} - # Run the pipeline with the test profile - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker diff --git a/CHANGELOG.md b/CHANGELOG.md index d1ae7e5..0611c17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,16 @@ # nf-core/deepvariant: Changelog -## v1.0 - 2018-11-19 +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -This release marks the point where the pipeline was moved from lifebit-ai/DeepVariant over to the new nf-core community, at nf-core/DeepVariant. The [nf-core](http://nf-co.re/) template was used to help ensure that the pipeline meets the standards of nf-core. +## v1.0dev - [date] -In summary, the main changes are: +Initial release of nf-core/deepvariant, created with the [nf-core](https://nf-co.re/) template. -- Rebranding and renaming throughout the pipeline to nf-core -- Updating many parts of the pipeline config and style to meet nf-core standards -- Continuous integration tests with Travis CI -- Dependencies installed via conda -- Added support for BAM input as file, not just a folder -- Added channels to process input files -- Added separate processes for each of the steps in FASTA file preprocessing -- Use of genomes config to specify relevant reference genome files similar to igenomes -- Added BAM size dependent setting of memory -- Slightly improved documentation +### `Added` -...and many more minor tweaks. +### `Fixed` -Thanks to everyone who has worked on this release! +### `Dependencies` + +### `Deprecated` diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 2109619..405fb1b 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,13 +34,13 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on the [Gitter channel](https://gitter.im/nf-core/Lobby). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. 
Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-co.re/join/slack). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct/][version] -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +[homepage]: https://contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/4/code-of-conduct/ diff --git a/Dockerfile b/Dockerfile index 7330b7d..32ceea4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,17 @@ -FROM nfcore/base -LABEL authors="phil@lifebit.ai" \ - description="Docker image containing all requirements for nf-core/deepvariant pipeline" +FROM nfcore/base:dev +LABEL authors="Abhinav Sharma" \ + description="Docker image containing all software requirements for the nf-core/deepvariant pipeline" +# Install the conda environment COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-deepvariant-1.0/bin:$PATH +RUN conda env create --quiet -f /environment.yml && conda clean -a + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-deepvariant-1.0dev/bin:$PATH + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-deepvariant-1.0dev > nf-core-deepvariant-1.0dev.yml + +# Instruct R processes to use these empty files instead of clashing with a local version +RUN touch .Rprofile +RUN touch .Renviron diff --git a/LICENSE b/LICENSE index 9cf1062..3d82551 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,7 @@ MIT License +Copyright (c) Abhinav Sharma + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights diff --git a/README.md b/README.md index a878b94..ebb7628 100644 --- a/README.md +++ b/README.md @@ -1,84 +1,76 @@ -![deepvariant](https://raw.githubusercontent.com/nf-core/deepvariant/master/docs/images/deepvariant_logo.png) +# ![nf-core/deepvariant](docs/images/nf-core-deepvariant_logo.png) -# nf-core/deepvariant +**Google's DeepVariant as a Nextflow workflow.**. 
-**Deep Variant as a Nextflow pipeline** +[![GitHub Actions CI Status](https://github.com/nf-core/deepvariant/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/deepvariant/actions) +[![GitHub Actions Linting Status](https://github.com/nf-core/deepvariant/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/deepvariant/actions) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.0-brightgreen.svg)](https://www.nextflow.io/) -[![Build Status](https://travis-ci.org/nf-core/deepvariant.svg?branch=master)](https://travis-ci.org/nf-core/deepvariant) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A518.10.1-brightgreen.svg)](https://www.nextflow.io/) -[![Gitter](https://img.shields.io/badge/gitter-%20join%20chat%20%E2%86%92-4fb99a.svg)](https://gitter.im/nf-core/Lobby) - -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) +[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/deepvariant.svg)](https://hub.docker.com/r/nfcore/deepvariant) -![Singularity Container available](https://img.shields.io/badge/singularity-available-7E4C74.svg) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23deepvariant-4A154B?logo=slack)](https://nfcore.slack.com/channels/deepvariant) -A Nextflow pipeline for running the [Google DeepVariant variant caller](https://github.com/google/deepvariant). +## Introduction -## What is DeepVariant and why in Nextflow? +This pipeline simplifies the use of [Google's DeepVariant](https://github.com/google/deepvariant) and packages it as a workflow. -The Google Brain Team in December 2017 released a [Variant Caller](https://www.ebi.ac.uk/training/online/course/human-genetic-variation-i-introduction/variant-identification-and-analysis/what-variant) based on DeepLearning: DeepVariant. +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. -In practice, DeepVariant first builds images based on the BAM file, then it uses a DeepLearning image recognition approach to obtain the variants and eventually it converts the output of the prediction in the standard VCF format. +## Quick Start -DeepVariant as a Nextflow pipeline provides several advantages to the users. It handles automatically, through **preprocessing steps**, the creation of some extra needed indexed and compressed files which are a necessary input for DeepVariant, and which should normally manually be produced by the users. -Variant Calling can be performed at the same time on **multiple BAM files** and thanks to the internal parallelization of Nextflow no resources are wasted. -Nextflow's support of Docker allows to produce the results in a computational reproducible and clean way by running every step inside of a **Docker container**. +1. Install [`nextflow`](https://nf-co.re/usage/installation) -For more detailed information about Google's DeepVariant please refer to [google/deepvariant](https://github.com/google/deepvariant) or this [blog post](https://research.googleblog.com/2017/12/deepvariant-highly-accurate-genomes.html).
-For more information about DeepVariant in Nextflow please refer to this [blog post](https://blog.lifebit.ai/post/deepvariant/?utm_campaign=documentation&utm_source=github&utm_medium=web) +2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ -## Quick Start +3. Download the pipeline and test it on a minimal dataset with a single command: -**Warning DeepVariant can be very computationally intensive to run.** + ```bash + nextflow run nf-core/deepvariant -profile test, + ``` -To **test** the pipeline you can run: + > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. -```bash -nextflow run nf-core/deepvariant -profile test,docker -``` +4. Start running your own analysis! -A typical run on **whole genome data** looks like this: + -```bash -nextflow run nf-core/deepvariant --genome hg19 --bam yourBamFile --bed yourBedFile -profile standard,docker -``` + ```bash + nextflow run nf-core/deepvariant -profile --input samplesheet.csv --genome GRCh37 + ``` -In this case variants are called on the bam files contained in the testdata directory. The hg19 version of the reference genome is used. -One vcf files is produced and can be found in the folder "results" +See [usage docs](https://nf-co.re/deepvariant/usage) for all of the available options when running the pipeline. -A typical run on **whole exome data** looks like this: +## Documentation -```bash -nextflow run nf-core/deepvariant --exome --genome hg19 --bam_folder myBamFolder --bed myBedFile -profile standard,docker -``` +The nf-core/deepvariant pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/deepvariant/usage) and [output](https://nf-co.re/deepvariant/output). -## Documentation + -The nf-core/deepvariant documentation is split into the following files: +## Credits -1. [Installation](docs/installation.md) -2. [Running the pipeline](docs/usage.md) -3. Pipeline configuration - - [Adding your own system](docs/configuration/adding_your_own.md) - - [Reference genomes](docs/configuration/reference_genomes.md) -4. [Output and how to interpret the results](docs/output.md) -5. [Troubleshooting](docs/troubleshooting.md) -6. [More about DeepVariant](docs/about.md) +This workflow was originally developed at [Lifebit](https://lifebit.ai/?utm_campaign=documentation&utm_source=github&utm_medium=web), by @luisas, to ease and reduce cost for variant calling analyses -## More about the pipeline + +The `v2.0` of `nf-core/deepvariant` workflow was modernized by Abhinav Sharma ( @abhi18av ), Alain Coletta ( @alaincoletta). -As shown in the following picture, the worklow both contains **preprocessing steps** ( light blue ones ) and proper **variant calling steps** ( darker blue ones ). 
+Many thanks to nf-core and those who have helped out along the way too, including (but not limited to): @ewels, @MaxUlysse, @apeltzer, @sven1103 & @pditommaso -Some input files ar optional and if not given, they will be automatically created for the user during the preprocessing steps. If these are given, the preprocessing steps are skipped. For more information about preprocessing, please refer to the "INPUT PARAMETERS" section. +## Contributions and Support -The worklow **accepts one reference genome and multiple BAM files as input**. The variant calling for the several input BAM files will be processed completely indipendently and will produce indipendent VCF result files. The advantage of this approach is that the variant calling of the different BAM files can be parallelized internally by Nextflow and take advantage of all the cores of the machine in order to get the results at the fastest. +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). -

- -

+For further information or help, don't hesitate to get in touch on the [Slack `#deepvariant` channel](https://nfcore.slack.com/channels/deepvariant) (you can join with [this invite](https://nf-co.re/join/slack)). -## Credits +## Citation -This pipeline was originally developed at [Lifebit](https://lifebit.ai/?utm_campaign=documentation&utm_source=github&utm_medium=web), by @luisas, to ease and reduce cost for variant calling analyses + + -Many thanks to nf-core and those who have helped out along the way too, including (but not limited to): @ewels, @MaxUlysse, @apeltzer, @sven1103 & @pditommaso +You can cite the `nf-core` publication as follows: + +> **The nf-core framework for community-curated bioinformatics pipelines.** +> +> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. +> +> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). +> ReadCube: [Full Access Link](https://rdcu.be/b1GjZ) diff --git a/Singularity b/Singularity deleted file mode 100644 index bbeecf8..0000000 --- a/Singularity +++ /dev/null @@ -1,18 +0,0 @@ -From:nfcore/base -Bootstrap:docker - -%labels - MAINTAINER Phil Palmer - DESCRIPTION Singularity image containing all requirements for the nf-core/deepvariant pipeline - VERSION 1.0 - -%environment - PATH=/opt/conda/envs/nf-core-deepvariant-1.0/bin:$PATH - export PATH - -%files - environment.yml / - -%post - /opt/conda/bin/conda env create -f /environment.yml - /opt/conda/bin/conda clean -a diff --git a/assets/email_template.html b/assets/email_template.html index dcd3574..0275b73 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -5,12 +5,14 @@ - + nf-core/deepvariant Pipeline Report
+
+<img src="cid:nfcorepipelinelogo">

<h1>nf-core/deepvariant v${version}</h1>

<h2>Run Name: $runName</h2>
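
For context, the report emails here are plain Groovy templates: placeholders such as `${version}` and `$runName`, and the `<% ... %>` blocks in the text and sendmail templates below, are filled in by a Groovy template engine when the pipeline completes. A minimal sketch of that substitution, assuming `GStringTemplateEngine` and illustrative binding values (the actual rendering happens in the pipeline's completion handler, which is not part of this diff):

```groovy
import groovy.text.GStringTemplateEngine

// Tiny stand-in template using the same placeholder styles as the email templates in this diff.
def text    = 'nf-core/deepvariant v${version}\nRun Name: $runName\n' +
              '<% if (success){ out << "Pipeline completed successfully!" } else { out << "Pipeline completed with errors" } %>'
def binding = [version: '1.0dev', runName: 'amazing_turing', success: true]

println new GStringTemplateEngine().createTemplate(text).make(binding).toString()
```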
diff --git a/assets/email_template.txt b/assets/email_template.txt index e82c6c3..4f37348 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -1,6 +1,12 @@ -======================================== - nf-core/deepvariant v${version} -======================================== +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/deepvariant v${version} +---------------------------------------------------- + Run Name: $runName <% if (success){ @@ -17,23 +23,6 @@ ${errorReport} } %> -<% if (!success){ - out << """#################################################### -## nf-core/deepvariant execution completed unsuccessfully! ## -#################################################### -The exit status of the task that caused the workflow execution to fail was: $exitStatus. -The full error message was: - -${errorReport} -""" -} else { - out << "## nf-core/deepvariant execution completed successfully! ##" -} -%> - - - - The workflow was completed at $dateComplete (duration: $duration) The command used to launch the workflow was as follows: diff --git a/conf/multiqc_config.yaml b/assets/multiqc_config.yaml similarity index 79% rename from conf/multiqc_config.yaml rename to assets/multiqc_config.yaml index fcd5aa8..ea163f0 100644 --- a/conf/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -3,5 +3,9 @@ report_comment: > analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: - nf-core/deepvariant-software-versions: + software_versions: order: -1000 + nf-core-deepvariant-summary: + order: -1001 + +export_plots: true diff --git a/assets/nf-core-deepvariant_logo.png b/assets/nf-core-deepvariant_logo.png new file mode 100644 index 0000000..4f2545e Binary files /dev/null and b/assets/nf-core-deepvariant_logo.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 0000000..cd5a39a --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,8 @@ +group,replicate,fastq_1,fastq_2 +control,1,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz +control,2,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz +control,3,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz +treatment,1,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +treatment,2,/path/to/fastq/files/AEG588A5_S5_L003_R1_001.fastq.gz, +treatment,3,/path/to/fastq/files/AEG588A6_S6_L003_R1_001.fastq.gz, +treatment,3,/path/to/fastq/files/AEG588A6_S6_L004_R1_001.fastq.gz, diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index fd1cd73..95a365b 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -1,11 +1,53 @@ To: $email Subject: $subject Mime-Version: 1.0 -Content-Type: multipart/related;boundary="nfmimeboundary" +Content-Type: multipart/related;boundary="nfcoremimeboundary" ---nfmimeboundary +--nfcoremimeboundary Content-Type: text/html; charset=utf-8 $email_html ---nfmimeboundary-- +--nfcoremimeboundary +Content-Type: image/png;name="nf-core-deepvariant_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; filename="nf-core-deepvariant_logo.png" + +<% out << new File("$projectDir/assets/nf-core-deepvariant_logo.png"). + bytes. 
+ encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py new file mode 100755 index 0000000..51b1846 --- /dev/null +++ b/bin/check_samplesheet.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python + +# TODO nf-core: Update the script to check the samplesheet +# This script is based on the example at: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_sispa.csv + +import os +import sys +import errno +import argparse + + +def parse_args(args=None): + Description = "Reformat nf-core/deepvariant samplesheet file and check its contents." + Epilog = "Example usage: python check_samplesheet.py " + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("FILE_IN", help="Input samplesheet file.") + parser.add_argument("FILE_OUT", help="Output file.") + return parser.parse_args(args) + + +def make_dir(path): + if len(path) > 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise exception + + +def print_error(error, context="Line", context_str=""): + error_str = "ERROR: Please check samplesheet -> {}".format(error) + if context != "" and context_str != "": + error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( + error, context.strip(), context_str.strip() + ) + print(error_str) + sys.exit(1) + + +# TODO nf-core: Update the check_samplesheet function +def check_samplesheet(file_in, file_out): + """ + This function checks that the samplesheet follows the following structure: + + group,replicate,fastq_1,fastq_2 + WT,1,WT_LIB1_REP1_1.fastq.gz,WT_LIB1_REP1_2.fastq.gz + WT,1,WT_LIB2_REP1_1.fastq.gz,WT_LIB2_REP1_2.fastq.gz + WT,2,WT_LIB1_REP2_1.fastq.gz,WT_LIB1_REP2_2.fastq.gz + KO,1,KO_LIB1_REP1_1.fastq.gz,KO_LIB1_REP1_2.fastq.gz + """ + + sample_run_dict = {} + with open(file_in, "r") as fin: + + ## Check header + MIN_COLS = 3 + # TODO nf-core: Update the column names for the input samplesheet + HEADER = ["group", "replicate", "fastq_1", "fastq_2"] + header = [x.strip('"') for x in fin.readline().strip().split(",")] + if header[: len(HEADER)] != HEADER: + print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) + sys.exit(1) + + ## Check sample entries + for line in fin: + lspl = [x.strip().strip('"') for x in line.strip().split(",")] + + ## Check valid number of columns per row + if len(lspl) < len(HEADER): + print_error( + "Invalid number of columns (minimum = {})!".format(len(HEADER)), + "Line", + line, + ) + num_cols = len([x for x in lspl if x]) + if num_cols < MIN_COLS: + print_error( + "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), + "Line", + line, + ) + + ## Check sample name entries + sample, replicate, fastq_1, fastq_2 = lspl[: len(HEADER)] + if sample: + if sample.find(" ") != -1: + print_error("Group entry contains spaces!", "Line", line) 
+ else: + print_error("Group entry has not been specified!", "Line", line) + + ## Check replicate entry is integer + if not replicate.isdigit(): + print_error("Replicate id not an integer!", "Line", line) + replicate = int(replicate) + + ## Check FastQ file extension + for fastq in [fastq_1, fastq_2]: + if fastq: + if fastq.find(" ") != -1: + print_error("FastQ file contains spaces!", "Line", line) + if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): + print_error( + "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", + "Line", + line, + ) + + ## Auto-detect paired-end/single-end + sample_info = [] ## [single_end, fastq_1, fastq_2] + if sample and fastq_1 and fastq_2: ## Paired-end short reads + sample_info = ["0", fastq_1, fastq_2] + elif sample and fastq_1 and not fastq_2: ## Single-end short reads + sample_info = ["1", fastq_1, fastq_2] + else: + print_error("Invalid combination of columns provided!", "Line", line) + ## Create sample mapping dictionary = {sample: {replicate : [ single_end, fastq_1, fastq_2 ]}} + if sample not in sample_run_dict: + sample_run_dict[sample] = {} + if replicate not in sample_run_dict[sample]: + sample_run_dict[sample][replicate] = [sample_info] + else: + if sample_info in sample_run_dict[sample][replicate]: + print_error("Samplesheet contains duplicate rows!", "Line", line) + else: + sample_run_dict[sample][replicate].append(sample_info) + + ## Write validated samplesheet with appropriate columns + if len(sample_run_dict) > 0: + out_dir = os.path.dirname(file_out) + make_dir(out_dir) + with open(file_out, "w") as fout: + + fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2"]) + "\n") + for sample in sorted(sample_run_dict.keys()): + + ## Check that replicate ids are in format 1.. 
+ uniq_rep_ids = set(sample_run_dict[sample].keys()) + if len(uniq_rep_ids) != max(uniq_rep_ids): + print_error( + "Replicate ids must start with 1..!", + "Group", + sample, + ) + for replicate in sorted(sample_run_dict[sample].keys()): + + ## Check that multiple runs of the same sample are of the same datatype + if not all( + x[0] == sample_run_dict[sample][replicate][0][0] for x in sample_run_dict[sample][replicate] + ): + print_error( + "Multiple runs of a sample must be of the same datatype!", + "Group", + sample, + ) + ## Write to file + for idx, sample_info in enumerate(sample_run_dict[sample][replicate]): + sample_id = "{}_R{}_T{}".format(sample, replicate, idx + 1) + fout.write(",".join([sample_id] + sample_info) + "\n") + + +def main(args=None): + args = parse_args(args) + check_samplesheet(args.FILE_IN, args.FILE_OUT) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/markdown_to_html.r b/bin/markdown_to_html.r deleted file mode 100755 index abe1335..0000000 --- a/bin/markdown_to_html.r +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env Rscript - -# Command line argument processing -args = commandArgs(trailingOnly=TRUE) -if (length(args) < 2) { - stop("Usage: markdown_to_html.r ", call.=FALSE) -} -markdown_fn <- args[1] -output_fn <- args[2] - -# Load / install packages -if (!require("markdown")) { - install.packages("markdown", dependencies=TRUE, repos='http://cloud.r-project.org/') - library("markdown") -} - -base_css_fn <- getOption("markdown.HTML.stylesheet") -base_css <- readChar(base_css_fn, file.info(base_css_fn)$size) -custom_css <- paste(base_css, " -body { - padding: 3em; - margin-right: 350px; - max-width: 100%; -} -#toc { - position: fixed; - right: 20px; - width: 300px; - padding-top: 20px; - overflow: scroll; - height: calc(100% - 3em - 20px); -} -#toc_header { - font-size: 1.8em; - font-weight: bold; -} -#toc > ul { - padding-left: 0; - list-style-type: none; -} -#toc > ul ul { padding-left: 20px; } -#toc > ul > li > a { display: none; } -img { max-width: 800px; } -") - -markdownToHTML( - file = markdown_fn, - output = output_fn, - stylesheet = custom_css, - options = c('toc', 'base64_images', 'highlight_code') -) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index d427718..e240ebf 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -1,48 +1,36 @@ #!/usr/bin/env python from __future__ import print_function -from collections import OrderedDict -import re +import os -regexes = { - 'nf-core/deepvariant': ['v_nf_deepvariant.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'DeepVariant': ['v_deepvariant.txt', r"deepvariant-(\S+)-"], - 'Python': ['v_python.txt', r"Python (\S+)"], - 'Pip': ['v_pip.txt', r"pip (\S+)"], - 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], - 'Htslib': ['v_samtools.txt', r"Using htslib (\S+)"], - 'Lbzip2': ['v_lbzip2.txt', r"lbzip2 version (\S+)"], - 'Bzip2': ['v_bzip2.txt', r"bzip2, Version (\S+)"], -} -results = OrderedDict() -results['nf-core/deepvariant'] = 'N/A' -results['Nextflow'] = 'N/A' -results['DeepVariant'] = 'N/A' -results['Python'] = 'N/A' -results['Pip'] = 'N/A' -results['Samtools'] = 'N/A' -results['Htslib'] = 'N/A' -results['Lbzip2'] = 'N/A' -results['Bzip2'] = 'N/A' +results = {} +version_files = [x for x in os.listdir(".") if x.endswith(".version.txt")] +for version_file in version_files: -# Search each file using its regex -for k, v in regexes.items(): - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], 
versions) - if match: - results[k] = "v{}".format(match.group(1)) + software = version_file.replace(".version.txt", "") + if software == "pipeline": + software = "nf-core/deepvariant" + + with open(version_file) as fin: + version = fin.read().strip() + results[software] = version # Dump to YAML -print (''' -id: 'nf-core/deepvariant-software-versions' +print( + """ +id: 'software_versions' section_name: 'nf-core/deepvariant Software Versions' section_href: 'https://github.com/nf-core/deepvariant' plot_type: 'html' description: 'are collected at run time from the software output.' data: |
    <dl class="dl-horizontal">
-''')
-for k,v in results.items():
-    print("        <dt>{}</dt><dd><samp>{}</samp></dd>".format(k,v))
-print ("    </dl>")
+"""
+)
+for k, v in sorted(results.items()):
+    print("        <dt>{}</dt><dd><samp>{}</samp></dd>
".format(k, v)) +print(" ") + +# Write out regexes as csv file: +with open("software_versions.csv", "w") as f: + for k, v in sorted(results.items()): + f.write("{}\t{}\n".format(k, v)) diff --git a/conf/awsbatch.config b/conf/awsbatch.config deleted file mode 100644 index 4a4a002..0000000 --- a/conf/awsbatch.config +++ /dev/null @@ -1,25 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for AWS Batch - * ------------------------------------------------- - * Imported under the 'awsbatch' Nextflow profile in nextflow.config - * Uses docker for software depedencies automagically, so not specified here. - */ - -aws.region = params.awsregion -process.executor = 'awsbatch' -process.queue = params.awsqueue -executor.awscli = '/home/ec2-user/miniconda/bin/aws' -params.tracedir = './' - -process { - withName:makeExamples_with_bed { - cpus = 4 - } - withName:makeExamples { - cpus = 4 - } - withName:call_variants { - cpus = 4 - } -} diff --git a/conf/base.config b/conf/base.config index 71be5f2..cacd2f1 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,38 +11,39 @@ process { - container = params.container - - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 2.h * task.attempt, 'time' ) } - - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' - - withName:make_examples { - cpus = { check_max( 20, 'cpus' ) } - memory = { bam.size() < 1000000000 ? 4.GB : check_max( (bam.size() >> 30) * 10.GB * task.attempt, 'memory')} - time = { check_max( 10.h * task.attempt, 'time' ) } + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // Process-specific resource requirements + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } - withName:call_variants { - cpus = { check_max( 20, 'cpus' ) } - memory = { bam.size() < 1000000000 ? 4.GB : check_max( (bam.size() >> 30) * 10.GB * task.attempt, 'memory')} - time = { check_max( 10.h * task.attempt, 'time' ) } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } - withName:postprocess_variants { - cpus = { check_max( 20, 'cpus' ) } - memory = { bam.size() < 1000000000 ? 
4.GB : check_max( (bam.size() >> 30) * 10.GB * task.attempt, 'memory')} - time = { check_max( 10.h * task.attempt, 'time' ) } + withLabel:process_high { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 } - - -} - -params { - // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h } diff --git a/conf/binac.config b/conf/binac.config deleted file mode 100644 index 68ba98f..0000000 --- a/conf/binac.config +++ /dev/null @@ -1,22 +0,0 @@ -/* - * ---------------------------------------------------------------------------- - * Nextflow config file for use with Singularity on BINAC cluster in Tuebingen - * ---------------------------------------------------------------------------- - * Defines basic usage limits and singularity image id. - */ - -singularity { - enabled = true -} - -process { - beforeScript = 'module load devel/singularity/2.6.0' - executor = 'pbs' - queue = 'short' -} - -params { - max_memory = 128.GB - max_cpus = 28 - max_time = 48.h -} diff --git a/conf/genomes.config b/conf/genomes.config deleted file mode 100644 index 2d08bc5..0000000 --- a/conf/genomes.config +++ /dev/null @@ -1,49 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for Genomes paths - * ------------------------------------------------- - * Defines reference genomes, using s3 paths - * Can be used by any config that customises the base - * path using $params.genomes_base / --genomes_base - */ - -params { - // Genomes reference file paths on UPPMAX - genomes { - 'h38' { - fasta="${params.genomes_base}/h38/GRCh38.p10.genome.fa" - fai="${params.genomes_base}/h38/GRCh38.p10.genome.fa.fai" - fastagz="${params.genomes_base}/h38/GRCh38.p10.genome.fa.gz" - gzfai="${params.genomes_base}/h38/GRCh38.p10.genome.fa.gz.fai" - gzi="${params.genomes_base}/h38/GRCh38.p10.genome.fa.gz.gzi" - } - 'hs37d5' { - fasta="${params.genomes_base}/hs37d5/hs37d5.fa" - fai="${params.genomes_base}/hs37d5/hs37d5.fa.fai" - fastagz="${params.genomes_base}/hs37d5/hs37d5.fa.gz" - gzfai="${params.genomes_base}/hs37d5/hs37d5.fa.gz.fai" - gzi="${params.genomes_base}/hs37d5/hs37d5.fa.gz.gzi" - } - 'grch37primary' { - fasta="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa" - fai="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa.fai" - fastagz="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz" - gzfai="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.fai" - gzi="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.gzi" - } - 'hg19chr20' { - fasta="${params.genomes_base}/hg19chr20/chr20.fa" - fai="${params.genomes_base}/hg19chr20/chr20.fa.fai" - fastagz="${params.genomes_base}/hg19chr20/chr20.fa.gz" - gzfai="${params.genomes_base}/hg19chr20/chr20.fa.gz.fai" - gzi="${params.genomes_base}/hg19chr20/chr20.fa.gz.gzi" - } - 'hg19' { - fasta="${params.genomes_base}/hg19/hg19.fa" - fai="${params.genomes_base}/hg19/hg19.fa.fai" - fastagz="${params.genomes_base}/hg19/hg19.fa.gz" - gzfai="${params.genomes_base}/hg19/hg19.fa.gz.fai" - 
gzi="${params.genomes_base}/hg19/hg19.fa.gz.gzi" - } - } -} diff --git a/conf/igenomes.config b/conf/igenomes.config new file mode 100644 index 0000000..eae4e6a --- /dev/null +++ b/conf/igenomes.config @@ -0,0 +1,420 @@ +/* + * ------------------------------------------------- + * Nextflow config file for iGenomes paths + * ------------------------------------------------- + * Defines reference genomes, using iGenome paths + * Can be used by any config that customises the base + * path using $params.igenomes_base / --igenomes_base + */ + +params { + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + } + 'panTro4' { + fasta = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/genome.{1.bt2,2.bt2,3.bt2,4.bt2,rev.1.bt2,rev.2.bt2}" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + } + } +} diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 0000000..145a725 --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,35 @@ +/* + * -------------------------------------------------- + * Config file for defining DSL2 per module options + * -------------------------------------------------- + * + * Available keys to override module options: + * args = Additional arguments appended to command in module. 
+ * args2 = Second set of arguments appended to command in module (multi-tool modules). + * publish_dir = Directory to publish results. + * publish_by_id = Publish results in separate folders by meta.id value. + * publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension + * The value of "directory" is appended to the standard "publish_dir" path as defined above. + * If publish_files == null (unspecified) - All files are published. + * If publish_files == false - No files are published. + * suffix = File name suffix for output files. + * + */ + +params { + modules { + 'cat_fastq' { + publish_dir = 'fastq' + } + 'fastqc' { + args = "--quiet" + } + 'trimgalore' { + args = "--fastqc" + publish_files = ['txt':'', 'html':'fastqc', 'zip':'fastqc'] + } + 'multiqc' { + args = "" + } + } +} diff --git a/conf/test.config b/conf/test.config index f05b94e..a33b6a3 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,14 +4,23 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. Use as follows: - * nextflow run nf-core/deepvariant -profile test + * nextflow run nf-core/deepvariant -profile test, */ params { - max_cpus = 2 + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 max_memory = 6.GB - max_time = 48.h - bam = 'https://github.com/nf-core/test-datasets/raw/deepvariant/testdata/NA12878_S1.chr20.10_10p1mb.bam' - bed = 'https://github.com/nf-core/test-datasets/raw/deepvariant/testdata/test_nist.b37_chr20_100kbp_at_10mb.bed' - genome = 'hg19chr20' + max_time = 6.h + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_amplicon.csv' + + // Genome references + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz' } diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 0000000..b1a15f1 --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,21 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running full-size tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a full size pipeline test. Use as follows: + * nextflow run nf-core/deepvariant -profile test_full, + */ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full size test + // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_amplicon.csv' + + // Genome references + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz' +} diff --git a/docs/README.md b/docs/README.md index 3a40edd..7d7078b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,11 +1,10 @@ # nf-core/deepvariant: Documentation -The nf-core/deepvariant documentation is split into the following files: +The nf-core/deepvariant documentation is split into the following pages: -1. [Installation](installation.md) -2. [Running the pipeline](usage.md) -3. Pipeline configuration - * [Adding your own system](configuration/adding_your_own.md) - * [Reference genomes](configuration/reference_genomes.md) -4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](troubleshooting.md) +* [Usage](usage.md) + * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +* [Output](output.md) + * An overview of the different results produced by the pipeline and how to interpret them. + +You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/configuration/adding_your_own.md b/docs/configuration/adding_your_own.md deleted file mode 100644 index 9647c3a..0000000 --- a/docs/configuration/adding_your_own.md +++ /dev/null @@ -1,86 +0,0 @@ -# nf-core/deepvariant: Configuration for other clusters - -It is entirely possible to run this pipeline on other clusters, though you will need to set up your own config file so that the pipeline knows how to work with your cluster. - -> If you think that there are other people using the pipeline who would benefit from your configuration (eg. other common cluster setups), please let us know. We can add a new configuration and profile which can used by specifying `-profile ` when running the pipeline. - -If you are the only person to be running this pipeline, you can create your config file as `~/.nextflow/config` and it will be applied every time you run Nextflow. Alternatively, save the file anywhere and reference it when running the pipeline with `-c path/to/config` (see the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more). - -A basic configuration comes with the pipeline, which runs by default (the `standard` config profile - see [`conf/base.config`](../conf/base.config)). This means that you only need to configure the specifics for your system and overwrite any defaults that you want to change. - -## Cluster Environment -By default, pipeline uses the `local` Nextflow executor - in other words, all jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - -To specify your cluster environment, add the following line to your config file: - -```nextflow -process.executor = 'YOUR_SYSTEM_TYPE' -``` - -Many different cluster types are supported by Nextflow. For more information, please see the [Nextflow documentation](https://www.nextflow.io/docs/latest/executor.html). - -Note that you may need to specify cluster options, such as a project or queue. 
To do so, use the `clusterOptions` config option: - -```nextflow -process { - executor = 'SLURM' - clusterOptions = '-A myproject' -} -``` - - -## Software Requirements -To run the pipeline, several software packages are required. How you satisfy these requirements is essentially up to you and depends on your system. If possible, we _highly_ recommend using either Docker or Singularity. - -Please see the [`installation documentation`](../installation.md) for how to run using the below as a one-off. These instructions are about configuring a config file for repeated use. - -### Docker -Docker is a great way to run nf-core/deepvariant, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. - -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required - nextflow will automatically fetch the [nfcore/deepvariant](https://hub.docker.com/r/nfcore/deepvariant/) image that we have created and is hosted at dockerhub at run time. - -To add docker support to your own config file, add the following: - -```nextflow -docker.enabled = true -process.container = "nfcore/deepvariant" -``` - -Note that the dockerhub organisation name annoyingly can't have a hyphen, so is `nfcore` and not `nf-core`. - - -### Singularity image -Many HPC environments are not able to run Docker due to security issues. -[Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. - -To specify singularity usage in your pipeline config file, add the following: - -```nextflow -singularity.enabled = true -process.container = "shub://nf-core/deepvariant" -``` - -If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. -Instead, you'll have to do this yourself manually first, transfer the image file and then point to that. - -First, pull the image file where you have an internet connection: - -```bash -singularity pull --name nf-core-deepvariant.simg shub://nf-core/deepvariant -``` - -Then transfer this file and point the config file to the image: - -```nextflow -singularity.enabled = true -process.container = "/path/to/nf-core-deepvariant.simg" -``` - - -### Conda -If you're not able to use Docker or Singularity, you can instead use conda to manage the software requirements. 
-To use conda in your own config file, add the following: - -```nextflow -process.conda = "$baseDir/environment.yml" -``` diff --git a/docs/images/deepvariant_logo.png b/docs/images/deepvariant_logo.png deleted file mode 100644 index 34e2e17..0000000 Binary files a/docs/images/deepvariant_logo.png and /dev/null differ diff --git a/docs/images/deepvariant_logo.svg b/docs/images/deepvariant_logo.svg deleted file mode 100644 index f62c15e..0000000 --- a/docs/images/deepvariant_logo.svg +++ /dev/null @@ -1,205 +0,0 @@ - -image/svg+xmlnf- -core/ -deepvariant - \ No newline at end of file diff --git a/docs/images/nf-core-deepvariant_logo.png b/docs/images/nf-core-deepvariant_logo.png new file mode 100644 index 0000000..4f2545e Binary files /dev/null and b/docs/images/nf-core-deepvariant_logo.png differ diff --git a/docs/installation.md b/docs/installation.md deleted file mode 100644 index f6153b3..0000000 --- a/docs/installation.md +++ /dev/null @@ -1,115 +0,0 @@ -# nf-core/deepvariant: Installation - -To start using the nf-core/deepvariant pipeline, follow the steps below: - -1. [Install Nextflow](#1-install-nextflow) -2. [Install the pipeline](#2-install-the-pipeline) - * [Automatic](#21-automatic) - * [Offline](#22-offline) - * [Development](#23-development) -3. [Pipeline configuration](#3-pipeline-configuration) - * [Software deps: Docker and Singularity](#31-software-deps-docker-and-singularity) - * [Software deps: Bioconda](#32-software-deps-bioconda) - * [Configuration profiles](#33-configuration-profiles) -4. [Reference genomes](#4-reference-genomes) -5. [Appendices](#appendices) - * [Running on UPPMAX](#running-on-uppmax) - -## 1) Install NextFlow -Nextflow runs on most POSIX systems (Linux, Mac OSX etc). It can be installed by running the following commands: - -```bash -# Make sure that Java v8+ is installed: -java -version - -# Install Nextflow -curl -fsSL get.nextflow.io | bash - -# Add Nextflow binary to your PATH: -mv nextflow ~/bin/ -# OR system-wide installation: -# sudo mv nextflow /usr/local/bin -``` - -See [nextflow.io](https://www.nextflow.io/) for further instructions on how to install and configure Nextflow. - -## 2) Install the pipeline - -#### 2.1) Automatic -This pipeline itself needs no installation - NextFlow will automatically fetch it from GitHub if `nf-core/deepvariant` is specified as the pipeline name. - -#### 2.2) Offline -The above method requires an internet connection so that Nextflow can download the pipeline files. If you're running on a system that has no internet connection, you'll need to download and transfer the pipeline files manually: - -```bash -wget https://github.com/nf-core/deepvariant/archive/master.zip -mkdir -p ~/my-pipelines/nf-core/ -unzip master.zip -d ~/my-pipelines/nf-core/ -cd ~/my_data/ -nextflow run ~/my-pipelines/nf-core/deepvariant-master -``` - -To stop nextflow from looking for updates online, you can tell it to run in offline mode by specifying the following environment variable in your ~/.bashrc file: - -```bash -export NXF_OFFLINE='TRUE' -``` - -#### 2.3) Development - -If you would like to make changes to the pipeline, it's best to make a fork on GitHub and then clone the files. Once cloned you can run the pipeline directly as above. - - -## 3) Pipeline configuration -By default, the pipeline runs with the `standard` configuration profile. This uses a number of sensible defaults for process requirements and is suitable for running on a simple (if powerful!) basic server. 
You can see this configuration in [`conf/base.config`](../conf/base.config). - -Be warned of two important points about this default configuration: - -1. The default profile uses the `local` executor - * All jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - * See the [nextflow docs](https://www.nextflow.io/docs/latest/executor.html) for information about running with other hardware backends. Most job scheduler systems are natively supported. -2. Nextflow will expect all software to be installed and available on the `PATH` - -#### 3.1) Software deps: Docker -First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) - -Then, running the pipeline with the option `-profile standard,docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/deepvariant). - -#### 3.1) Software deps: Singularity -If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative. -The process is very similar: running the pipeline with the option `-profile standard,singularity` tells Nextflow to enable singularity for this run. An image containing all of the software requirements will be automatically fetched and used from singularity hub. - -If running offline with Singularity, you'll need to download and transfer the Singularity image first: - -```bash -singularity pull --name nf-core-deepvariant.simg shub://nf-core/deepvariant -``` - -Once transferred, use `-with-singularity` and specify the path to the image file: - -```bash -nextflow run /path/to/nf-core-deepvariant -with-singularity nf-core-deepvariant.simg -``` - -Remember to pull updated versions of the singularity image if you update the pipeline. - - -#### 3.2) Software deps: conda -If you're not able to use Docker _or_ Singularity, you can instead use conda to manage the software requirements. -This is slower and less reproducible than the above, but is still better than having to install all requirements yourself! -The pipeline ships with a conda environment file and nextflow has built-in support for this. -To use it first ensure that you have conda installed (we recommend [miniconda](https://conda.io/miniconda.html)), then follow the same pattern as above and use the flag `-profile standard,conda` - - -## Appendices - -#### Running on UPPMAX -To run the pipeline on the [Swedish UPPMAX](https://www.uppmax.uu.se/) clusters (`rackham`, `irma`, `bianca` etc), use the command line flag `-profile uppmax`. This tells Nextflow to submit jobs using the SLURM job executor with Singularity for software dependencies. - -Note that you will need to specify your UPPMAX project ID when running a pipeline. To do this, use the command line flag `--project `. The pipeline will exit with an error message if you try to run it pipeline with the default UPPMAX config profile without a project. - -**Optional Extra:** To avoid having to specify your project every time you run Nextflow, you can add it to your personal Nextflow config file instead. Add this line to `~/.nextflow/config`: - -```nextflow -params.project = 'project_ID' // eg. 
b2017123 -``` diff --git a/docs/output.md b/docs/output.md index 2ff00dc..47192fe 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,30 +1,63 @@ # nf-core/deepvariant: Output -This document describes the processes and output produced by the pipeline. +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/deepvariant/output](https://nf-co.re/deepvariant/output) -Main steps: +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -- preprocessing of fasta/reference files (fai, fastagz, gzfai & gzi) - - These steps can be skipped if the the `--genome` options is used or the fai, fastagz, gzfai & gzi files are supplied. -- preprocessing of BAM files - - Also can be skipped if BAM files contain necessary read group line -- make examples - - Gets bam files and converts them to images ( named examples ) -- call variants - - Does the variant calling based on the ML trained model. -- post processing +## Introduction - - Trasforms the variant calling output (tfrecord file) into a standard vcf file. +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. -For further reading and documentation about deepvariant see [google/deepvariant](https://github.com/google/deepvariant) +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. -## VCF + -The output from DeepVariant is a variant call file or [vcf v4.2](https://samtools.github.io/hts-specs/VCFv4.2.pdf) +## Pipeline overview -**Output directory: `results`** (by default) +The pipeline is built using [Nextflow](https://www.nextflow.io/) +and processes data using the following steps: -- `pipeline_info` - - produced by nextflow -- `{bamSampleName}.vcf` - - output vcf file produced by deepvariant +* [FastQC](#fastqc) - Read quality control +* [MultiQC](#multiqc) - Aggregate report describing results from the whole pipeline +* [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution + +## FastQC + +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. + +For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +**Output files:** + +* `fastqc/` + * `*_fastqc.html`: FastQC report containing quality metrics for your untrimmed raw fastq files. +* `fastqc/zips/` + * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. + +## MultiQC + +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. 
+ +For more information about how to use MultiQC reports, see [https://multiqc.info](https://multiqc.info). + +**Output files:** + +* `multiqc/` + * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + * `multiqc_plots/`: directory containing static images from the report in various formats. + +## Pipeline information + +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +**Output files:** + +* `pipeline_info/` + * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. + * Documentation for interpretation of results in HTML format: `results_description.html`. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md deleted file mode 100644 index d9c4a79..0000000 --- a/docs/troubleshooting.md +++ /dev/null @@ -1,16 +0,0 @@ -# nf-core/deepvariant: Troubleshooting - -## Input files not found - -If you are having trouble with the inputs for the tool its recommended that you read [about preprocessing](usage.md#about-preprocessing) and [BAM folder input](usage.md#--bam_folder) - -## Data organization - -The pipeline can't take a list of multiple input files - it takes a glob expression. If your input files are scattered in different paths then we recommend that you generate a directory with symlinked files. If running in paired end mode please make sure that your files are sensibly named so that they can be properly paired. See the previous point. - -## Extra resources and getting help - -If you still have an issue with running the pipeline then feel free to contact us. -Have a look at the [pipeline website](https://github.com/nf-core/deepvariant) to find out how. - -If you have problems that are related to Nextflow and not our pipeline then check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) or the [google group](https://groups.google.com/forum/#!forum/nextflow). 
diff --git a/docs/usage.md b/docs/usage.md index 71dce65..556461d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,287 +1,138 @@ # nf-core/deepvariant: Usage -## Table of contents - -- [Introduction](#general-nextflow-info) -- [Running the pipeline](#running-the-pipeline) - - [About preprocessing](#about-preprocessing) -- [Updating the pipeline](#updating-the-pipeline) -- [Reproducibility](#reproducibility) -- [Main arguments](#main-arguments) - - [`-profile`](#-profile-single-dash) - - [`docker`](#docker) - - [`awsbatch`](#awsbatch) - - [`standard`](#standard) - - [`none`](#none) - - [`--bam`](#--bam) - - [`--bam_folder`](#--bam_folder) - - [`--bam_file_prefix`](#--bam_file_prefix) - - [`--bed`](#--bed) -- [Reference Genomes](#reference-genomes) - - [`--genome`](#--genome) - - [`hg19`](#hg19) - - [`hg19chr20`](#hg19chr20) - - [`h38`](#h38) - - [`grch37primary`](#grch37primary) - - [`hs37d5`](#hs37d5) - - [`--genomes_base`](#--genomes_base) - - [`--fasta`](#--fasta) - - [`--fai`](#--fai) - - [`--fastagz`](#--fastagz) - - [`--gzfai`](#--gzfai) - - [`--gzi`](#--gzi) -- [Exome Data](#exome-data) - - [`--exome`](#--exome) -- [Job Resources](#job-resources) -- [Automatic resubmission](#automatic-resubmission) -- [Custom resource requests](#custom-resource-requests) -- [AWS batch specific parameters](#aws-batch-specific-parameters) - - [`-awsbatch`](#-awsbatch) - - [`--awsqueue`](#--awsqueue) - - [`--awsregion`](#--awsregion) -- [Other command line parameters](#other-command-line-parameters) - - [`--outdir`](#--outdir) - - [`--email`](#--email) - - [`-name`](#-name-single-dash) - - [`-resume`](#-resume-single-dash) - - [`-c`](#-c-single-dash) - - [`--max_memory`](#--max_memory) - - [`--max_time`](#--max_time) - - [`--max_cpus`](#--max_cpus) - - [`--plaintext_emails`](#--plaintext_emails) - - [`--sampleLevel`](#--sampleLevel) - - [`--multiqc_config`](#--multiqc_config) -- [Memory](#memory) - -## General Nextflow info - -Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. - -It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/deepvariant/usage](https://nf-co.re/deepvariant/usage) -```bash -NXF_OPTS='-Xms1g -Xmx4g' -``` +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -## Running the pipeline +## Introduction -The typical command for running the pipeline is as follows: + -```bash -nextflow run nf-core/deepvariant --genome hg19 --bam testdata/test.bam --bed testdata/test.bed -profile standard,docker -``` +## Samplesheet input -Note that the pipeline will create the following files in your working directory: +You will need to create a samplesheet file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row as shown in the examples below. 
```bash -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow -# Other nextflow hidden files, eg. history of pipeline runs and old logs. +--input '[path to samplesheet file]' ``` -### About preprocessing +### Multiple replicates -DeepVariant, in order to run at its fastest, requires some indexed and compressed versions of both the reference genome and the BAM files. With DeepVariant in Nextflow, if you wish, you can only use as an input the fasta and the BAM file and let us do the work for you in a clean and standarized way (standard tools like [samtools](http://samtools.sourceforge.net/) are used for indexing and every step is run inside of a Docker container). +The `group` identifier is the same when you have multiple replicates from the same experimental group, just increment the `replicate` identifier appropriately. The first replicate value for any given experimental group must be 1. Below is an example for a single experimental group in triplicate: -This is how the list of the needed input files looks like. If these are passed all as input parameters, the preprocessing steps will be skipped. - -``` -NA12878_S1.chr20.10_10p1mb.bam test_nist.b37_chr20_100kbp_at_10mb.bed NA12878_S1.chr20.10_10p1mb.bam.bai -ucsc.hg19.chr20.unittest.fasta ucsc.hg19.chr20.unittest.fasta.fai ucsc.hg19.chr20.unittest.fasta.gz -ucsc.hg19.chr20.unittest.fasta.gz.fai ucsc.hg19.chr20.unittest.fasta.gz.gzi -``` - -If you do not have all of them, these are the file you can give as input to the Nextflow pipeline, and the rest will be automatically produced for you . - -``` -NA12878_S1.chr20.10_10p1b.bam -test_nist.b37_chr20_100kbp_at_10mb.bed -ucsc.hg19.chr20.unittest.fasta +```bash +group,replicate,fastq_1,fastq_2 +control,1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +control,2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz +control,3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz ``` -### Updating the pipeline +### Multiple runs of the same library -When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: +The `group` and `replicate` identifiers are the same when you have re-sequenced the same sample more than once (e.g. to increase sequencing depth). The pipeline will concatenate the raw reads before alignment. Below is an example for two samples sequenced across multiple lanes: ```bash -nextflow pull nf-core/deepvariant +group,replicate,fastq_1,fastq_2 +control,1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +control,1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz +treatment,1,AEG588A4_S4_L003_R1_001.fastq.gz,AEG588A4_S4_L003_R2_001.fastq.gz +treatment,1,AEG588A4_S4_L004_R1_001.fastq.gz,AEG588A4_S4_L004_R2_001.fastq.gz ``` -### Reproducibility +### Full design -It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. 
If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. +A final design file consisting of both single- and paired-end data may look something like the one below. This is for two experimental groups in triplicate, where the last replicate of the `treatment` group has been sequenced twice. -First, go to the [nf-core/deepvariant releases page](https://github.com/nf-core/deepvariant/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. - -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. - -## Main Arguments - -### `-profile` - -Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile standard,docker` - the order of arguments is important! - -- `standard` - - The default profile, used if `-profile` is not specified at all. - - Runs locally and expects all software to be installed and available on the `PATH`. -- `docker` - - A generic configuration profile to be used with [Docker](http://docker.com/) - - Pulls software from dockerhub: [`nfcore/deepvariant`](http://hub.docker.com/r/nfcore/deepvariant/) -- `singularity` - - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - - Pulls software from singularity-hub -- `conda` - - A generic configuration profile to be used with [conda](https://conda.io/docs/) - - Pulls most software from [Bioconda](https://bioconda.github.io/) -- `awsbatch` - - A generic configuration profile to be used with AWS Batch. -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters -- `test_s3` - - A profile for testing the pipeline with files on an s3 bucket - - Other than the `docker` profile no further inputs are required -- `none` - - No configuration at all. Useful if you want to build your own config from scratch and want to avoid loading in the default `base` config profile (not recommended). - -### `--bam` - -Use this to specify the BAM file - -``` ---bam "/path/to/bam/file" +```bash +group,replicate,fastq_1,fastq_2 +control,1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +control,2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz +control,3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz +treatment,1,AEG588A4_S4_L003_R1_001.fastq.gz, +treatment,2,AEG588A5_S5_L003_R1_001.fastq.gz, +treatment,3,AEG588A6_S6_L003_R1_001.fastq.gz, +treatment,3,AEG588A6_S6_L004_R1_001.fastq.gz, ``` -OR - -### `--bam_folder` +| Column | Description | +|----------------|-------------------------------------------------------------------------------------------------------------| +| `group` | Group identifier for sample. This will be identical for replicate samples from the same experimental group. | +| `replicate` | Integer representing replicate number. Must start from `1..`. | +| `fastq_1` | Full path to FastQ file for read 1. File has to be zipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for read 2. File has to be zipped and have the extension ".fastq.gz" or ".fq.gz". | -Use this to specify a folder containing BAM files. 
Allows multiple BAM files to be analyzed at once. All BAM files will be analyzed unless `--bame_file_prefix` is used (see below). For example: +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. -``` ---bam_folder "/path/to/folder/where/bam/files/are" -``` - -**! TIP** -All the input files can be used in s3 buckets too and the s3://path/to/files/in/bucket can be used instead of a local path. - -### `--bam_file_prefix` +## Running the pipeline -- In case only some specific files inside the BAM folder should be used as input, a file prefix can be defined by: - - `--bam_file_prefix` +The typical command for running the pipeline is as follows: +```bash +nextflow run nf-core/deepvariant --input samplesheet.csv -profile docker ``` ---bam_file_prefix MYPREFIX OPTIONAL -``` - -### `--bed` - -- Path to bedfile, specifying region to be analysed must also be supplied - -### Reference Genomes - -The pipelines can acccept the refernece genome that was used to create the BAM file(s) in one of two ways. Either the reference genome can be specified eg `--genome hg19` (default) or by supplying a relevant fasta file (and optionally the indexes). -### `--genome` +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. -Standard versions of the genome are prepared with all their compressed and indexed file in a lifebit s3 bucket. -They can be used with the following values for the `--genome` tag: - -- `hg19` - - Use if reads were aligned against hg19 reference genome to produce input bam file(s) -- `hg19chr20` - - For testing purposes: chromosome 20 of the hg19 reference genome -- `h38` - - Use if reads were aligned against GRCh38 reference genome to produce input bam file(s) -- `grch37primary` - - Use if reads were aligned against GRCh37 primary reference genome to produce input bam file(s) -- `hs37d5` - - Use if reads were aligned against hs37d5 reference genome to produce input bam file(s) - -### `--genomes_base` - -Base directory location of genomes (default = "s3://deepvariant-data/genomes") for use on computing clusters - -OR you can use your own reference genome version, by using the following parameters: - -The following parameter are optional: - -### `--fasta` - -- Path to fasta reference - -### `--fai` - -- Path to fasta index generated using `samtools faidx` - -### `--fastagz` - -- Path to gzipped fasta - -### `--gzfai` - -- Path to index of gzipped fasta generated using `samtools faidx` - -### `--gzi` - -- Path to bgzip index format (.gzi) - -If the `fai`, `fastagz`, `gzfai` and `gzi` parameters are not passed, they will be automatically be produced for you and you will be able to find them in the "preprocessingOUTPUT" folder. - -### Exome Data - -### `--exome` - -- For exome bam files - -If you are running on exome data you need to prodive the `--exome` flag so that the right verison of the model will be used. +Note that the pipeline will create the following files in your working directory: ```bash -nextflow run nf-core/deepvariant --genome hg19 --bam_folder myBamFolder --bed myBedFile --exome +work # Directory containing the nextflow working files +results # Finished results (configurable, see below) +.nextflow_log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -## Job Resources - -### Automatic resubmission - -Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. 
For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. - -### Custom resource requests - -Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files in [`conf`](../conf) for examples. +### Updating the pipeline -## AWS Batch specific parameters +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. +```bash +nextflow pull nf-core/deepvariant +``` -### `--awsqueue` +### Reproducibility -The JobQueue that you intend to use on AWS Batch. +It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -### `--awsregion` +First, go to the [nf-core/deepvariant releases page](https://github.com/nf-core/deepvariant/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. -The AWS region to run your job in. Default is set to `eu-west-1` but can be adjusted to your needs. +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. -Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. +## Core Nextflow arguments -## Other command line parameters +> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -### `--outdir` +### `-profile` -The output directory where the results will be saved. +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -### `--email` +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Conda) - see below. -Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to speicfy this on the command line for every run. +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. 
-### `-name` +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). -Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. -This is used in the MultiQC report (if not default) and in the summary HTML / e-mail (always). +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. -**NB:** Single hyphen (core Nextflow option) +* `docker` + * A generic configuration profile to be used with [Docker](https://docker.com/) + * Pulls software from Docker Hub: [`nfcore/deepvariant`](https://hub.docker.com/r/nfcore/deepvariant/) +* `singularity` + * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) + * Pulls software from Docker Hub: [`nfcore/deepvariant`](https://hub.docker.com/r/nfcore/deepvariant/) +* `podman` + * A generic configuration profile to be used with [Podman](https://podman.io/) + * Pulls software from Docker Hub: [`nfcore/deepvariant`](https://hub.docker.com/r/nfcore/deepvariant/) +* `conda` + * Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity or Podman. + * A generic configuration profile to be used with [Conda](https://conda.io/docs/) + * Pulls most software from [Bioconda](https://bioconda.github.io/) +* `test` + * A profile with a complete configuration for automated testing + * Includes links to test data so needs no other parameters ### `-resume` @@ -289,40 +140,44 @@ Specify this when restarting a pipeline. Nextflow will used cached results from You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. -**NB:** Single hyphen (core Nextflow option) - ### `-c` -Specify the path to a specific config file (this is a core NextFlow command). +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. -**NB:** Single hyphen (core Nextflow option) +#### Custom resource requests -Note - you can use this to override defaults. For example, you can specify a config file using `-c` that contains the following: +Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. + +Whilst these default requirements will hopefully work for most people with most data, you may find that you want to customise the compute resources that the pipeline requests. You can do this by creating a custom config file. 
For example, to give the workflow process `star` 32GB of memory, you could use the following config: ```nextflow -process.$multiqc.module = [] +process { + withName: star { + memory = 32.GB + } +} ``` -### `--max_memory` +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information. + +If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition above). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -Use to set a top-limit for the default memory requirement for each process. -Should be a string in the format integer-unit. eg. `--max_memory '8.GB'`` +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). -### `--max_time` -Use to set a top-limit for the default time requirement for each process. -Should be a string in the format integer-unit. eg. `--max_time '2.h'` +### Running in the background -### `--max_cpus` +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. -Use to set a top-limit for the default CPU requirement for each process. -Should be a string in the format integer-unit. eg. `--max_cpus 1` +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. -### `--plaintext_email` +Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs). -Set to receive plain-text e-mails instead of HTML formatted. +#### Nextflow memory requirements -### `--multiqc_config` -Specify a path to a custom MultiQC configuration file. +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -## Memory -DeepVariant is quite memory intensive. The most memory intensive process is `make_examples`. The memory requirement should be approximately 10-15x the size of your BAM file. For example, for a 5GB BAM file the memory should be set to 50GB. Fortunately this is set automaticaally for you in `base.config` for all of the man deepvariant processes, so you don't need change anything more and can run the pipeline as normal. 
+```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/environment.yml b/environment.yml index 8ddc57f..d773247 100644 --- a/environment.yml +++ b/environment.yml @@ -1,14 +1,12 @@ -name: nf-core-deepvariant-1.0 +# You can use this file to create a conda environment for this pipeline: +# conda env create -f environment.yml +name: nf-core-deepvariant-1.0dev channels: - - bioconda - conda-forge + - bioconda - defaults dependencies: - - python=2.7.15 - - pip=10.0.1 - - deepvariant=0.7.0 - - picard=2.18.7 - - samtools=1.9 - - htslib=1.9 - - lbzip2=2.5 - - bzip2=1.0.6 + # TODO nf-core: Add required software dependencies here + - bioconda::fastqc=0.11.9 + - bioconda::trim-galore=0.6.5 + - bioconda::multiqc=1.9 diff --git a/lib/Checks.groovy b/lib/Checks.groovy new file mode 100644 index 0000000..f9103a5 --- /dev/null +++ b/lib/Checks.groovy @@ -0,0 +1,48 @@ +/* + * This file holds several functions used to perform standard checks for the nf-core pipeline template. + */ + +class Checks { + + static void aws_batch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + assert !params.awsqueue || !params.awsregion : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + // related: https://github.com/nextflow-io/nextflow/issues/813 + assert !params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + // Prevent trace files to be stored on S3 since S3 does not support rolling files. + assert params.tracedir.startsWith('s3:') : "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." + } + } + + static void hostname(workflow, params, log) { + Map colors = Headers.log_colours(params.monochrome_logs) + if (params.hostnames) { + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if (hostname.contains(hname) && !workflow.profile.contains(prof)) { + log.info "=${colors.yellow}====================================================${colors.reset}=\n" + + "${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" + + " ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" + + "=${colors.yellow}====================================================${colors.reset}=" + } + } + } + } + } + + // Citation string + private static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " https://doi.org/10.5281/zenodo.1400710\n\n" + + "* The nf-core framework\n" + + " https://dx.doi.org/10.1038/s41587-020-0439-x\n" + + " https://rdcu.be/b1GjZ\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + +} \ No newline at end of file diff --git a/lib/Completion.groovy b/lib/Completion.groovy new file mode 100644 index 0000000..e745c98 --- /dev/null +++ b/lib/Completion.groovy @@ -0,0 +1,127 @@ +/* + * Functions to be run on completion of pipeline + */ + +class Completion { + static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << 
summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = workflow.manifest.version + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success && !params.skip_multiqc) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = Headers.log_colours(params.monochrome_logs) + if (email_address) { + try { + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 
'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + } + + static void summary(workflow, params, log) { + Map colors = Headers.log_colours(params.monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + Checks.hostname(workflow, params, log) + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } +} \ No newline at end of file diff --git a/lib/Headers.groovy b/lib/Headers.groovy new file mode 100644 index 0000000..15d1d38 --- /dev/null +++ b/lib/Headers.groovy @@ -0,0 +1,43 @@ +/* + * This file holds several functions used to render the nf-core ANSI header. + */ + +class Headers { + + private static Map log_colours(Boolean monochrome_logs) { + Map colorcodes = [:] + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['yellow_bold'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + colorcodes['red'] = monochrome_logs ? 
'' : "\033[1;91m" + return colorcodes + } + + static String dashed_line(monochrome_logs) { + Map colors = log_colours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + static String nf_core(workflow, monochrome_logs) { + Map colors = log_colours(monochrome_logs) + String.format( + """\n + ${dashed_line(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${dashed_line(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/Schema.groovy b/lib/Schema.groovy new file mode 100644 index 0000000..e241f0c --- /dev/null +++ b/lib/Schema.groovy @@ -0,0 +1,228 @@ +/* + * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. + */ + +import groovy.json.JsonSlurper + +class Schema { + /* + * This method tries to read a JSON params file + */ + private static LinkedHashMap params_load(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = params_read(json_schema) + } catch (Exception e) { + println "Could not read parameters settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + /* + Method to actually read in JSON file using Groovy. + Group (as Key), values are all parameters + - Parameter1 as Key, Description as Value + - Parameter2 as Key, Description as Value + .... 
+ Group + - + */ + private static LinkedHashMap params_read(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map json_params = (Map) new JsonSlurper().parseText(json).get('definitions') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + */ + def params_map = new LinkedHashMap() + json_params.each { key, val -> + def Map group = json_params."$key".properties // Gets the property object of the group + def title = json_params."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + return params_map + } + + /* + * Get maximum number of characters across all parameter names + */ + private static Integer params_max_chars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } + + /* + * Beautify parameters for --help + */ + private static String params_help(workflow, params, json_schema, command) { + String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" + output += "Typical pipeline command:\n\n" + output += " ${command}\n\n" + def params_map = params_load(json_schema) + def max_chars = params_max_chars(params_map) + 1 + for (group in params_map.keySet()) { + output += group + "\n" + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + def type = "[" + group_params.get(param).type + "]" + def description = group_params.get(param).description + output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + "\n" + } + output += "\n" + } + output += Headers.dashed_line(params.monochrome_logs) + output += "\n\n" + Checks.citation(workflow) + output += "\n\n" + Headers.dashed_line(params.monochrome_logs) + return output + } + + /* + * Groovy Map summarising parameters/workflow options used by the pipeline + */ + private static LinkedHashMap params_summary_map(workflow, params, json_schema) { + // Get a selection of core Nextflow workflow options + def Map workflow_summary = [:] + if (workflow.revision) { + workflow_summary['revision'] = workflow.revision + } + workflow_summary['runName'] = workflow.runName + if (workflow.containerEngine) { + workflow_summary['containerEngine'] = "$workflow.containerEngine" + } + if (workflow.container) { + workflow_summary['container'] = "$workflow.container" + } + workflow_summary['launchDir'] = workflow.launchDir + workflow_summary['workDir'] = workflow.workDir + workflow_summary['projectDir'] = workflow.projectDir + workflow_summary['userName'] = workflow.userName + workflow_summary['profile'] = workflow.profile + workflow_summary['configFiles'] = workflow.configFiles.join(', ') + + // Get pipeline parameters defined in JSON Schema + def Map params_summary = [:] + def blacklist = ['hostnames'] + def params_map = params_load(json_schema) + for (group in params_map.keySet()) { + def sub_params = new LinkedHashMap() + def group_params = params_map.get(group) // This gets the 
parameters of that particular group + for (param in group_params.keySet()) { + if (params.containsKey(param) && !blacklist.contains(param)) { + def params_value = params.get(param) + def schema_value = group_params.get(param).default + def param_type = group_params.get(param).type + if (schema_value == null) { + if (param_type == 'boolean') { + schema_value = false + } + if (param_type == 'string') { + schema_value = '' + } + if (param_type == 'integer') { + schema_value = 0 + } + } else { + if (param_type == 'string') { + if (schema_value.contains('$baseDir') || schema_value.contains('${baseDir}')) { + def sub_string = schema_value.replace('\$baseDir','') + sub_string = sub_string.replace('\${baseDir}','') + if (params_value.contains(sub_string)) { + schema_value = params_value + } + } + if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { + def sub_string = schema_value.replace('\$params.outdir','') + sub_string = sub_string.replace('\${params.outdir}','') + if ("${params.outdir}${sub_string}" == params_value) { + schema_value = params_value + } + } + } + } + + if (params_value != schema_value) { + sub_params.put("$param", params_value) + } + } + } + params_summary.put(group, sub_params) + } + return [ 'Core Nextflow options' : workflow_summary ] << params_summary + } + + /* + * Beautify parameters for summary and return as string + */ + private static String params_summary_log(workflow, params, json_schema) { + String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" + def params_map = params_summary_map(workflow, params, json_schema) + def max_chars = params_max_chars(params_map) + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + if (group_params) { + output += group + "\n" + for (param in group_params.keySet()) { + output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + "\n" + } + output += "\n" + } + } + output += Headers.dashed_line(params.monochrome_logs) + output += "\n\n" + Checks.citation(workflow) + output += "\n\n" + Headers.dashed_line(params.monochrome_logs) + return output + } + + static String params_summary_multiqc(workflow, summary) { + String summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    <p style=\"font-size:110%\"><b>$group</b></p>\n"
+                summary_section += "    <dl class=\"dl-horizontal\">\n"
+                for (param in group_params.keySet()) {
+                    summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: 'N/A'}</samp></dd>\n"
+                }
+                summary_section += "    </dl>
\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } +} \ No newline at end of file diff --git a/main.nf b/main.nf index f75c67b..8c205be 100644 --- a/main.nf +++ b/main.nf @@ -9,509 +9,76 @@ ---------------------------------------------------------------------------------------- */ - -def helpMessage() { - log.info""" - ========================================= - nf-core/deepvariant v${workflow.manifest.version} - ========================================= - Usage: - - The typical command for running the pipeline is as follows: - - nextflow run nf-core/deepvariant --genome hg19 --bam_folder "s3://deepvariant-data/test-bam/" --bed testdata/test.bed -profile standard,docker - - Mandatory arguments: - --bam_folder Path to folder containing BAM files (reads must have been aligned to specified reference file, see below) - OR - --bam Path to BAM file (reads must have been aligned to specified reference file, see below) - --bed Path to bed file specifying regions to be analyzed - - References: If you wish to overwrite default reference of hg19. - --genome Reference genome: hg19 | hg19chr20 (for testing) | h38 | grch37primary | hs37d5 - --genomes_base Base directory location of genomes (default = "s3://deepvariant-data/genomes") - OR - --fasta Path to fasta reference - --fai Path to fasta index generated using `samtools faidx` - --fastagz Path to gzipped fasta - --gzfai Path to index of gzipped fasta - --gzi Path to bgzip index format (.gzi) produced by faidx - *Pass all five files above to skip the fasta preprocessing step - - Options: - -profile Configuration profile to use. Can use multiple (comma separated) - Available: standard, conda, docker, singularity, awsbatch, test - --exome For exome bam files - --rgid Bam file read group line id incase its needed (default = 4) - --rglb Bam file read group line library incase its needed (default = 'lib1') - --rgpl Bam file read group line platform incase its needed (default = 'illumina') - --rgpu Bam file read group line platform unit incase its needed (default = 'unit1') - --rgsm Bam file read group line sample incase its needed (default = 20) - - Other options: - --outdir The output directory where the results will be saved (default = results) - --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. 
- --help Bring up this help message - - AWSBatch options: - --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion The AWS Region for your AWS Batch job to run on - """.stripIndent() -} +nextflow.preview.dsl = 2 /* - * SET UP CONFIGURATION VARIABLES + * Print help message if required */ - -// Show help emssage -if (params.help){ - helpMessage() +if (params.help) { + def command = "nextflow run nf-core/deepvariant --input samplesheet.csv -profile docker" + log.info Headers.nf_core(workflow, params.monochrome_logs) + log.info Schema.params_help("$baseDir/nextflow_schema.json", command) exit 0 } -//set model for call variants either whole genome or exome -model= params.exome ? 'wes' : 'wgs' - -//set fasta files equal to genome option if used -params.fasta = params.genome ? params.genomes[ params.genome ].fasta : false -params.fai = params.genome ? params.genomes[ params.genome ].fai : false -params.fastagz = params.genome ? params.genomes[ params.genome ].fastagz : false -params.gzfai = params.genome ? params.genomes[ params.genome ].gzfai : false -params.gzi = params.genome ? params.genomes[ params.genome ].gzi : false - -//setup fasta channels -(fastaToIndexCh, fastaToGzCh, fastaToGzFaiCh, fastaToGziCh) = Channel.fromPath(params.fasta).into(4) - -bedToExamples = Channel - .fromPath(params.bed) - .ifEmpty { exit 1, "please specify --bed option (--bed bedfile)"} - -if(params.fai){ -faiToExamples = Channel - .fromPath(params.fai) - .ifEmpty{exit 1, "Fai file not found: ${params.fai}"} -} - -if(params.fastagz){ -fastaGz = Channel - .fromPath(params.fastagz) - .ifEmpty{exit 1, "Fastagz file not found: ${params.fastagz}"} - .into {fastaGzToExamples; fastaGzToVariants } -} +/* + * Validate parameters + */ +if (params.input) { ch_input = file(params.input, checkIfExists: true) } else { exit 1, "Input samplesheet file not specified!" } -if(params.gzfai){ -gzFai = Channel - .fromPath(params.gzfai) - .ifEmpty{exit 1, "gzfai file not found: ${params.gzfai}"} - .into{gzFaiToExamples; gzFaiToVariants } -} +/* + * Reference genomes + */ +params.fasta = params.genomes[params.genome]?.fasta +if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) } -if(params.gzi){ -gzi = Channel - .fromPath(params.gzi) - .ifEmpty{exit 1, "gzi file not found: ${params.gzi}"} - .into {gziToExamples; gziToVariants} -} -/*-------------------------------------------------- - Bam related input files ----------------------------------------------------*/ -if(params.bam_folder) { - Channel - .fromPath("${params.bam_folder}/${params.bam_file_prefix}*.bam") - .ifEmpty { exit 1, "${params.bam_folder}/${params.bam_file_prefix}*.bam not found"} - .set{bamChannel} -} else if(params.bam) { - Channel - .fromPath(params.bam) - .ifEmpty { exit 1, "${params.bam} not found"} - .set{bamChannel} -} else { - exit 1, "please specify --bam OR --bam_folder" -} +/* + * Check parameters + */ +Checks.aws_batch(workflow, params) // Check AWS batch settings +Checks.hostname(workflow, params, log) // Check the hostnames against configured profiles -/*-------------------------------------------------- - For workflow summary ----------------------------------------------------*/ +/* + * Print parameter summary + */ // Has the run name been specified by the user? 
-// this has the bonus effect of catching both -name and --name -custom_runName = params.name -if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ - custom_runName = workflow.runName -} - -// Check workDir/outdir paths to be S3 buckets if running on AWSBatch -// related: https://github.com/nextflow-io/nextflow/issues/813 -if( workflow.profile == 'awsbatch') { - if(!workflow.workDir.startsWith('s3:') || !params.outdir.startsWith('s3:')) exit 1, "Workdir or Outdir not on S3 - specify S3 Buckets for each to run on AWSBatch!" -} - - -// Header log info -log.info """======================================================= - ,--./,-. - ___ __ __ __ ___ /,-._.--~\' - |\\ | |__ __ / ` / \\ |__) |__ } { - | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, - `._,._,\' -nf-core/deepvariant v${workflow.manifest.version}" -=======================================================""" -def summary = [:] -summary['Pipeline Name'] = 'nf-core/deepvariant' -summary['Pipeline Version'] = workflow.manifest.version -if(params.bam_folder) summary['Bam folder'] = params.bam_folder -if(params.bam) summary['Bam file'] = params.bam -summary['Bed file'] = params.bed -if(params.genome) summary['Reference genome'] = params.genome -if(params.fasta) summary['Fasta Ref'] = params.fasta -if(params.fai) summary['Fasta Index'] = params.fai -if(params.fastagz) summary['Fasta gzipped '] = params.fastagz -if(params.gzfai) summary['Fasta gzipped Index'] = params.gzfai -if(params.gzi) summary['Fasta bgzip Index'] = params.gzi -if(params.rgid != 4) summary['BAM Read Group ID'] = params.rgid -if(params.rglb != 'lib1') summary['BAM Read Group Library'] = params.rglb -if(params.rgpl != 'illumina') summary['BAM Read Group Platform'] = params.rgpl -if(params.rgpu != 'unit1') summary['BAM Read Group Platform Unit'] = params.rgpu -if(params.rgsm != 20) summary['BAM Read Group Sample'] = params.rgsm -summary['Max Memory'] = params.max_memory -summary['Max CPUs'] = params.max_cpus -summary['Max Time'] = params.max_time -summary['Model'] = model -summary['Output dir'] = params.outdir -summary['Working dir'] = workflow.workDir -summary['Container Engine'] = workflow.containerEngine -if(workflow.containerEngine) summary['Container'] = workflow.container -summary['Current home'] = "$HOME" -summary['Current user'] = "$USER" -summary['Current path'] = "$PWD" -summary['Working dir'] = workflow.workDir -summary['Output dir'] = params.outdir -summary['Script dir'] = workflow.projectDir -summary['Config Profile'] = workflow.profile -if(workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue -} -if(params.email) summary['E-mail Address'] = params.email -log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n") -log.info "=========================================" - - -def create_workflow_summary(summary) { - - def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') - yaml_file.text = """ - id: 'nf-core-deepvariant-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/deepvariant Workflow Summary' - section_href: 'https://github.com/nf-core/deepvariant' - plot_type: 'html' - data: | -
        <dl class=\"dl-horizontal\">
-${summary.collect { k,v -> "            <dt>$k</dt><dd><samp>${v ?: 'N/A'}</samp></dd>" }.join("\n")}
-        </dl>
- """.stripIndent() - - return yaml_file -} - - -/******************************************************************** - preprocess fasta files processes - Collects all the files related to the reference genome, like - .fai,.gz ... - If the user gives them as an input, they are used - If not they are produced in this process given only the fasta file. -********************************************************************/ - -if(!params.fai) { - process preprocess_fai { - tag "${fasta}.fai" - publishDir "$baseDir/sampleDerivatives" - - input: - file(fasta) from fastaToIndexCh - - output: - file("${fasta}.fai") into faiToExamples - - script: - """ - samtools faidx $fasta - """ - } +// this has the bonus effect of catching both -name and --name +run_name = params.name +if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { + run_name = workflow.runName } +summary = Schema.params_summary(workflow, params, run_name) +log.info Headers.nf_core(workflow, params.monochrome_logs) +log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n") +log.info "-\033[2m----------------------------------------------------\033[0m-" -if(!params.fastagz) { - process preprocess_fastagz { - tag "${fasta}.gz" - publishDir "$baseDir/sampleDerivatives" - - input: - file(fasta) from fastaToGzCh - - output: - file("*.gz") into (tmpFastaGzCh, fastaGzToExamples, fastaGzToVariants) - - script: - """ - bgzip -c ${fasta} > ${fasta}.gz - """ - } -} +workflow_summary = Schema.params_mqc_summary(summary) +ch_workflow_summary = Channel.value(workflow_summary) -if(!params.gzfai) { - process preprocess_gzfai { - tag "${fasta}.gz.fai" - publishDir "$baseDir/sampleDerivatives" - - input: - file(fasta) from fastaToGzFaiCh - file(fastagz) from tmpFastaGzCh - - output: - file("*.gz.fai") into (gzFaiToExamples, gzFaiToVariants) - - script: - """ - samtools faidx $fastagz - """ - } -} - -if(!params.gzi){ - process preprocess_gzi { - tag "${fasta}.gz.gzi" - publishDir "$baseDir/sampleDerivatives" - - input: - file(fasta) from fastaToGziCh - - output: - file("*.gz.gzi") into (gziToExamples, gziToVariants) - - script: - """ - bgzip -c -i ${fasta} > ${fasta}.gz - """ - } -} - -/******************************************************************** - process preprocess_bam - Takes care of the read group line. 
-********************************************************************/ - -process preprocess_bam{ - - tag "${bam}" - publishDir "$baseDir/sampleDerivatives" - - input: - file(bam) from bamChannel - - output: - set file("ready/${bam}"), file("ready/${bam}.bai") into completeChannel - - script: - """ - mkdir ready - [[ `samtools view -H ${bam} | grep '@RG' | wc -l` > 0 ]] && { mv $bam ready;}|| { picard AddOrReplaceReadGroups \ - I=${bam} \ - O=ready/${bam} \ - RGID=${params.rgid} \ - RGLB=${params.rglb} \ - RGPL=${params.rgpl} \ - RGPU=${params.rgpu} \ - RGSM=${params.rgsm};} - cd ready ;samtools index ${bam}; - """ -} - -/******************************************************************** - process make_examples - Getting bam files and converting them to images ( named examples ) -********************************************************************/ - -process make_examples{ - - tag "${bam}" - publishDir "${params.outdir}/make_examples", mode: 'copy', - saveAs: {filename -> "logs/log"} - - input: - file fai from faiToExamples.collect() - file fastagz from fastaGzToExamples.collect() - file gzfai from gzFaiToExamples.collect() - file gzi from gziToExamples.collect() - file bed from bedToExamples.collect() - set file(bam), file(bai) from completeChannel - - output: - set file("${bam}"),file('*_shardedExamples') into examples - - script: - """ - mkdir logs - mkdir ${bam.baseName}_shardedExamples - dv_make_examples.py \ - --cores ${task.cpus} \ - --sample ${bam} \ - --ref ${fastagz} \ - --reads ${bam} \ - --regions ${bed} \ - --logdir logs \ - --examples ${bam.baseName}_shardedExamples - """ -} -/******************************************************************** - process call_variants - Doing the variant calling based on the ML trained model. -********************************************************************/ - -process call_variants{ - - tag "${bam}" - - input: - set file(bam),file(shardedExamples) from examples - - output: - set file(bam),file('*_call_variants_output.tfrecord') into called_variants - - script: - """ - dv_call_variants.py \ - --cores ${task.cpus} \ - --sample ${bam} \ - --outfile ${bam.baseName}_call_variants_output.tfrecord \ - --examples $shardedExamples \ - --model ${model} - """ -} - - - -/******************************************************************** - process postprocess_variants - Trasforming the variant calling output (tfrecord file) into a standard vcf file. 
-********************************************************************/ - -process postprocess_variants{ - - tag "${bam}" - - publishDir params.outdir, mode: 'copy' - - input: - file fastagz from fastaGzToVariants.collect() - file gzfai from gzFaiToVariants.collect() - file gzi from gziToVariants.collect() - set file(bam),file('call_variants_output.tfrecord') from called_variants - - output: - set val("${bam}"),file("${bam}.vcf") into postout +/* + * Include local pipeline modules + */ +include { OUTPUT_DOCUMENTATION } from './modules/local/output_documentation' params(params) +include { CHECK_SAMPLESHEET; check_samplesheet_paths } from './modules/local/check_samplesheet' params(params) - script: - """ - dv_postprocess_variants.py \ - --ref ${fastagz} \ - --infile call_variants_output.tfrecord \ - --outfile "${bam}.vcf" - """ -} /* - * Parse software version numbers + * Run the workflow */ -process get_software_versions { +workflow { - output: - file 'software_versions_mqc.yaml' into software_versions_yaml + CHECK_SAMPLESHEET(ch_input) + .splitCsv(header:true, sep:',') + .map { check_samplesheet_paths(it) } + .set { ch_raw_reads } - script: - """ - echo $workflow.manifest.version &> v_nf_deepvariant.txt - echo $workflow.nextflow.version &> v_nextflow.txt - ls /opt/conda/pkgs/ &> v_deepvariant.txt - python --version &> v_python.txt - pip --version &> v_pip.txt - samtools --version &> v_samtools.txt - lbzip2 --version &> v_lbzip2.txt - bzip2 --version &> v_bzip2.txt - scrape_software_versions.py &> software_versions_mqc.yaml - """ } +/* + * Send completion email + */ workflow.onComplete { - // Set up the e-mail variables - def subject = "[nf-core/deepvariant] Successful: $workflow.runName" - if(!workflow.success){ - subject = "[nf-core/deepvariant] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$baseDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new 
File("$baseDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir" ] - def sf = new File("$baseDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (params.email) { - try { - if( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/deepvariant] Sent summary e-mail to $params.email (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - [ 'mail', '-s', subject, params.email ].execute() << email_txt - log.info "[nf-core/deepvariant] Sent summary e-mail to $params.email (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/Documentation/" ) - if( !output_d.exists() ) { - output_d.mkdirs() - } - def output_hf = new File( output_d, "pipeline_report.html" ) - output_hf.withWriter { w -> w << email_html } - def output_tf = new File( output_d, "pipeline_report.txt" ) - output_tf.withWriter { w -> w << email_txt } - - log.info "[nf-core/deepvariant] Pipeline Complete! You can find your results in $baseDir/${params.outdir}" + def multiqc_report = [] + Completion.email(workflow, params, summary, run_name, baseDir, multiqc_report, log) + Completion.summary(workflow, params, log) } diff --git a/modules/local/process/cat_fastq.nf b/modules/local/process/cat_fastq.nf new file mode 100644 index 0000000..83a2d72 --- /dev/null +++ b/modules/local/process/cat_fastq.nf @@ -0,0 +1,54 @@ +// Import generic module functions +include { initOptions; saveFiles } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +/* + * Concatenate FastQ files + */ +process CAT_FASTQ { + tag "$meta.id" + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'merged_fastq', publish_id:meta.id) } + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "biocontainers/biocontainers:v1.2.0_cv1" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + readList = reads.collect{it.toString()} + if (!meta.single_end) { + if (readList.size > 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? 
read2 : read1 ) << v } + """ + cat ${read1.sort().join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.sort().join(' ')} > ${prefix}_2.merged.fastq.gz + """ + } else { + """ + ln -s ${reads[0]} ${prefix}_1.merged.fastq.gz + ln -s ${reads[1]} ${prefix}_2.merged.fastq.gz + """ + } + } else { + if (readList.size > 1) { + """ + cat ${readList.sort().join(' ')} > ${prefix}.merged.fastq.gz + """ + } else { + """ + ln -s $reads ${prefix}.merged.fastq.gz + """ + } + } +} diff --git a/modules/local/process/functions.nf b/modules/local/process/functions.nf new file mode 100644 index 0000000..d25eea8 --- /dev/null +++ b/modules/local/process/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/process/get_software_versions.nf b/modules/local/process/get_software_versions.nf new file mode 100644 index 0000000..7ad0405 --- /dev/null +++ b/modules/local/process/get_software_versions.nf @@ -0,0 +1,32 @@ +// Import generic module functions +include { saveFiles } from './functions' + +params.options = [:] + +/* + * Parse software version numbers + */ +process GET_SOFTWARE_VERSIONS { + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', publish_id:'') } + + conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) + container "quay.io/biocontainers/python:3.8.3" + + cache false + + input: + path versions + + output: + path "software_versions.csv" , emit: csv + path 'software_versions_mqc.yaml', emit: yaml + + script: + """ + echo $workflow.manifest.version > pipeline.version.txt + echo $workflow.nextflow.version > nextflow.version.txt + scrape_software_versions.py &> software_versions_mqc.yaml + """ +} diff --git a/modules/local/process/samplesheet_check.nf b/modules/local/process/samplesheet_check.nf new file mode 100644 index 0000000..c238ad6 --- /dev/null +++ b/modules/local/process/samplesheet_check.nf @@ -0,0 +1,44 @@ +// Import generic module functions +include { saveFiles } from './functions' + +params.options = [:] + +/* + * Reformat design file and check validity + */ +process SAMPLESHEET_CHECK { + tag "$samplesheet" + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', publish_id:'') } + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + container "quay.io/biocontainers/python:3.8.3" + + input: + path samplesheet + + output: + path '*.csv' + + + script: // This script is bundled with the pipeline, in nf-core/deepvariant/bin/ + """ + check_samplesheet.py $samplesheet samplesheet.valid.csv + """ +} + +// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +def get_samplesheet_paths(LinkedHashMap row) { + def meta = [:] + meta.id = row.sample + meta.single_end = row.single_end.toBoolean() + + def array = [] + if (meta.single_end) { + array = [ meta, [ file(row.fastq_1, checkIfExists: true) ] ] + } else { + array = [ meta, [ file(row.fastq_1, checkIfExists: true), file(row.fastq_2, checkIfExists: true) ] ] + } + return array +} diff --git a/modules/local/software/bwa/index/functions.nf b/modules/local/software/bwa/index/functions.nf new file mode 100644 index 0000000..d25eea8 --- /dev/null +++ b/modules/local/software/bwa/index/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if 
(ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/software/bwa/index/main.nf b/modules/local/software/bwa/index/main.nf new file mode 100644 index 0000000..3edc97a --- /dev/null +++ b/modules/local/software/bwa/index/main.nf @@ -0,0 +1,43 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +process FASTQC { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + container "quay.io/biocontainers/fastqc:0.11.9--0" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "*.version.txt" , emit: version + + script: + // Add soft-links to original FastQs for consistent naming in pipeline + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}" + if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz + fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! 
-f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz + fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + """ + } +} diff --git a/modules/local/software/bwa/mem/functions.nf b/modules/local/software/bwa/mem/functions.nf new file mode 100644 index 0000000..d25eea8 --- /dev/null +++ b/modules/local/software/bwa/mem/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/software/bwa/mem/main.nf b/modules/local/software/bwa/mem/main.nf new file mode 100644 index 0000000..3edc97a --- /dev/null +++ b/modules/local/software/bwa/mem/main.nf @@ -0,0 +1,43 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +process FASTQC { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + container "quay.io/biocontainers/fastqc:0.11.9--0" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "*.version.txt" , emit: version + + script: + // Add soft-links to original FastQs for consistent naming in pipeline + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}" + if (meta.single_end) { + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
+    fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz
+    fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt
+    """
+    } else {
+    """
+    [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+    [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+    fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
+    fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt
+    """
+    }
+}
diff --git a/modules/local/software/deepvariant/functions.nf b/modules/local/software/deepvariant/functions.nf
new file mode 100644
index 0000000..d25eea8
--- /dev/null
+++ b/modules/local/software/deepvariant/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args = args.args ?: ''
+    options.args2 = args.args2 ?: ''
+    options.publish_by_id = args.publish_by_id ?: false
+    options.publish_dir = args.publish_dir ?: ''
+    options.publish_files = args.publish_files
+    options.suffix = args.suffix ?: ''
+    return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_id) {
+            path_list.add(args.publish_id)
+        }
+        if (ioptions.publish_files instanceof Map) {
+            for (ext in ioptions.publish_files) {
+                if (args.filename.endsWith(ext.key)) {
+                    def ext_list = path_list.collect()
+                    ext_list.add(ext.value)
+                    return "${getPathFromList(ext_list)}/$args.filename"
+                }
+            }
+        } else if (ioptions.publish_files == null) {
+            return "${getPathFromList(path_list)}/$args.filename"
+        }
+    }
+}
diff --git a/modules/local/software/deepvariant/main.nf b/modules/local/software/deepvariant/main.nf
new file mode 100644
index 0000000..a7b5790
--- /dev/null
+++ b/modules/local/software/deepvariant/main.nf
@@ -0,0 +1,41 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+def options = initOptions(params.options)
+
+process DEEP_VARIANT {
+    tag "$meta.id"
+    label 'process_high'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename: filename, options: params.options, publish_dir: getSoftwareName(task.process), publish_id: meta.id) }
+
+    conda(params.enable_conda ? "bioconda::deepvariant=1.0.0" : null)
+    container "google/deepvariant:1.0.0"
+
+    input:
+    tuple val(meta), val(sample_name), path(bam), path(bai)
+    tuple val(regions), val(model_type)
+    tuple path(ref_fasta), path(ref_fasta_fai)
+
+    output:
+    tuple val(meta), path("${sample_name}.vcf.gz"), path("${sample_name}.vcf.gz.tbi"), emit: vcf
+    tuple val(meta), path("${sample_name}.g.vcf.gz"), path("${sample_name}.g.vcf.gz.tbi"), emit: gvcf
+    tuple val(meta), path("*html"), emit: report
+
+    script:
+
+    """
+
+    /opt/deepvariant/bin/run_deepvariant \
+        --model_type ${model_type} \
+        --ref ${ref_fasta} \
+        --reads ${bam} \
+        --output_vcf ${sample_name}.vcf.gz \
+        --output_gvcf ${sample_name}.g.vcf.gz \
+        --num_shards ${task.cpus} \
+        --regions ${regions}
+
+    """
+}
diff --git a/modules/local/software/samtools/faidx/functions.nf b/modules/local/software/samtools/faidx/functions.nf
new file mode 100644
index 0000000..d25eea8
--- /dev/null
+++ b/modules/local/software/samtools/faidx/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args = args.args ?: ''
+    options.args2 = args.args2 ?: ''
+    options.publish_by_id = args.publish_by_id ?: false
+    options.publish_dir = args.publish_dir ?: ''
+    options.publish_files = args.publish_files
+    options.suffix = args.suffix ?: ''
+    return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_id) {
+            path_list.add(args.publish_id)
+        }
+        if (ioptions.publish_files instanceof Map) {
+            for (ext in ioptions.publish_files) {
+                if (args.filename.endsWith(ext.key)) {
+                    def ext_list = path_list.collect()
+                    ext_list.add(ext.value)
+                    return "${getPathFromList(ext_list)}/$args.filename"
+                }
+            }
+        } else if (ioptions.publish_files == null) {
+            return "${getPathFromList(path_list)}/$args.filename"
+        }
+    }
+}
diff --git a/modules/local/software/samtools/faidx/main.nf b/modules/local/software/samtools/faidx/main.nf
new file mode 100644
index 0000000..d754e29
--- /dev/null
+++ b/modules/local/software/samtools/faidx/main.nf
@@ -0,0 +1,32 @@
+// Import generic module functions
+include { saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+
+process SAMTOOLS_FAIDX {
+    tag "$meta.id"
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename: filename, options: params.options, publish_dir: getSoftwareName(task.process), publish_id: meta.id) }
+
+    conda(params.enable_conda ? "bioconda::samtools=1.10" : null)
+    if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/samtools:1.10--h9402c20_2"
+    } else {
+        container "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+    }
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("*fasta*"), emit: fasta_and_fai
+    path "*.version.txt", emit: version
+
+    script:
+    def software = getSoftwareName(task.process)
+    """
+    samtools faidx $fasta
+    echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt
+    """
+}
diff --git a/modules/local/subworkflow/input_check.nf b/modules/local/subworkflow/input_check.nf
new file mode 100644
index 0000000..cb6a902
--- /dev/null
+++ b/modules/local/subworkflow/input_check.nf
@@ -0,0 +1,23 @@
+/*
+ * Check input samplesheet and get read channels
+ */
+
+params.options = [:]
+
+include {
+    SAMPLESHEET_CHECK;
+    get_samplesheet_paths } from '../process/samplesheet_check' addParams( options: params.options )
+
+workflow INPUT_CHECK {
+    take:
+    samplesheet // file: /path/to/samplesheet.csv
+
+    main:
+    SAMPLESHEET_CHECK ( samplesheet )
+        .splitCsv ( header:true, sep:',' )
+        .map { get_samplesheet_paths(it) }
+        .set { reads }
+
+    emit:
+    reads // channel: [ val(meta), [ reads ] ]
+}
diff --git a/modules/nf-core/software/fastqc/functions.nf b/modules/nf-core/software/fastqc/functions.nf
new file mode 100644
index 0000000..d25eea8
--- /dev/null
+++ b/modules/nf-core/software/fastqc/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args = args.args ?: ''
+    options.args2 = args.args2 ?: ''
+    options.publish_by_id = args.publish_by_id ?: false
+    options.publish_dir = args.publish_dir ?: ''
+    options.publish_files = args.publish_files
+    options.suffix = args.suffix ?: ''
+    return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_id) {
+            path_list.add(args.publish_id)
+        }
+        if (ioptions.publish_files instanceof Map) {
+            for (ext in ioptions.publish_files) {
+                if (args.filename.endsWith(ext.key)) {
+                    def ext_list = path_list.collect()
+                    ext_list.add(ext.value)
+                    return "${getPathFromList(ext_list)}/$args.filename"
+                }
+            }
+        } else if (ioptions.publish_files == null) {
+            return "${getPathFromList(path_list)}/$args.filename"
+        }
+    }
+}
diff --git a/modules/nf-core/software/fastqc/main.nf b/modules/nf-core/software/fastqc/main.nf
new file mode 100644
index 0000000..3edc97a
--- /dev/null
+++ b/modules/nf-core/software/fastqc/main.nf @@ -0,0 +1,43 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +process FASTQC { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + container "quay.io/biocontainers/fastqc:0.11.9--0" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "*.version.txt" , emit: version + + script: + // Add soft-links to original FastQs for consistent naming in pipeline + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}" + if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz + fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz + fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + """ + } +} diff --git a/modules/nf-core/software/fastqc/meta.yml b/modules/nf-core/software/fastqc/meta.yml new file mode 100644 index 0000000..337fc37 --- /dev/null +++ b/modules/nf-core/software/fastqc/meta.yml @@ -0,0 +1,67 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ +params: + - outdir: + type: string + description: | + The pipeline's output directory. By default, the module will + output files into `$params.outdir/` + - publish_dir_mode: + type: string + description: | + Value for the Nextflow `publishDir` mode parameter. + Available: symlink, rellink, link, copy, copyNoFollow, move. + - enable_conda: + type: boolean + description: | + Run the module with Conda using the software specified + via the `conda` directive +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/nextflow.config b/nextflow.config index 51eb851..ed9ed5a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,94 +3,104 @@ * nf-core/deepvariant Nextflow config file * ------------------------------------------------- * Default config options for all environments. - * Cluster-specific config options should be saved - * in the conf folder and imported under a profile - * name here. */ - // Global default params, used in configs - params { - - container = 'nfcore/deepvariant:1.0' - - help = false - outdir = 'results' - email = false - name = false - - // BAM files - bam=false - bam_folder=false - bam_file_prefix="*" - getBai=false - - // Reference genomes - genome = false - genomes_base = 's3://deepvariant-data/genomes' - testBaseFolder = 's3://deepvariant-test/input' - - // Exome data - exome=false - bed=false - - // Params for the Read Group Line to be added just in case its needed. - rgid=4 - rglb="lib1" - rgpl="illumina" - rgpu="unit1" - rgsm=20 - - tracedir = "${params.outdir}/pipeline_info" - clusterOptions = false - awsqueue = false - awsregion = 'eu-west-1' - manifest.version = '1.0' - } +// Global default params, used in configs +params { + + // TODO nf-core: Specify your pipeline's command line flags + // Input options + input = '' + + // References + genome = '' + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + + // MultiQC options + multiqc_config = '' + multiqc_title = '' + max_multiqc_email_size = '25.MB' + + // Boilerplate options + enable_conda = false + outdir = './results' + tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = '' + email_on_fail = '' + plaintext_email = false + monochrome_logs = false + help = false + clusterOptions = '' + + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + hostnames = [:] + config_profile_description = '' + config_profile_contact = '' + config_profile_url = '' + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' -profiles { +} - standard { - includeConfig 'conf/base.config' - includeConfig 'conf/genomes.config' - } - conda { process.conda = "$baseDir/environment.yml" } - docker { docker.enabled = true } - singularity { - singularity.enabled = true - } +// Container slug. Stable releases should specify release tag! 
+// Developmental code should specify :dev +process.container = 'nfcore/deepvariant:dev' - binac { - includeConfig 'conf/base.config' - includeConfig 'conf/binac.config' - includeConfig 'conf/genomes.config' - } - awsbatch { - includeConfig 'conf/base.config' - includeConfig 'conf/awsbatch.config' - includeConfig 'conf/genomes.config' +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} + +profiles { + debug { process.beforeScript = 'echo $HOSTNAME' } + conda { params.enable_conda = true } + docker { + docker.enabled = true + // Avoid this error: + // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. + // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 + // once this is established and works well, nextflow might implement this behavior as new default. + docker.runOptions = '-u \$(id -u):\$(id -g)' } - test { - includeConfig 'conf/base.config' - includeConfig 'conf/test.config' - includeConfig 'conf/genomes.config' + singularity { + singularity.enabled = true + singularity.autoMounts = true } - debug { process.beforeScript = 'echo $HOSTNAME' } - none { - // Don't load any config (for use with custom home configs) + podman { + podman.enabled = true } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } +} - // Profile for testing s3 environment - test_s3{ - includeConfig 'conf/base.config' - params.fasta="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta" - params.fai="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta.fai" - params.fastagz="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta.gz" - params.gzfai="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta.gz.fai" - params.gzi="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta.gz.gzi" - params.bam_folder="${params.testBaseFolder}" - params.bed = 'https://github.com/nf-core/test-datasets/raw/deepvariant/testdata/test_nist.b37_chr20_100kbp_at_10mb.bed' - } +// Load igenomes.config if required +if (!params.igenomes_ignore) { + includeConfig 'conf/igenomes.config' +} else { + params.genomes = [:] +} +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" } // Capture exit codes from upstream processes when piping @@ -98,37 +108,37 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] timeline { enabled = true - file = "${params.tracedir}/nf-core/deepvariant_timeline.html" + file = "${params.tracedir}/execution_timeline.html" } report { enabled = true - file = "${params.tracedir}/nf-core/deepvariant_report.html" + file = "${params.tracedir}/execution_report.html" } trace { enabled = true - file = "${params.tracedir}/nf-core/deepvariant_trace.txt" + file = "${params.tracedir}/execution_trace.txt" } dag { enabled = true - file = "${params.tracedir}/nf-core/deepvariant_dag.svg" + file = "${params.tracedir}/pipeline_dag.svg" } manifest { - name = 'nf-core/deepvariant' - author = 'Phil Palmer' - homePage = 
'https://github.com/nf-core/deepvariant' - description = 'Google DeepVariant variant caller as a Nextflow pipeline' - mainScript = 'main.nf' - nextflowVersion = '>=18.10.1' - version = '1.0' + name = 'nf-core/deepvariant' + author = 'Abhinav Sharma' + homePage = 'https://github.com/nf-core/deepvariant' + description = "Google's DeepVariant as a Nextflow workflow." + mainScript = 'main.nf' + nextflowVersion = '>=20.04.0' + version = '1.0dev' } // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if(type == 'memory'){ + if (type == 'memory') { try { - if(obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) return params.max_memory as nextflow.util.MemoryUnit else return obj @@ -136,9 +146,9 @@ def check_max(obj, type) { println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" return obj } - } else if(type == 'time'){ + } else if (type == 'time') { try { - if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) return params.max_time as nextflow.util.Duration else return obj @@ -146,7 +156,7 @@ def check_max(obj, type) { println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" return obj } - } else if(type == 'cpus'){ + } else if (type == 'cpus') { try { return Math.min( obj, params.max_cpus as int ) } catch (all) { diff --git a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 0000000..9f1f670 --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,253 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/deepvariant/master/nextflow_schema.json", + "title": "nf-core/deepvariant pipeline parameters", + "description": "Google's DeepVariant as a Nextflow workflow.", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": [ + "input" + ], + "properties": { + "input": { + "type": "string", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a samplesheet with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/deepvariant/docs/usage#--input).", + "fa_icon": "fas fa-file-csv" + }, + "outdir": { + "type": "string", + "description": "The output directory where the results will be saved.", + "default": "./results", + "fa_icon": "fas fa-folder-open" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits.
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + } + } + }, + "reference_genome_options": { + "title": "Reference genome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Options for the reference genome indices used to align reads.", + "properties": { + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + }, + "fasta": { + "type": "string", + "fa_icon": "fas fa-font", + "description": "Path to FASTA genome file.", + "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible." + }, + "igenomes_base": { + "type": "string", + "description": "Directory / URL base for iGenomes references.", + "default": "s3://ngi-igenomes/igenomes/", + "fa_icon": "fas fa-cloud-download-alt", + "hidden": true + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "hidden": true, + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "hidden": true, + "fa_icon": "fas fa-question-circle" + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "hidden": true, + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ] + }, + "name": { + "type": "string", + "description": "Workflow name.", + "fa_icon": "fas fa-fingerprint", + "hidden": true, + "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles." 
+ }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "hidden": true, + "help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful." + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true, + "help_text": "Set to receive plain-text e-mails instead of HTML formatted." + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true, + "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached." + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true, + "help_text": "Set to disable colourful command line output and live life in monochrome." + }, + "multiqc_config": { + "type": "string", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports.", + "default": "${params.outdir}/pipeline_info", + "fa_icon": "fas fa-cogs", + "hidden": true + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "128.GB", + "fa_icon": "fas fa-memory", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog", + "help_text": "Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`.\n\n```bash\n## Download and use config file with following git commit id\n--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96\n```" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell nextflow where to find them with the `custom_config_base` option. For example:\n\n```bash\n## Download and unzip the config files\ncd /path/to/my/configs\nwget https://github.com/nf-core/configs/archive/master.zip\nunzip master.zip\n\n## Run the pipeline\ncd /path/to/my/data\nnextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/\n```\n\n> Note that the nf-core/tools helper package has a `download` command to download all required pipeline files + singularity containers + institutional configs in one go for you, to make this process easier.", + "fa_icon": "fas fa-users-cog" + }, + "hostnames": { + "type": "string", + "description": "Institutional configs hostname.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/reference_genome_options" + }, + { + "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + } + ] +} diff --git a/pics/pic_workflow.jpg b/pics/pic_workflow.jpg deleted file mode 100644 index 4cb296b..0000000 Binary files a/pics/pic_workflow.jpg and /dev/null differ
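
For orientation, below is a minimal sketch (not part of the diff) of how the DSL2 pieces introduced in this changeset might be composed in the pipeline's main workflow. The include paths follow the module locations added above; the workflow name, the empty options maps, and the launch command shown in the comment are illustrative assumptions, not code from this changeset.

// Hypothetical composition of the new DSL2 modules; adjust paths and options to the real pipeline.
// Example launch (assumed): nextflow run nf-core/deepvariant -profile docker --input samplesheet.csv --genome GRCh38

include { INPUT_CHECK } from './modules/local/subworkflow/input_check' addParams( options: [:] )
include { FASTQC      } from './modules/nf-core/software/fastqc/main'  addParams( options: [:] )

workflow DEEPVARIANT {
    // params.input is the samplesheet declared as required in nextflow_schema.json
    ch_samplesheet = file(params.input)

    INPUT_CHECK ( ch_samplesheet )      // emits: reads -> [ val(meta), [ reads ] ]
    FASTQC ( INPUT_CHECK.out.reads )    // emits: html, zip, version
}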